From bef689e25ed816af5d83287efcd2454d6eae7665 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Sat, 8 Feb 2025 17:48:18 +0100 Subject: [PATCH] improves scraper error handling --- app/models.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/app/models.py b/app/models.py index 274bcd8..2634978 100644 --- a/app/models.py +++ b/app/models.py @@ -3,6 +3,9 @@ import pandas as pd import os import time from datetime import datetime, timedelta +from requests.exceptions import ConnectionError, Timeout, RequestException + + from app.logging_config import get_logger @@ -34,12 +37,23 @@ class Scraper: def fetch_user_activity(self, user_id): url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}" - response = requests.get(url) - if response.status_code == 200: - return response.json() - logger.error(f"Failed to fetch user activity for user ID {user_id}") + retries = 3 + for attempt in range(retries): + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + return response.json() + except ConnectionError as e: + logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}") + except Timeout as e: + logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}") + except RequestException as e: + logger.error(f"Error while fetching user activity for user ID {user_id}: {e}") + if attempt < retries - 1: + time.sleep(2 ** attempt) # Exponential backoff return None + def start_scraping(self): self.scraping_active = True logger.info(f"Starting scraping process for faction ID {self.faction_id}") @@ -54,7 +68,7 @@ class Scraper: user_activity_data = [] for user_id, user_info in faction_data['members'].items(): user_activity = self.fetch_user_activity(user_id) - if user_activity: + if user_activity is not None: user_activity_data.append({ 'user_id': user_id, 'name': user_activity.get('name', ''), @@ -63,8 +77,9 @@ class Scraper: 'timestamp': datetime.now().timestamp() }) logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})") + else: + logger.warning(f"Failed to fetch data for user {user_id}") - # Append data to the file df = pd.DataFrame(user_activity_data) df['last_action'] = pd.to_datetime(df['last_action'], unit='s') df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') @@ -75,6 +90,8 @@ class Scraper: df.to_csv(self.data_file_name, mode='a', header=False, index=False) logger.info(f"Data appended to {self.data_file_name}") + else: + logger.warning(f"Failed to fetch faction data or no members found for faction ID {self.faction_id}") time.sleep(self.fetch_interval) else: