improves scraper error handling

This commit is contained in:
Michael Beck
2025-02-08 17:48:18 +01:00
parent 056acd9cfc
commit bef689e25e

View File

@@ -3,6 +3,9 @@ import pandas as pd
import os
import time
from datetime import datetime, timedelta
from requests.exceptions import ConnectionError, Timeout, RequestException
from app.logging_config import get_logger
@@ -34,12 +37,23 @@ class Scraper:
    def fetch_user_activity(self, user_id):
        url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
        retries = 3
        for attempt in range(retries):
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                return response.json()
            except ConnectionError as e:
                logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
            except Timeout as e:
                logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
            except RequestException as e:
                logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
        return None
    def start_scraping(self):
        self.scraping_active = True
        logger.info(f"Starting scraping process for faction ID {self.faction_id}")
@@ -54,7 +68,7 @@ class Scraper:
                user_activity_data = []
                for user_id, user_info in faction_data['members'].items():
                    user_activity = self.fetch_user_activity(user_id)
                    if user_activity is not None:
                        user_activity_data.append({
                            'user_id': user_id,
                            'name': user_activity.get('name', ''),
@@ -63,8 +77,9 @@ class Scraper:
                            'timestamp': datetime.now().timestamp()
                        })
                        logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
                    else:
                        logger.warning(f"Failed to fetch data for user {user_id}")
                # Append data to the file
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
@@ -75,6 +90,8 @@ class Scraper:
                    df.to_csv(self.data_file_name, mode='a', header=False, index=False)
                    logger.info(f"Data appended to {self.data_file_name}")
            else:
                logger.warning(f"Failed to fetch faction data or no members found for faction ID {self.faction_id}")
            time.sleep(self.fetch_interval)
        else: