improves scraper error handling
This commit is contained in:
@@ -3,6 +3,9 @@ import pandas as pd
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from requests.exceptions import ConnectionError, Timeout, RequestException
|
||||
|
||||
|
||||
|
||||
from app.logging_config import get_logger
|
||||
|
||||
@@ -34,12 +37,23 @@ class Scraper:
|
||||
|
||||
def fetch_user_activity(self, user_id):
|
||||
url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
|
||||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
logger.error(f"Failed to fetch user activity for user ID {user_id}")
|
||||
retries = 3
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
response = requests.get(url, timeout=10)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except ConnectionError as e:
|
||||
logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
|
||||
except Timeout as e:
|
||||
logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
|
||||
except RequestException as e:
|
||||
logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
|
||||
if attempt < retries - 1:
|
||||
time.sleep(2 ** attempt) # Exponential backoff
|
||||
return None
|
||||
|
||||
|
||||
def start_scraping(self):
|
||||
self.scraping_active = True
|
||||
logger.info(f"Starting scraping process for faction ID {self.faction_id}")
|
||||
@@ -54,7 +68,7 @@ class Scraper:
|
||||
user_activity_data = []
|
||||
for user_id, user_info in faction_data['members'].items():
|
||||
user_activity = self.fetch_user_activity(user_id)
|
||||
if user_activity:
|
||||
if user_activity is not None:
|
||||
user_activity_data.append({
|
||||
'user_id': user_id,
|
||||
'name': user_activity.get('name', ''),
|
||||
@@ -63,8 +77,9 @@ class Scraper:
|
||||
'timestamp': datetime.now().timestamp()
|
||||
})
|
||||
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
|
||||
else:
|
||||
logger.warning(f"Failed to fetch data for user {user_id}")
|
||||
|
||||
# Append data to the file
|
||||
df = pd.DataFrame(user_activity_data)
|
||||
df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
|
||||
@@ -75,6 +90,8 @@ class Scraper:
|
||||
df.to_csv(self.data_file_name, mode='a', header=False, index=False)
|
||||
|
||||
logger.info(f"Data appended to {self.data_file_name}")
|
||||
else:
|
||||
logger.warning(f"Failed to fetch faction data or no members found for faction ID {self.faction_id}")
|
||||
|
||||
time.sleep(self.fetch_interval)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user