improves scraper error handling
This commit is contained in:
@@ -3,6 +3,9 @@ import pandas as pd
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
from requests.exceptions import ConnectionError, Timeout, RequestException
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from app.logging_config import get_logger
|
from app.logging_config import get_logger
|
||||||
|
|
||||||
@@ -34,12 +37,23 @@ class Scraper:
|
|||||||
|
|
||||||
def fetch_user_activity(self, user_id, retries=3, timeout=10):
    """Fetch basic/profile data for a Torn user from the public API.

    Transient network failures are retried with exponential backoff.

    Args:
        user_id: Torn user ID to query.
        retries: Maximum number of attempts (default 3, matching the
            previously hard-coded value).
        timeout: Per-request timeout in seconds (default 10, as before).

    Returns:
        The decoded JSON payload on success, or None once every attempt
        has failed.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=timeout)
            # 4xx/5xx responses raise HTTPError, a RequestException
            # subclass, so bad statuses are retried like network errors.
            response.raise_for_status()
            return response.json()
        except ConnectionError as e:
            logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
        except Timeout as e:
            logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
        except RequestException as e:
            logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
        # Back off before retrying; no sleep after the final failure.
        if attempt < retries - 1:
            time.sleep(2 ** attempt)  # Exponential backoff
    return None
|
||||||
|
|
||||||
|
|
||||||
def start_scraping(self):
|
def start_scraping(self):
|
||||||
self.scraping_active = True
|
self.scraping_active = True
|
||||||
logger.info(f"Starting scraping process for faction ID {self.faction_id}")
|
logger.info(f"Starting scraping process for faction ID {self.faction_id}")
|
||||||
@@ -54,7 +68,7 @@ class Scraper:
|
|||||||
user_activity_data = []
|
user_activity_data = []
|
||||||
for user_id, user_info in faction_data['members'].items():
|
for user_id, user_info in faction_data['members'].items():
|
||||||
user_activity = self.fetch_user_activity(user_id)
|
user_activity = self.fetch_user_activity(user_id)
|
||||||
if user_activity:
|
if user_activity is not None:
|
||||||
user_activity_data.append({
|
user_activity_data.append({
|
||||||
'user_id': user_id,
|
'user_id': user_id,
|
||||||
'name': user_activity.get('name', ''),
|
'name': user_activity.get('name', ''),
|
||||||
@@ -63,8 +77,9 @@ class Scraper:
|
|||||||
'timestamp': datetime.now().timestamp()
|
'timestamp': datetime.now().timestamp()
|
||||||
})
|
})
|
||||||
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
|
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Failed to fetch data for user {user_id}")
|
||||||
|
|
||||||
# Append data to the file
|
|
||||||
df = pd.DataFrame(user_activity_data)
|
df = pd.DataFrame(user_activity_data)
|
||||||
df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
|
df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
|
||||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
|
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
|
||||||
@@ -75,6 +90,8 @@ class Scraper:
|
|||||||
df.to_csv(self.data_file_name, mode='a', header=False, index=False)
|
df.to_csv(self.data_file_name, mode='a', header=False, index=False)
|
||||||
|
|
||||||
logger.info(f"Data appended to {self.data_file_name}")
|
logger.info(f"Data appended to {self.data_file_name}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Failed to fetch faction data or no members found for faction ID {self.faction_id}")
|
||||||
|
|
||||||
time.sleep(self.fetch_interval)
|
time.sleep(self.fetch_interval)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user