114 lines
5.0 KiB
Python
114 lines
5.0 KiB
Python
import requests
|
|
import pandas as pd
|
|
import os
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from requests.exceptions import ConnectionError, Timeout, RequestException
|
|
|
|
|
|
|
|
from app.logging_config import get_logger
|
|
|
|
from app.config import load_config
|
|
|
|
# Load application configuration once at import time; every Scraper
# instance shares this key. NOTE(review): assumes the config has a
# 'DEFAULT' section containing 'API_KEY' -- confirm against app.config.
config = load_config()
API_KEY = config['DEFAULT']['API_KEY']

# Module-wide logger from the app's central logging configuration.
logger = get_logger()
|
|
|
|
class Scraper:
|
|
def __init__(self, faction_id, fetch_interval, run_interval, app):
    """Initialize a scraping session for one faction.

    Args:
        faction_id: Torn faction ID whose members will be scraped.
        fetch_interval: Seconds to sleep between fetch cycles.
        run_interval: Total scraping duration, in days.
        app: Application object exposing config['DATA']['DATA_DIR'],
            the directory where the session CSV is written.
    """
    self.faction_id = faction_id
    self.fetch_interval = fetch_interval
    self.run_interval = run_interval
    # Wall-clock deadline after which start_scraping()'s loop exits.
    self.end_time = datetime.now() + timedelta(days=run_interval)
    # One CSV per session, named by faction ID and start timestamp.
    self.data_file_name = os.path.join(
        app.config['DATA']['DATA_DIR'],
        f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv",
    )
    self.scraping_active = False

    # FIX: was a bare print(); route through the module logger so the
    # path lands in the application log like every other message here.
    logger.debug(f"Data file: {self.data_file_name}")
|
|
|
|
def fetch_faction_data(self):
    """Fetch the faction payload (including 'members') from the Torn API.

    Returns:
        Parsed JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={API_KEY}"
    try:
        # FIX: the original had no timeout (could hang forever) and no
        # exception handling -- a ConnectionError here would crash the
        # whole start_scraping() loop. Mirror fetch_user_activity's
        # guarded style; RequestException covers ConnectionError/Timeout.
        response = requests.get(url, timeout=10)
    except RequestException as e:
        logger.error(f"Error while fetching faction data for faction ID {self.faction_id}: {e}")
        return None
    if response.status_code == 200:
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}")
    return None
|
|
|
|
def fetch_user_activity(self, user_id):
    """Fetch basic+profile data for one user, retrying with exponential backoff.

    Makes up to three attempts; sleeps 1s then 2s between failures.

    Returns:
        Parsed JSON dict on success, None after all attempts fail.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    max_attempts = 3
    attempt = 0
    while attempt < max_attempts:
        try:
            resp = requests.get(url, timeout=10)
            resp.raise_for_status()
        # Narrow subclasses first for specific log wording; the broad
        # RequestException catch-all comes last.
        except ConnectionError as e:
            logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
        except Timeout as e:
            logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
        except RequestException as e:
            logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
        else:
            return resp.json()
        attempt += 1
        if attempt < max_attempts:
            time.sleep(2 ** (attempt - 1))  # Exponential backoff: 1s, 2s
    return None
|
|
|
|
|
|
def start_scraping(self):
    """Run the scraping loop until the run interval elapses or stop_scraping() is called.

    Each cycle fetches the faction member list, then every member's
    activity, and appends the collected rows to the session CSV
    (header written only on first creation).
    """
    self.scraping_active = True
    logger.info(f"Starting scraping process for faction ID {self.faction_id}")
    logger.debug(f"Fetch interval: {self.fetch_interval}")
    logger.debug(f"Run interval: {self.run_interval}")
    logger.debug(f"End time: {self.end_time}")

    while datetime.now() < self.end_time and self.scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = self.fetch_faction_data()
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id, user_info in faction_data['members'].items():
                user_activity = self.fetch_user_activity(user_id)
                if user_activity is not None:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
                else:
                    logger.warning(f"Failed to fetch data for user {user_id}")

            # FIX: guard against an empty batch -- pd.DataFrame([]) has no
            # 'last_action'/'timestamp' columns, so the conversions below
            # raised KeyError whenever every member fetch failed.
            if user_activity_data:
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

                if not os.path.isfile(self.data_file_name):
                    df.to_csv(self.data_file_name, index=False)
                else:
                    df.to_csv(self.data_file_name, mode='a', header=False, index=False)

                logger.info(f"Data appended to {self.data_file_name}")
            else:
                logger.warning(f"No user data collected this cycle for faction ID {self.faction_id}")
        else:
            logger.warning(f"Failed to fetch faction data or no members found for faction ID {self.faction_id}")

        time.sleep(self.fetch_interval)
    else:
        # while/else: runs when the loop condition turns false (no break).
        # FIX: the original messages were inverted -- exiting while
        # datetime.now() is still before end_time means the user cleared
        # scraping_active, NOT a timeout; otherwise the deadline passed.
        if not self.scraping_active:
            logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
        else:
            logger.warning(f"Scraping stopped at {datetime.now()} because of timeout ({self.run_interval} days, end time: {self.end_time})")

    logger.info("Scraping completed.")
    self.scraping_active = False
|
|
|
|
def stop_scraping(self):
    """Signal the scraping loop to exit after its current cycle."""
    logger.debug("Scraping stopped by user")
    self.scraping_active = False
|
|
|
|
def generate_statistics(df):
    """Return member-activity counts grouped by hour of day.

    Args:
        df: DataFrame with a datetime64 'timestamp' column (as produced
            by the scraping loop's pd.to_datetime conversion).

    Returns:
        Series of row counts indexed by hour (0-23), index named 'hour'.
    """
    # FIX: the original wrote df['hour'] = ..., mutating the caller's
    # DataFrame as a hidden side effect. Group by a derived key instead;
    # the returned Series is identical (index name 'hour').
    hour_key = df['timestamp'].dt.hour.rename('hour')
    return df.groupby(hour_key).size()
|