# Torn faction activity scraper — Flask web app.
# Polls the Torn API for a faction's member roster, records per-member
# activity to CSV, and serves live logs, statistics, and file downloads.
import configparser
import glob
import logging
import os
import threading
import time
from datetime import datetime, timedelta
from logging.handlers import QueueHandler
from queue import Empty, Queue

import pandas as pd
import requests
from flask import (Flask, Response, jsonify, render_template, request,
                   send_from_directory, url_for)
from flask_bootstrap import Bootstrap5

from forms import ScrapingForm
app = Flask(__name__)

# Load configuration (expects config.ini with SECRET_KEY and API_KEY in [DEFAULT]).
config = configparser.ConfigParser()
config.read('config.ini')

app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# FIX: logging.FileHandler raises FileNotFoundError if the target directory
# does not exist, and scrape_data() writes CSVs into data/ — create both
# directories up front so a fresh checkout starts cleanly.
os.makedirs('log', exist_ok=True)
os.makedirs('data', exist_ok=True)

# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER — one log file per app start, stamped to the minute.
logFile = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
file_handler = logging.FileHandler(logFile, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# 2) QUEUE HANDLER — records pushed here are streamed to the browser by /logs.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)

# Global state shared between the request handlers and the scraper thread.
scraping_active = False
scraping_thread = None
def fetch_faction_data(faction_id):
    """Fetch the faction payload (member roster etc.) from the Torn API.

    Returns the decoded JSON dict on HTTP 200, or None on any other status
    code or on a network error.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    try:
        # FIX: requests.get() without a timeout can hang the scraper thread
        # forever; a raised ConnectionError would previously kill the thread.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        logger.warning(f"Request error fetching faction ID {faction_id}: {exc}")
        return None
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
    return None
def fetch_user_activity(user_id):
    """Fetch a user's basic+profile data from the Torn API.

    Returns the decoded JSON dict on HTTP 200, or None on any other status
    code or on a network error.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    try:
        # FIX: same as fetch_faction_data — never call out without a timeout.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        logger.error(f"Request error fetching user ID {user_id}: {exc}")
        return None
    if response.status_code == 200:
        return response.json()
    # NOTE(review): sibling fetcher logs at WARNING for the same condition;
    # kept at ERROR to preserve existing log output.
    logger.error(f"Failed to fetch user activity for user ID {user_id}")
    return None
def scrape_data(faction_id, fetch_interval, run_interval):
    """Background worker: poll the faction roster and append per-member
    activity rows to a CSV until the run window expires or the user stops it.

    faction_id     -- Torn faction to watch
    fetch_interval -- seconds to sleep between polling rounds
    run_interval   -- total run length in days
    """
    global scraping_active
    end_time = datetime.now() + timedelta(days=run_interval)
    # One CSV per run, stamped with the start time.
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"

    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            # Only the member IDs are needed; the roster values are unused.
            for user_id in faction_data['members']:
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")

            # Append data to the file.
            # FIX: guard against an all-failed round — an empty DataFrame has
            # no 'last_action'/'timestamp' columns and the conversions below
            # would raise KeyError.
            if user_activity_data:
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

                # Write headers only when creating the file.
                if not os.path.isfile(filename):
                    df.to_csv(filename, index=False)
                else:
                    df.to_csv(filename, mode='a', header=False, index=False)

                # FIX: message previously logged a literal placeholder
                # instead of the destination file name.
                logger.info(f"Data appended to {filename}")
        else:
            # NOTE(review): these messages look misplaced (they describe loop
            # exit, not a failed fetch); preserved as-is pending confirmation.
            if datetime.now() < end_time:
                logger.warning(f"Scraping stopped at {datetime.now()}")
            elif not scraping_active:
                logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
            else:
                logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
        # FIX: sleep on every round (previously only after a successful fetch),
        # so a failing API does not get hammered in a tight retry loop.
        time.sleep(fetch_interval)

    logger.info("Scraping completed.")
    scraping_active = False
def generate_statistics(df):
    """Return a Series of row counts bucketed by hour-of-day of 'timestamp'.

    FIX: results() feeds this a frame straight from pd.read_csv, where the
    'timestamp' column is plain strings — Series.dt raised AttributeError.
    Coerce to datetime first; also work on a copy so the caller's frame is
    not mutated with a surprise 'hour' column.
    """
    df = df.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['hour'] = df['timestamp'].dt.hour
    return df.groupby('hour').size()
@app.route('/')
def index():
    """Landing page: render the scraping configuration form."""
    return render_template('index.html', form=ScrapingForm())
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the submitted form and launch the scraper in a worker thread."""
    global scraping_active, scraping_thread

    form = ScrapingForm()
    if not form.validate_on_submit():
        return jsonify({"status": "Invalid form data"})

    # Only one scrape may run at a time.
    if scraping_active:
        logger.warning("Can't start scraping process: scraping already in progress")
        return jsonify({"status": "Scraping already in progress"})

    scraping_active = True

    # Run the scraper off the request thread; daemon so it dies with the app.
    worker_args = (form.faction_id.data,
                   form.fetch_interval.data,
                   form.run_interval.data)
    scraping_thread = threading.Thread(target=scrape_data, args=worker_args,
                                       daemon=True)
    scraping_thread.start()

    return jsonify({"status": "Scraping started"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Ask the scraper thread to wind down by clearing the active flag."""
    global scraping_active
    if scraping_active:
        # The worker checks this flag each round and exits on its own.
        scraping_active = False
        logger.debug("scraping_active set to False")
        return jsonify({"status": "Scraping stopped"})
    return jsonify({"status": "No scraping in progress"})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether the background scraper is currently running."""
    # Read-only access to the module-level flag; no `global` needed.
    logger.debug(f"scraping_status called: scraping_active = {scraping_active}")
    return jsonify({"scraping_active": scraping_active})
@app.route('/logs')
def logs():
    """Server-sent-events stream of log records for the browser console."""
    def generate():
        while True:
            # FIX: the previous poll-and-sleep loop forwarded at most ~10
            # records per second and burned CPU while idle; a blocking get()
            # streams each record as soon as it arrives. The timeout keeps
            # the generator responsive if the connection is torn down.
            try:
                record = log_queue.get(timeout=1.0)
            except Empty:
                continue
            yield f"data: {record.getMessage()}\n\n"
    return Response(generate(), mimetype='text/event-stream')
@app.route('/logfile', methods=['GET'])
def logfile():
    """Return the last N lines of the current run's log file as JSON.

    Query params:
        lines -- number of trailing lines to return (default 100).
    """
    # FIX: a non-numeric ?lines= previously raised ValueError -> HTTP 500.
    try:
        lines = int(request.args.get('lines', 100))
    except ValueError:
        return jsonify({"error": "'lines' must be an integer"}), 400
    if lines <= 0:
        return jsonify({"error": "'lines' must be positive"}), 400

    # logFile is the path chosen at app start-up.
    if not os.path.isfile(logFile):
        return jsonify({"error": "Log file not found"}), 404

    with open(logFile, 'r') as file:
        log_lines = file.readlines()

    return jsonify({"log": log_lines[-lines:]})
@app.route('/results')
def results():
    """Show aggregated activity statistics for a faction's scraped data."""
    faction_id = request.args.get('faction_id')
    # FIX: the filename was rebuilt from the *current* minute, which almost
    # never matches the timestamp baked in when the scrape started, so the
    # route effectively always reported "No data found." Serve the most
    # recent CSV recorded for this faction instead (the zero-padded
    # timestamp makes lexicographic order chronological).
    candidates = sorted(glob.glob(f"data/{faction_id}-*.csv"))
    if not candidates:
        return "No data found."
    df = pd.read_csv(candidates[-1])
    stats = generate_statistics(df)
    return render_template('results.html', stats=stats.to_dict())
@app.route('/download_results')
def download_results():
    """List the available data CSVs and log files with their metadata."""

    def describe(path):
        # One metadata record per file, in the shape the template consumes.
        return {
            "name": path,
            "name_display": os.path.basename(path),
            "last_modified": os.path.getmtime(path),
            "created": os.path.getctime(path),
            "size": get_size(path),
        }

    files = {
        "data": [describe(p) for p in glob.glob("data/*.csv")],
        "log": [describe(p) for p in glob.glob("log/*.log")],
    }
    return render_template('download_results.html', files=files)
@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the posted data/log files; report per-file failures.

    Returns 400 when no files were posted, 207 (Multi-Status) when some
    deletions failed, 200 on full success.
    """
    file_paths = request.form.getlist('file_paths')
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400

    # SECURITY FIX: the paths come straight from the client; previously any
    # file on disk could be removed (path traversal). Restrict deletion to
    # the app-managed data/ and log/ directories.
    allowed_roots = tuple(os.path.abspath(d) + os.sep for d in ("data", "log"))

    errors = []
    for file_path in file_paths:
        resolved = os.path.abspath(file_path)
        if not resolved.startswith(allowed_roots):
            errors.append({"file": file_path, "error": "Path not allowed"})
            continue
        if not os.path.isfile(resolved):
            errors.append({"file": file_path, "error": "File not found"})
            continue
        try:
            os.remove(resolved)
        except OSError as e:
            errors.append({"file": file_path, "error": str(e)})

    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200
@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a Unix timestamp as 'YYYY-MM-DD HH:MM:SS' (local time)."""
    stamp = datetime.fromtimestamp(value)
    return stamp.strftime('%Y-%m-%d %H:%M:%S')
def get_size(path):
    """Return the size of the file at *path* as a human-readable string.

    Uses binary (1024-based) units: bytes, KB, MB, GB, TB.
    FIX: the original chain had no final branch and returned None for
    files of 1 TB or larger; TB is now the fallback unit.
    """
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    for exponent, unit in ((1, "KB"), (2, "MB"), (3, "GB"), (4, "TB")):
        if size < 1024 ** (exponent + 1) or unit == "TB":
            return f"{round(size / 1024 ** exponent, 2)} {unit}"
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a scraped CSV from the data/ directory for download."""
    return send_from_directory('data', filename)
@app.route('/logs/<path:filename>')
def download_log_file(filename):
    """Serve a log file from the log/ directory for download.

    FIX: log files are written to 'log/' (see logFile and the glob in
    download_results), but this route served from a nonexistent 'logs/'
    directory, so every log download 404'd.
    """
    return send_from_directory('log', filename)
# Run the Flask development server; threaded=True so the blocking /logs SSE
# stream does not starve other requests. Use a production WSGI server to deploy.
if __name__ == '__main__':
    app.run(debug=True, threaded=True)