"""Flask app that periodically scrapes Torn faction member activity via the
Torn API, appends snapshots to per-faction CSV files, and exposes routes to
start/stop scraping, stream logs (SSE), page through the log file, view
simple per-hour statistics, and download or delete result files."""

from flask import Flask, request, render_template, Response, jsonify, url_for
from flask import send_from_directory
from flask_bootstrap import Bootstrap5
from forms import ScrapingForm
import requests
import pandas as pd

import configparser
import glob
import logging
import os
import threading
import time
from datetime import datetime, timedelta
from logging.handlers import QueueHandler
from queue import Queue

app = Flask(__name__)

# Load configuration (Flask secret key and Torn API key live in config.ini).
config = configparser.ConfigParser()
config.read('config.ini')
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# The file handler and the CSV writer both assume these directories exist;
# create them up front so a fresh checkout doesn't crash at startup.
os.makedirs('log', exist_ok=True)
os.makedirs('data', exist_ok=True)

# Initialize the logger. Every logger.info() call goes both to the log file
# and to the in-memory queue that feeds the /logs SSE stream.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# 1) FILE HANDLER — one log file per app start, named by start timestamp.
logFile = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
file_handler = logging.FileHandler(logFile, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# 2) QUEUE HANDLER — records are pulled off this queue by the /logs route.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)

# Global state shared between the scraper worker thread and the routes.
scraping_active = False
scraping_thread = None


def fetch_faction_data(faction_id):
    """Fetch the faction record (including members) from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise logs a warning and
    returns None.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
    return None


def fetch_user_activity(user_id):
    """Fetch basic+profile data for one user from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise logs an error and
    returns None.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    logger.error(f"Failed to fetch user activity for user ID {user_id}")
    return None


def scrape_data(faction_id, fetch_interval, run_interval):
    """Worker loop: poll member activity every `fetch_interval` seconds for up
    to `run_interval` days, appending each snapshot to a per-run CSV.

    Runs in a daemon thread; stops early when the user clears the global
    `scraping_active` flag via /stop_scraping.
    """
    global scraping_active
    end_time = datetime.now() + timedelta(days=run_interval)
    # The filename is fixed at scrape START time; /results must locate it by
    # globbing, not by reconstructing the timestamp.
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id in faction_data['members']:
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
            # Append data to the file (skip entirely if nothing was fetched:
            # column access on an empty DataFrame would raise KeyError).
            df = pd.DataFrame(user_activity_data)
            if not df.empty:
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
                if not os.path.isfile(filename):
                    df.to_csv(filename, index=False)
                else:
                    df.to_csv(filename, mode='a', header=False, index=False)
                logger.info(f"Data appended to {filename}")
        time.sleep(fetch_interval)
    # Diagnose why the loop ended: user request vs. run-interval timeout.
    if not scraping_active:
        logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
    else:
        logger.info(f"Scraping stopped due to timeout at {datetime.now()}")
    logger.info("Scraping completed.")
    scraping_active = False


def generate_statistics(df):
    """Return a Series of sample counts grouped by hour of day (0-23)."""
    # CSV round-trips store timestamps as strings; coerce back to datetime
    # before using the .dt accessor.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['hour'] = df['timestamp'].dt.hour
    activity_by_hour = df.groupby('hour').size()
    return activity_by_hour


# Taken from:
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
def tail(filename, n):
    """Yield the last `n` lines of `filename`, reading backwards in pages."""
    stat = os.stat(filename)
    n = int(n)
    if stat.st_size == 0 or n == 0:
        yield ''
        return
    # configparser values are strings; without int() the subtraction below
    # would raise TypeError.
    page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
    offsets = []
    count = _n = n if n >= 0 else -n
    last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
    with open(filename, 'r') as f:
        while count > 0:
            starting_byte = last_byte_read - page_size
            if last_byte_read == 0:
                offsets.append(0)
                break
            elif starting_byte < 0:
                f.seek(0)
                text = f.read(last_byte_read)
            else:
                f.seek(starting_byte)
                text = f.read(page_size)
            for i in range(-1, -1 * len(text) - 1, -1):
                last_byte_read -= 1
                if text[i] == '\n':
                    last_nl_byte = last_byte_read
                    starting_offset = last_nl_byte + 1
                    offsets.append(starting_offset)
                    count -= 1
    offsets = offsets[len(offsets) - _n:]
    offsets.reverse()
    with open(filename, 'r') as f:
        for i, offset in enumerate(offsets):
            f.seek(offset)
            if i == len(offsets) - 1:
                yield f.read()
            else:
                bytes_to_read = offsets[i + 1] - offset
                yield f.read(bytes_to_read)


@app.route('/')
def index():
    """Render the landing page with the scraping configuration form."""
    form = ScrapingForm()
    return render_template('index.html', form=form)


@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the form and launch the scraper in a daemon thread."""
    global scraping_active, scraping_thread
    form = ScrapingForm()
    if form.validate_on_submit():
        if scraping_active:
            logger.warning("Can't start scraping process: scraping already in progress")
            return jsonify({"status": "Scraping already in progress"})
        scraping_active = True
        faction_id = form.faction_id.data
        fetch_interval = form.fetch_interval.data
        run_interval = form.run_interval.data
        # Start scraping in a separate thread so the request returns at once.
        scraping_thread = threading.Thread(target=scrape_data,
                                           args=(faction_id, fetch_interval, run_interval))
        scraping_thread.daemon = True
        scraping_thread.start()
        return jsonify({"status": "Scraping started"})
    return jsonify({"status": "Invalid form data"})


@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Signal the worker loop to stop at its next iteration."""
    global scraping_active
    if not scraping_active:
        return jsonify({"status": "No scraping in progress"})
    scraping_active = False
    logger.debug("scraping_active set to False")
    return jsonify({"status": "Scraping stopped"})


@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether a scrape is currently running."""
    logger.debug(f"scraping_status called: scraping_active = {scraping_active}")
    return jsonify({"scraping_active": scraping_active})


@app.route('/logs')
def logs():
    """Stream log records to the browser as server-sent events."""
    def generate():
        while True:
            if not log_queue.empty():
                log = log_queue.get().getMessage()
                yield f"data: {log}\n\n"
            time.sleep(0.1)
    return Response(generate(), mimetype='text/event-stream')


@app.route('/logfile', methods=['GET'])
def logfile():
    """Return a page of the current log file, newest lines first."""
    page = int(request.args.get('page', 0))  # Page number
    lines_per_page = int(request.args.get('lines_per_page',
                                          config['LOGGING']['VIEW_PAGE_LINES']))  # Lines per page
    log_file_path = logFile  # Path to the current log file
    if not os.path.isfile(log_file_path):
        logger.error("Log file not found")
        return jsonify({"error": "Log file not found"}), 404
    log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
    log_lines = log_lines[::-1]  # Reverse the list
    start = page * lines_per_page
    end = start + lines_per_page
    paginated_lines = log_lines[start:end] if start < len(log_lines) else []
    return jsonify({
        "log": paginated_lines,
        "total_lines": len(log_lines),
        "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
        "start_line": len(log_lines) - start  # Starting line number for the current page
    })


@app.route('/results')
def results():
    """Show per-hour activity statistics for the requested faction."""
    faction_id = request.args.get('faction_id')
    # The CSV is named with the scrape START time, so rebuilding the name from
    # the current time never matches; take the most recent file instead.
    candidates = sorted(glob.glob(f"data/{faction_id}-*.csv"))
    if candidates:
        df = pd.read_csv(candidates[-1])
        stats = generate_statistics(df)
        return render_template('results.html', stats=stats.to_dict())
    return "No data found."


@app.route('/download_results')
def download_results():
    """List downloadable data and log files with basic metadata."""
    data_files = glob.glob("data/*.csv")
    log_files = glob.glob("log/*.log")

    def get_file_info(file_path):
        return {
            "name": file_path,
            "name_display": os.path.basename(file_path),
            "last_modified": os.path.getmtime(file_path),
            "created": os.path.getctime(file_path),
            "size": get_size(file_path)
        }

    data_files_info = [get_file_info(file) for file in data_files]
    log_files_info = [get_file_info(file) for file in log_files]
    files = {"data": data_files_info, "log": log_files_info}
    return render_template('download_results.html', files=files)


@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the posted file paths; report per-file errors with 207."""
    file_paths = request.form.getlist('file_paths')
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    errors = []
    for file_path in file_paths:
        if not os.path.isfile(file_path):
            errors.append({"file": file_path, "error": "File not found"})
            continue
        try:
            os.remove(file_path)
        except Exception as e:
            errors.append({"file": file_path, "error": str(e)})
    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200


@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')


def get_size(path):
    """Return a human-readable size string for the file at `path`."""
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size / 1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size / pow(1024, 2), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size / pow(1024, 3), 2)} GB"
    # Fallback so very large files don't get None.
    return f"{round(size / pow(1024, 4), 2)} TB"


# The <path:filename> converter is required: without it Flask calls the view
# with no `filename` argument and every request raises TypeError.
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a scraped CSV from the data/ directory."""
    return send_from_directory('data', filename)


@app.route('/logs/<path:filename>')
def download_log_file(filename):
    """Serve a log file. Logs are written to 'log/', not 'logs/'."""
    return send_from_directory('log', filename)


@app.route('/config/lines_per_page')
def get_lines_per_page():
    """Expose the configured log-view page size to the frontend."""
    lines_per_page = int(config['LOGGING']['VIEW_PAGE_LINES'])
    return jsonify({"lines_per_page": lines_per_page})


if __name__ == '__main__':
    app.run(debug=True, threaded=True)