"""Flask web application for scraping Torn faction member activity.

Periodically polls the Torn API for a faction's member list and each
member's last-action/status, appends the samples to a timestamped CSV,
and serves pages for starting/stopping the scraper, streaming logs, and
downloading or deleting the collected data/log files.
"""

from flask import (
    Flask,
    Response,
    jsonify,
    render_template,
    request,
    send_from_directory,
    url_for,
)
from flask_bootstrap import Bootstrap5  # from package bootstrap-flask

import configparser
import glob
import logging
import os
import threading
import time
import zipfile
from datetime import datetime, timedelta
from logging.handlers import QueueHandler
from queue import Queue

import pandas as pd
import requests

from app.forms import ScrapingForm

app = Flask(__name__)

# Load configuration. config.ini must provide DEFAULT.SECRET_KEY,
# DEFAULT.API_KEY, a BOOTSTRAP section and LOGGING.* tuning values.
config = configparser.ConfigParser()
config.read('config.ini')
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# Move every setting from config['BOOTSTRAP'] to the root level of app.config.
# SECRET_KEY / API_KEY are only excluded from the debug print, not from the
# copy (ConfigParser section lookups are case-insensitive, so the upper-cased
# key still resolves).
for key in config['BOOTSTRAP']:
    key = key.upper()
    app.config[key] = config['BOOTSTRAP'][key]
    if key == 'SECRET_KEY' or key == 'API_KEY':
        continue  # never echo secrets
    print(f"Loaded config: {key} = {app.config[key]}")

# Global scraper state shared between the Flask handlers and the worker thread.
scraping_active = False   # True while the background scrape loop should run
scraping_thread = None    # the worker thread, if one was started
data_file_name = None     # CSV currently being written by the scraper
log_file_name = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'

# Robustness fix: make sure the directories this app writes to exist before
# opening files in them (FileHandler below fails otherwise on a fresh checkout).
os.makedirs('log', exist_ok=True)
os.makedirs('data', exist_ok=True)

# Initialize the logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# Make any logger.info() call go to both the log file and the queue
# (the queue is drained by the /logs SSE endpoint).

# 1) FILE HANDLER
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# 2) QUEUE HANDLER
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)


def create_zip(file_paths, zip_name):
    """Zip the given absolute file paths into temp/<zip_name>; return the zip path."""
    temp_dir = os.path.join(app.root_path, 'temp')
    os.makedirs(temp_dir, exist_ok=True)  # first download on a fresh install
    zip_path = os.path.join(temp_dir, zip_name)
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file_path in file_paths:
            # Store only the basename so the archive has a flat layout.
            zipf.write(file_path, os.path.basename(file_path))
    return zip_path


def delete_old_zips():
    """Delete zip files in temp/ that are more than one hour old."""
    temp_dir = os.path.join(app.root_path, 'temp')
    os.makedirs(temp_dir, exist_ok=True)  # os.listdir fails if it's missing
    now = datetime.now()
    for filename in os.listdir(temp_dir):
        if filename.endswith('.zip'):
            file_path = os.path.join(temp_dir, filename)
            file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if now - file_time > timedelta(hours=1):
                os.remove(file_path)
                # Fixed: message previously lost its placeholder and logged
                # a literal "(unknown)" instead of the file name.
                logger.info(f"Deleted old zip file: {filename}")


def fetch_faction_data(faction_id):
    """Fetch the faction JSON (including 'members') from the Torn API, or None."""
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
    return None


def fetch_user_activity(user_id):
    """Fetch basic+profile data for one user from the Torn API, or None."""
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    logger.error(f"Failed to fetch user activity for user ID {user_id}")
    return None


def scrape_data(faction_id, fetch_interval, run_interval):
    """Background worker: poll member activity every `fetch_interval` seconds.

    Runs for up to `run_interval` days or until the user clears
    `scraping_active`, appending one row per member per poll to a
    timestamped CSV under data/.
    """
    global scraping_active
    global data_file_name
    end_time = datetime.now() + timedelta(days=run_interval)
    data_file_name = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id, user_info in faction_data['members'].items():
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        # Fixed: key was the garbled 'stadata_file_nametus'.
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
            # Guard: an all-empty batch has no columns, so the to_datetime
            # conversions below would KeyError.
            if user_activity_data:
                # Append data to the file
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
                if not os.path.isfile(data_file_name):
                    df.to_csv(data_file_name, index=False)
                else:
                    df.to_csv(data_file_name, mode='a', header=False, index=False)
                logger.info(f"Data appended to {data_file_name}")
        # Fixed: sleep unconditionally. Previously a failed fetch skipped the
        # sleep and the loop hammered the API at full speed.
        time.sleep(fetch_interval)
    # Post-loop diagnostics: the loop exits because the run window elapsed or
    # the user cleared scraping_active. Check the user-stop flag first — the
    # original ordering made that branch unreachable.
    if not scraping_active:
        logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
    elif datetime.now() >= end_time:
        logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
    else:
        logger.warning(f"Scraping stopped at {datetime.now()}")
    logger.info("Scraping completed.")
    scraping_active = False


def generate_statistics(df):
    """Return a Series counting sampled rows per hour of day (0-23)."""
    df['hour'] = df['timestamp'].dt.hour
    activity_by_hour = df.groupby('hour').size()
    return activity_by_hour


# Taken from:
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
def tail(filename, n):
    """Yield the last `n` lines of `filename`, oldest first.

    Scans the file backwards in TAIL_PAGE_SIZE-byte pages collecting the
    byte offsets of line starts, then re-reads forward from those offsets.
    Logic kept token-faithful to the gist it was taken from.
    """
    stat = os.stat(filename)
    n = int(n)
    if stat.st_size == 0 or n == 0:
        yield ''
        return
    page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
    offsets = []
    count = _n = n if n >= 0 else -n
    last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
    with open(filename, 'r') as f:
        while count > 0:
            starting_byte = last_byte_read - page_size
            if last_byte_read == 0:
                # Reached the start of the file; first line begins at 0.
                offsets.append(0)
                break
            elif starting_byte < 0:
                # Partial first page: read from the beginning of the file.
                f.seek(0)
                text = f.read(last_byte_read)
            else:
                f.seek(starting_byte)
                text = f.read(page_size)
            # Walk the page backwards recording each newline's successor offset.
            for i in range(-1, -1 * len(text) - 1, -1):
                last_byte_read -= 1
                if text[i] == '\n':
                    last_nl_byte = last_byte_read
                    starting_offset = last_nl_byte + 1
                    offsets.append(starting_offset)
                    count -= 1
    # May have overshot inside the page loop; keep exactly the last _n starts.
    offsets = offsets[len(offsets) - _n:]
    offsets.reverse()
    with open(filename, 'r') as f:
        for i, offset in enumerate(offsets):
            f.seek(offset)
            if i == len(offsets) - 1:
                yield f.read()
            else:
                bytes_to_read = offsets[i + 1] - offset
                yield f.read(bytes_to_read)


def is_data_file_in_use(filename):
    """True if `filename` is the CSV the currently-active scraper is writing."""
    if data_file_name is None:
        return False
    if os.path.join(app.root_path, filename.lstrip('/')) == \
            os.path.join(app.root_path, data_file_name.lstrip('/')) and scraping_active:
        return True
    return False


# Fixed: the URL rule had lost its <path:filename> converter, so Flask could
# never supply the view's `filename` argument.
@app.route('/is_data_file_in_use/<path:filename>')
def is_data_file_in_use_json(filename):
    return jsonify(is_data_file_in_use(filename))


def is_log_file_in_use(filename):
    """True if `filename` is the log file this process is writing to."""
    if log_file_name is None:
        return False
    if os.path.join(app.root_path, filename.lstrip('/')) == \
            os.path.join(app.root_path, log_file_name.lstrip('/')):
        return True
    return False


# Fixed: restored the missing <path:filename> converter (see above).
@app.route('/is_log_file_in_use/<path:filename>')
def is_log_file_in_use_json(filename):
    print(filename)
    return jsonify(is_log_file_in_use(filename))


@app.route('/')
def index():
    form = ScrapingForm()
    return render_template('index.html', form=form)


@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the form and launch the scraper thread (only one at a time)."""
    global scraping_active, scraping_thread
    form = ScrapingForm()
    if form.validate_on_submit():
        if scraping_active:
            logger.warning("Can't start scraping process: scraping already in progress")
            return jsonify({"status": "Scraping already in progress"})
        scraping_active = True
        faction_id = form.faction_id.data
        fetch_interval = form.fetch_interval.data
        run_interval = form.run_interval.data
        # Start scraping in a separate thread
        scraping_thread = threading.Thread(
            target=scrape_data, args=(faction_id, fetch_interval, run_interval))
        scraping_thread.daemon = True
        scraping_thread.start()
        return jsonify({"status": "Scraping started"})
    return jsonify({"status": "Invalid form data"})


@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Request the worker to stop; it checks the flag each loop iteration."""
    global scraping_active
    if not scraping_active:
        return jsonify({"status": "No scraping in progress"})
    scraping_active = False
    logger.debug("scraping_active set to False")
    return jsonify({"status": "Scraping stopped"})


@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    global scraping_active
    logger.debug(f"scraping_status called: scraping_active = {scraping_active}")
    return jsonify({"scraping_active": scraping_active})


@app.route('/logs')
def logs():
    """Server-sent-events stream of log records drained from the queue handler."""
    def generate():
        while True:
            if not log_queue.empty():
                log = log_queue.get().getMessage()
                yield f"data: {log}\n\n"
            time.sleep(0.1)
    return Response(generate(), mimetype='text/event-stream')


@app.route('/logfile', methods=['GET'])
def logfile():
    """Paginated JSON view over the tail of the current log file."""
    page = int(request.args.get('page', 0))  # Page number
    lines_per_page = int(request.args.get(
        'lines_per_page', config['LOGGING']['VIEW_PAGE_LINES']))  # Lines per page
    log_file_path = log_file_name  # Path to the current log file
    if not os.path.isfile(log_file_path):
        # Fixed: was logging.error (root logger), bypassing our handlers.
        logger.error("Log file not found")
        return jsonify({"error": "Log file not found"}), 404
    log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
    log_lines = log_lines[::-1]  # Reverse the list so newest lines come first
    start = page * lines_per_page
    end = start + lines_per_page
    paginated_lines = log_lines[start:end] if start < len(log_lines) else []
    return jsonify({
        "log": paginated_lines,
        "total_lines": len(log_lines),
        "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
        "start_line": len(log_lines) - start  # Starting line number for the current page
    })


@app.route('/results')
def results():
    # Assuming the scraping is done and data is saved somewhere.
    # NOTE(review): this rebuilds the file name from the *current* minute, so
    # it only matches a scrape started in the same minute — confirm intent.
    faction_id = request.args.get('faction_id')
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    if os.path.isfile(filename):
        # Fixed: parse_dates is required — generate_statistics uses the .dt
        # accessor, which raises on a plain string column.
        df = pd.read_csv(filename, parse_dates=['timestamp'])
        stats = generate_statistics(df)
        return render_template('results.html', stats=stats.to_dict())
    else:
        return "No data found."


@app.route('/analyze')
def analyze():
    return render_template('analyze.html')


@app.route('/log_viewer')
def log_viewer():
    return render_template('log_viewer.html')


@app.route('/download_results')
def download_results():
    """List data/log files with metadata and an 'active' (in-use) flag."""
    data_files = glob.glob("data/*.csv")
    log_files = glob.glob("log/*.log")

    def get_file_info(file_path):
        # Metadata consumed by the download_results.html template.
        return {
            "name": file_path,
            "name_display": os.path.basename(file_path),
            "last_modified": os.path.getmtime(file_path),
            "created": os.path.getctime(file_path),
            "size": get_size(file_path)
        }

    data_files_info = [get_file_info(file) for file in data_files]
    log_files_info = [get_file_info(file) for file in log_files]
    for data_file in data_files_info:
        data_file['active'] = is_data_file_in_use(data_file['name'])
    for log_file in log_files_info:
        log_file['active'] = is_log_file_in_use(log_file['name'])
    files = {"data": data_files_info, "log": log_files_info}
    return render_template('download_results.html', files=files)


@app.route('/download_files', methods=['POST'])
def download_files():
    """Zip the requested data/log files and send the archive to the client."""
    delete_old_zips()  # Clean up old zip files
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    # Validate and correct file paths: only /data/ and /log/ entries that
    # actually exist are accepted (the two original branches were identical).
    valid_file_paths = []
    for file_path in file_paths:
        if file_path.startswith(('/data/', '/log/')):
            full_path = os.path.join(app.root_path, file_path.lstrip('/'))
            if os.path.isfile(full_path):
                valid_file_paths.append(full_path)
    if not valid_file_paths:
        return jsonify({"error": "No valid files specified"}), 400
    # Create a unique zip file name
    zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
    zip_path = create_zip(valid_file_paths, zip_name)
    return send_from_directory(directory='temp', path=zip_name, as_attachment=True)


@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the requested data/log files; refuses active (in-use) files."""
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    errors = []
    for file_path in file_paths:
        full_file_path = os.path.join(app.root_path, file_path.lstrip('/'))
        print(f"Attempting to delete: {file_path}")  # Debugging line
        print(f"Full path: {full_file_path}")  # Debugging line
        print(f"file_path: {file_path}")  # Debugging line
        # Check if the file is in either the logs or the data files folder.
        # Security fix: resolve the path first and compare with a trailing
        # separator, so '..' traversal and prefix look-alikes such as
        # 'data_backup' are rejected (a bare startswith check allowed both).
        real_path = os.path.realpath(full_file_path)
        allowed_roots = (
            os.path.realpath(os.path.join(app.root_path, 'log')) + os.sep,
            os.path.realpath(os.path.join(app.root_path, 'data')) + os.sep,
        )
        if not real_path.startswith(allowed_roots):
            errors.append({"file": file_path, "error": "File not in allowed directory"})
            continue
        # Check if it's the currently active log file
        if is_log_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active log file."})
            continue
        # Check if it's an active data file
        if is_data_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active data file."})
            continue
        if not os.path.isfile(full_file_path):
            errors.append({"file": file_path, "error": "File not found"})
            continue
        try:
            os.remove(full_file_path)
        except Exception as e:
            errors.append({"file": file_path, "error": str(e)})
    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200


@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a Unix timestamp as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')


def get_size(path):
    """Return a human-readable size string for the file at `path`."""
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size/1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size/(pow(1024, 2)), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size/(pow(1024, 3)), 2)} GB"
    # Fixed: sizes >= 1 TB previously fell through and returned None.
    return f"{round(size/(pow(1024, 4)), 2)} TB"


# Fixed: restored the missing <path:filename> converters on both download
# routes; send_from_directory performs its own path-traversal checks.
@app.route('/data/<path:filename>')
def download_data_file(filename):
    return send_from_directory('data', filename)


@app.route('/log/<path:filename>')
def download_log_file(filename):
    return send_from_directory('log', filename)


@app.route('/config/lines_per_page')
def get_lines_per_page():
    lines_per_page = config['LOGGING']['VIEW_PAGE_LINES']
    return jsonify({"lines_per_page": lines_per_page})


if __name__ == '__main__':
    app.run(debug=True, threaded=True)