# filepath: app/api.py
"""JSON API endpoints: scrape control, log paging, file download/delete."""
from flask import jsonify, request, send_from_directory, current_app
import threading
import os
from datetime import datetime

from app.models import Scraper
from app.util import create_zip, delete_old_zips, tail
from app.config import load_config
from app.logging_config import get_logger
from app.forms import ScrapingForm

config = load_config()
logger = get_logger()
# Absolute path of the log file the active FileHandler writes to.
log_file_name = logger.handlers[0].baseFilename

# Legacy module-level state, kept only because other modules import it
# (app.views does `from app.api import scraper`).  The authoritative
# objects live in app.config['SCRAPER'] / app.config['SCRAPING_THREAD'].
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()


def _resolve_download_path(file_path, parent_dir):
    """Map a client path ('/data/x.csv' or '/log/x.log') to an absolute
    file path under *parent_dir*, or return None if the path is not in an
    allowed directory or the file does not exist.

    Uses slicing instead of lstrip: lstrip('/data/') strips *characters*,
    mangling filenames that start with 'd', 'a' or 't'.
    """
    if file_path.startswith(('/data/', '/log/')):
        full_path = os.path.join(parent_dir, file_path[1:])
        if os.path.isfile(full_path):
            return full_path
    return None


def register_api(app):
    """Attach all API routes to *app*."""

    @app.route('/start_scraping', methods=['POST'])
    def start_scraping():
        # Lock so two concurrent POSTs cannot both start a scraper.
        with scrape_lock:
            active = current_app.config.get('SCRAPER')
            if active is not None and active.scraping_active:
                logger.warning("Can't start scraping process: scraping already in progress")
                return jsonify({"status": "Scraping already in progress"})

            form = ScrapingForm()
            if not form.validate_on_submit():
                return jsonify({"status": "Invalid form data"})

            # Pass the real app object, not the request-bound proxy: the
            # scraper runs in a background thread with no request context.
            new_scraper = Scraper(form.faction_id.data,
                                  form.fetch_interval.data,
                                  form.run_interval.data,
                                  current_app._get_current_object())
            new_scraper.scraping_active = True

            thread = threading.Thread(target=new_scraper.start_scraping, daemon=True)
            thread.start()

            current_app.config['SCRAPER'] = new_scraper
            current_app.config['SCRAPING_THREAD'] = thread
            return jsonify({"status": "Scraping started"})

    @app.route('/stop_scraping', methods=['POST'])
    def stop_scraping():
        active = current_app.config.get('SCRAPER')
        if active is None or not active.scraping_active:
            return jsonify({"status": "Scraping is not running"})

        active.stop_scraping()
        current_app.config['SCRAPING_ACTIVE'] = False
        logger.debug("Scraping stopped by user")
        return jsonify({"status": "Scraping stopped"})

    @app.route('/logfile', methods=['GET'])
    def logfile():
        """Return one page of the current log file, newest lines first."""
        page = int(request.args.get('page', 0))
        lines_per_page = int(request.args.get('lines_per_page',
                                              config['LOGGING']['VIEW_PAGE_LINES']))

        if not os.path.isfile(log_file_name):
            logger.error("Log file not found")
            return jsonify({"error": "Log file not found"}), 404

        log_lines = list(tail(log_file_name, config['LOGGING']['VIEW_MAX_LINES']))
        log_lines.reverse()  # newest first

        start = page * lines_per_page
        end = start + lines_per_page
        paginated = log_lines[start:end] if start < len(log_lines) else []

        return jsonify({
            "log": paginated,
            "total_lines": len(log_lines),
            "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
            "start_line": len(log_lines) - start,
        })

    @app.route('/download_files', methods=['POST'])
    def download_files():
        """Zip the requested data/log files and send the archive."""
        delete_old_zips()  # clean up zip files older than one hour

        file_paths = request.json.get('file_paths')
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400

        # data/ and log/ live in the project root, one level above app/.
        parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))
        valid_file_paths = [
            p for p in (_resolve_download_path(fp, parent_dir) for fp in file_paths)
            if p is not None
        ]
        if not valid_file_paths:
            return jsonify({"error": "No valid files specified"}), 400

        zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
        # Fix: util.create_zip requires the app (TEMP dir resolution);
        # the original call omitted it and raised TypeError.
        create_zip(valid_file_paths, zip_name, app)

        current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
        return download_tmp_file(zip_name)

    @app.route('/delete_files', methods=['POST'])
    def delete_files():
        """Delete the requested files, refusing active log/data files."""
        file_paths = request.json.get('file_paths', [])
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400

        errors = []
        data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
        # Fix: read the scraper from app.config; the module-level global
        # is never assigned and was always None here.
        active_scraper = current_app.config.get('SCRAPER')

        for file_path in file_paths:
            # Slice off the URL prefix; lstrip('/data/') strips characters,
            # not a prefix, and corrupted some filenames.
            if file_path.startswith('/data/'):
                full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
            elif file_path.startswith('/log/'):
                full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
            else:
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue

            if full_file_path == log_file_name:
                errors.append({"file": file_path, "error": "Cannot delete active log file."})
                continue

            if active_scraper and active_scraper.data_file_name == full_file_path:
                errors.append({"file": file_path, "error": "Cannot delete active data file."})
                continue

            if not os.path.isfile(full_file_path):
                errors.append({"file": file_path, "error": "File not found"})
                continue

            try:
                os.remove(full_file_path)
            except Exception as e:
                errors.append({"file": file_path, "error": str(e)})

        if errors:
            return jsonify({"errors": errors}), 207  # Multi-Status
        return jsonify({"success": True}), 200

    @app.route('/data/<filename>')
    def download_data_file(filename):
        data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
        return send_from_directory(directory=data_dir, path=filename, as_attachment=True)

    @app.route('/log/<filename>')
    def download_log_file(filename):
        log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
        return send_from_directory(directory=log_dir, path=filename, as_attachment=True)

    @app.route('/tmp/<filename>')
    def download_tmp_file(filename):
        tmp_dir = os.path.abspath(config['TEMP']['TEMP_DIR'])
        return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)

    @app.route('/config/lines_per_page')
    def get_lines_per_page():
        return jsonify({"lines_per_page": config['LOGGING']['VIEW_PAGE_LINES']})

    @app.route('/scraping_status', methods=['GET'])
    def scraping_status():
        # Fix: read app.config, not the module-level `scraper` global,
        # which is never set after startup and always reported inactive.
        active = current_app.config.get('SCRAPER')
        status = bool(active and active.scraping_active)
        logger.debug(f"Scraping active: {status}")
        return jsonify({"scraping_active": status})
# filepath: app/app.py
"""Application factory for the Torn activity tracker."""
from flask import Flask
from flask_bootstrap import Bootstrap5

from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
from app.logging_config import log_file_name


def init_app():
    """Create, configure and return the Flask application.

    Loads config.ini, copies the relevant sections into app.config,
    initialises Bootstrap, seeds the shared runtime state keys and
    registers all view/API routes and template filters.
    """
    config = load_config()

    app = Flask(__name__)

    app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
    app.config['API_KEY'] = config['DEFAULT']['API_KEY']

    app.config['DATA'] = config['DATA']
    app.config['TEMP'] = config['TEMP']
    app.config['LOGGING'] = config['LOGGING']

    # Promote bootstrap-flask settings to the root of app.config.
    for key in config['BOOTSTRAP']:
        app.config[key.upper()] = config['BOOTSTRAP'][key]

    Bootstrap5(app)

    # Shared runtime state; the scraper/thread are stored here by the API.
    app.config['SCRAPING_ACTIVE'] = False
    app.config['SCRAPING_THREAD'] = None
    app.config['DATA_FILE_NAME'] = None
    # Fix: use the exact file the logging subsystem opened instead of
    # recomputing "log/<timestamp>.log" here -- the hard-coded "log/"
    # prefix ignored LOG_DIR from config, and re-evaluating the timestamp
    # could land on a different minute, breaking active-log detection.
    app.config['LOG_FILE_NAME'] = log_file_name

    register_views(app)
    register_api(app)
    register_filters(app)

    return app
class Scraper:
    """Polls the Torn API for a faction's member activity on a fixed
    interval and appends the results to a per-run CSV file."""

    def __init__(self, faction_id, fetch_interval, run_interval, app):
        """fetch_interval is seconds between polls; run_interval is the
        number of days to keep scraping before stopping automatically."""
        self.faction_id = faction_id
        self.fetch_interval = fetch_interval
        self.run_interval = run_interval
        self.end_time = datetime.now() + timedelta(days=run_interval)
        self.data_file_name = os.path.join(
            app.config['DATA']['DATA_DIR'],
            f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv")
        self.scraping_active = False

    def fetch_faction_data(self):
        """Return the faction JSON, or None on a non-200 response."""
        url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={API_KEY}"
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}")
        return None

    def fetch_user_activity(self, user_id):
        """Return one member's basic+profile JSON, or None on failure."""
        url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        logger.error(f"Failed to fetch user activity for user ID {user_id}")
        return None

    def start_scraping(self):
        """Run the polling loop until end_time passes or stop_scraping()
        is called.  Intended to run in a daemon thread."""
        self.scraping_active = True
        logger.info(f"Starting scraping process for faction ID {self.faction_id}")
        logger.debug(f"Fetch interval: {self.fetch_interval}")
        logger.debug(f"Run interval: {self.run_interval}")
        logger.debug(f"End time: {self.end_time}")

        while datetime.now() < self.end_time and self.scraping_active:
            logger.info(f"Fetching data at {datetime.now()}")
            faction_data = self.fetch_faction_data()
            if faction_data and 'members' in faction_data:
                user_activity_data = []
                for user_id, user_info in faction_data['members'].items():
                    user_activity = self.fetch_user_activity(user_id)
                    if user_activity:
                        user_activity_data.append({
                            'user_id': user_id,
                            'name': user_activity.get('name', ''),
                            'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                            'status': user_activity.get('status', {}).get('state', ''),
                            'timestamp': datetime.now().timestamp(),
                        })
                        logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")

                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

                # Write the header only on the first write of this run.
                if not os.path.isfile(self.data_file_name):
                    df.to_csv(self.data_file_name, index=False)
                else:
                    df.to_csv(self.data_file_name, mode='a', header=False, index=False)

                logger.info(f"Data appended to {self.data_file_name}")
                time.sleep(self.fetch_interval)
            else:
                # Fetch failed or the response had no 'members'.  The old
                # branch logged a misleading "timeout" message and looped
                # again with no delay, hammering the API on every failure;
                # wait one interval before retrying instead.
                logger.warning(f"No usable faction data at {datetime.now()}; "
                               f"retrying in {self.fetch_interval}s")
                time.sleep(self.fetch_interval)

        logger.info("Scraping completed.")
        self.scraping_active = False

    def stop_scraping(self):
        """Ask the polling loop to exit after its current iteration."""
        self.scraping_active = False
        logger.debug("Scraping stopped by user")


def generate_statistics(df):
    """Return a Series of record counts per hour-of-day.

    *df* must have a datetime 'timestamp' column; an 'hour' column is
    added to it as a side effect.
    """
    df['hour'] = df['timestamp'].dt.hour
    return df.groupby('hour').size()
def tail(filename, n):
    """Yield the last *n* lines of *filename* (each including its newline).

    Scans backwards in TAIL_PAGE_SIZE chunks collecting line-start byte
    offsets, then yields the lines in file order.  Yields a single ''
    for an empty file or n == 0.
    NOTE(review): the file is opened in text mode but offsets come from
    byte sizes -- this assumes a single-byte encoding; confirm the log
    files contain no multibyte characters before relying on it.
    """
    stat = os.stat(filename)
    n = int(n)
    if stat.st_size == 0 or n == 0:
        yield ''
        return

    page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
    offsets = []
    count = _n = n if n >= 0 else -n

    last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1

    with open(filename, 'r') as f:
        while count > 0:
            starting_byte = last_byte_read - page_size
            if last_byte_read == 0:
                offsets.append(0)
                break
            elif starting_byte < 0:
                f.seek(0)
                text = f.read(last_byte_read)
            else:
                f.seek(starting_byte)
                text = f.read(page_size)

            # Walk the chunk backwards recording each newline position.
            for i in range(-1, -1 * len(text) - 1, -1):
                last_byte_read -= 1
                if text[i] == '\n':
                    last_nl_byte = last_byte_read
                    starting_offset = last_nl_byte + 1
                    offsets.append(starting_offset)
                    count -= 1

    offsets = offsets[len(offsets) - _n:]
    offsets.reverse()

    with open(filename, 'r') as f:
        for i, offset in enumerate(offsets):
            f.seek(offset)
            if i == len(offsets) - 1:
                yield f.read()
            else:
                yield f.read(offsets[i + 1] - offset)


def get_size(path):
    """Return the size of *path* as a human-readable string."""
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size / 1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size / pow(1024, 2), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size / pow(1024, 3), 2)} GB"
    # Fix: files >= 1 TB previously fell off the chain and returned None.
    return f"{round(size / pow(1024, 4), 2)} TB"
def register_views(app):
    """Attach the HTML page routes to *app*."""

    @app.route('/')
    def index():
        form = ScrapingForm()
        return render_template('index.html', form=form)

    @app.route('/results')
    def results():
        return render_template('results.html')

    @app.route('/analyze')
    def analyze():
        return render_template('analyze.html')

    @app.route('/log_viewer')
    def log_viewer():
        return render_template('log_viewer.html')

    @app.route('/download_results')
    def download_results():
        """List data and log files with size/mtime and an 'active' flag."""
        active_log = os.path.abspath(app.config['LOG_FILE_NAME'])
        scraper = app.config.get('SCRAPER')

        data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])

        def file_info(path):
            # Metadata dict consumed by download_results.html.
            return {
                "name": path,
                "name_display": os.path.basename(path),
                "last_modified": os.path.getmtime(path),
                "created": os.path.getctime(path),
                "size": get_size(path),
            }

        data_files_info = [file_info(p) for p in glob.glob(os.path.join(data_dir, "*.csv"))]
        log_files_info = [file_info(p) for p in glob.glob(os.path.join(log_dir, "*.log"))]

        # Fix: the original only assigned the 'active' key (and marked the
        # active log file) when a scraper was running, leaving the key
        # missing from every entry otherwise.
        active_data = (os.path.abspath(scraper.data_file_name)
                       if scraper and scraper.scraping_active else None)
        for info in data_files_info:
            info['active'] = (os.path.abspath(info['name']) == active_data)
        for info in log_files_info:
            info['active'] = (os.path.abspath(info['name']) == active_log)

        data_files_info.sort(key=lambda f: f['last_modified'], reverse=True)
        log_files_info.sort(key=lambda f: f['last_modified'], reverse=True)

        files = {"data": data_files_info, "log": log_files_info}
        return render_template('download_results.html', files=files)
"""Development entry point: build the app via the factory and serve it."""
from app.app import init_app

if __name__ == '__main__':
    application = init_app()
    application.run(debug=True, threaded=True)