First restructure

This commit is contained in:
Michael Beck
2025-02-06 23:27:30 +01:00
parent f552601c4b
commit ceebbafed5
24 changed files with 56 additions and 198 deletions

470
app/app.py Normal file
View File

@@ -0,0 +1,470 @@
from flask import Flask, request, render_template, Response, jsonify, url_for
from flask_bootstrap import Bootstrap5 # from package boostrap_flask
from app.forms import ScrapingForm
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import threading
import logging
from logging.handlers import QueueHandler
from queue import Queue
import os
import glob
from datetime import datetime
from flask import send_from_directory
import configparser
import zipfile
import os
from datetime import timedelta
app = Flask(__name__)

# Load configuration from config.ini in the working directory.
config = configparser.ConfigParser()
config.read('config.ini')
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# Move every setting from config['BOOTSTRAP'] to the root level of app.config.
for key in config['BOOTSTRAP']:
    key = key.upper()
    # Fixed: the skip checks previously ran AFTER the assignment, so
    # SECRET_KEY/API_KEY from [BOOTSTRAP] were still copied into app.config
    # (the `continue` only skipped the print). Skip them up front.
    if key in ('SECRET_KEY', 'API_KEY'):
        continue
    app.config[key] = config['BOOTSTRAP'][key]
    print(f"Loaded config: {key} = {app.config[key]}")
# Global state shared between the Flask request handlers and the scraper thread.
scraping_active = False      # flag the scraper loop polls to know when to stop
scraping_thread = None       # handle to the background scraping thread
data_file_name = None        # CSV the active scrape writes (None until a scrape starts)
# One log file per process start, named by startup timestamp.
log_file_name = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'

# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER — persists everything to log_file_name.
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# 2) QUEUE HANDLER — records pushed here are drained by the /logs SSE endpoint.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
def create_zip(file_paths, zip_name):
    """Bundle *file_paths* into temp/<zip_name> and return the archive path."""
    zip_path = os.path.join(app.root_path, 'temp', zip_name)
    with zipfile.ZipFile(zip_path, 'w') as archive:
        for source in file_paths:
            # Store each entry by basename so the zip has a flat layout.
            archive.write(source, os.path.basename(source))
    return zip_path
def delete_old_zips():
    """Delete zip archives in temp/ that are older than one hour."""
    temp_dir = os.path.join(app.root_path, 'temp')
    now = datetime.now()
    for filename in os.listdir(temp_dir):
        if filename.endswith('.zip'):
            file_path = os.path.join(temp_dir, filename)
            file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if now - file_time > timedelta(hours=1):
                os.remove(file_path)
                # Fixed: the f-string had no placeholder, so every deletion
                # logged the literal text "(unknown)".
                logger.info(f"Deleted old zip file: {filename}")
def fetch_faction_data(faction_id):
    """Fetch the faction payload (including members) from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    try:
        # Fixed: no timeout meant a stalled request could hang the scraper
        # thread forever. Network failures are treated like a bad response.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
        return None
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    else:
        logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
        return None
def fetch_user_activity(user_id):
    """Fetch one user's basic+profile data from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    try:
        # Fixed: no timeout meant a stalled request could hang the scraper
        # thread forever. Network failures are treated like a bad response.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        logger.error(f"Failed to fetch user activity for user ID {user_id}")
        return None
    if response.status_code == 200:
        return response.json()
    else:
        logger.error(f"Failed to fetch user activity for user ID {user_id}")
        return None
def scrape_data(faction_id, fetch_interval, run_interval):
    """Poll the Torn API for faction member activity and append rows to a CSV.

    Runs until `run_interval` days elapse or `scraping_active` is cleared
    by the user. Each cycle writes one row per member to `data_file_name`.

    Args:
        faction_id: Torn faction whose members are scraped.
        fetch_interval: seconds to sleep between fetch cycles.
        run_interval: total runtime in days.
    """
    global scraping_active
    global data_file_name
    end_time = datetime.now() + timedelta(days=run_interval)
    data_file_name = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id, user_info in faction_data['members'].items():
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        # Fixed: the key was garbled as 'stadata_file_nametus',
                        # producing a nonsense CSV column name.
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
            # Guard: an empty frame has no columns, so the conversions below
            # would raise KeyError if every user fetch failed this cycle.
            if user_activity_data:
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
                # Write the header only on the first write.
                if not os.path.isfile(data_file_name):
                    df.to_csv(data_file_name, index=False)
                else:
                    df.to_csv(data_file_name, mode='a', header=False, index=False)
                logger.info(f"Data appended to {data_file_name}")
            time.sleep(fetch_interval)
        else:
            # Faction fetch failed: log why the loop is in this state.
            # NOTE(review): this branch only logs — the loop keeps running
            # without sleeping; confirm whether a break/sleep was intended.
            if datetime.now() < end_time:
                logger.warning(f"Scraping stopped at {datetime.now()}")
            elif not scraping_active:
                logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
            else:
                logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
    logger.info("Scraping completed.")
    scraping_active = False
def generate_statistics(df):
    """Return activity counts per hour of day.

    Args:
        df: DataFrame with a datetime64 'timestamp' column.

    Returns:
        pandas.Series indexed by hour (0-23) with the number of rows
        whose timestamp falls in that hour.
    """
    # Group on a derived hour key instead of mutating the caller's frame
    # (the previous version wrote a throwaway 'hour' column into df).
    hours = df['timestamp'].dt.hour.rename('hour')
    return df.groupby(hours).size()
# Taken from:
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
def tail(filename, n):
    """Yield the last *n* lines of *filename*, oldest of those first.

    Scans the file backwards one page at a time, recording the byte offset
    just after each newline (i.e. the start of a line), then replays those
    offsets in file order to yield each line's text. Yields a single empty
    string when the file is empty or n == 0.
    """
    stat = os.stat(filename)
    n = int(n)
    if stat.st_size == 0 or n == 0:
        yield ''
        return
    # Bytes to read per backwards step; configured in config.ini.
    page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
    offsets = []  # line-start offsets, collected newest-first
    count = _n = n if n >= 0 else -n
    last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
    with open(filename, 'r') as f:
        while count > 0:
            starting_byte = last_byte_read - page_size
            if last_byte_read == 0:
                # Reached the start of the file: the first line begins at 0.
                offsets.append(0)
                break
            elif starting_byte < 0:
                # Less than one full page remains; read from the beginning.
                f.seek(0)
                text = f.read(last_byte_read)
            else:
                f.seek(starting_byte)
                text = f.read(page_size)
            # Walk this page backwards looking for line breaks.
            for i in range(-1, -1*len(text)-1, -1):
                last_byte_read -= 1
                if text[i] == '\n':
                    last_nl_byte = last_byte_read
                    starting_offset = last_nl_byte + 1
                    offsets.append(starting_offset)
                    count -= 1
    # Keep only the newest _n offsets, then put them into file order.
    offsets = offsets[len(offsets)-_n:]
    offsets.reverse()
    with open(filename, 'r') as f:
        for i, offset in enumerate(offsets):
            f.seek(offset)
            if i == len(offsets) - 1:
                # Last offset: read through to end of file.
                yield f.read()
            else:
                bytes_to_read = offsets[i+1] - offset
                yield f.read(bytes_to_read)
def is_data_file_in_use(filename):
    """Return True when *filename* is the CSV an active scrape is writing."""
    if data_file_name is None:
        return False
    requested = os.path.join(app.root_path, filename.lstrip('/'))
    current = os.path.join(app.root_path, data_file_name.lstrip('/'))
    return requested == current and scraping_active
@app.route('/is_data_file_in_use/<path:filename>')
def is_data_file_in_use_json(filename):
    """JSON wrapper around is_data_file_in_use for the frontend."""
    in_use = is_data_file_in_use(filename)
    return jsonify(in_use)
def is_log_file_in_use(filename):
    """Return True when *filename* resolves to the log file this process writes."""
    if log_file_name is None:
        return False
    requested = os.path.join(app.root_path, filename.lstrip('/'))
    current = os.path.join(app.root_path, log_file_name.lstrip('/'))
    return requested == current
@app.route('/is_log_file_in_use/<path:filename>')
def is_log_file_in_use_json(filename):
    """JSON wrapper around is_log_file_in_use for the frontend."""
    # Fixed: removed a leftover debug print of the requested filename.
    return jsonify(is_log_file_in_use(filename))
@app.route('/')
def index():
    """Render the landing page with the scraping configuration form."""
    scraping_form = ScrapingForm()
    return render_template('index.html', form=scraping_form)
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the form and launch the scraper in a background thread."""
    global scraping_active, scraping_thread
    form = ScrapingForm()
    # Guard clauses: bad form data, then an already-running scrape.
    if not form.validate_on_submit():
        return jsonify({"status": "Invalid form data"})
    if scraping_active:
        logger.warning("Can't start scraping process: scraping already in progress")
        return jsonify({"status": "Scraping already in progress"})
    scraping_active = True
    # Daemon thread so a running scrape never blocks interpreter shutdown.
    scraping_thread = threading.Thread(
        target=scrape_data,
        args=(form.faction_id.data, form.fetch_interval.data, form.run_interval.data),
    )
    scraping_thread.daemon = True
    scraping_thread.start()
    return jsonify({"status": "Scraping started"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Ask the scraper loop to stop; report whether one was running."""
    global scraping_active
    if scraping_active:
        scraping_active = False
        logger.debug("scraping_active set to False")
        return jsonify({"status": "Scraping stopped"})
    return jsonify({"status": "No scraping in progress"})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether the scraper thread is currently active."""
    # Read-only access to the module-level flag; no `global` needed.
    state = scraping_active
    logger.debug(f"scraping_status called: scraping_active = {state}")
    return jsonify({"scraping_active": state})
@app.route('/logs')
def logs():
    """Stream queued log records to the browser as server-sent events."""
    def event_stream():
        # Poll the queue; each record becomes one SSE "data:" frame.
        while True:
            if not log_queue.empty():
                record = log_queue.get()
                yield f"data: {record.getMessage()}\n\n"
            time.sleep(0.1)
    return Response(event_stream(), mimetype='text/event-stream')
@app.route('/logfile', methods=['GET'])
def logfile():
    """Return one JSON page of the current log file, newest lines first.

    Query args:
        page: zero-based page number.
        lines_per_page: page size (defaults to LOGGING.VIEW_PAGE_LINES).
    """
    page = int(request.args.get('page', 0))  # Page number
    lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES']))  # Lines per page
    log_file_path = log_file_name  # Path to the current log file
    if not os.path.isfile(log_file_path):
        # Fixed: this used the root `logging` module instead of the module
        # logger, bypassing the app's file and SSE queue handlers.
        logger.error("Log file not found")
        return jsonify({"error": "Log file not found"}), 404
    log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
    log_lines = log_lines[::-1]  # Reverse the list so newest lines come first
    start = page * lines_per_page
    end = start + lines_per_page
    paginated_lines = log_lines[start:end] if start < len(log_lines) else []
    return jsonify({
        "log": paginated_lines,
        "total_lines": len(log_lines),
        "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
        "start_line": len(log_lines) - start  # Starting line number for the current page
    })
@app.route('/results')
def results():
    """Render hourly activity statistics for a faction's CSV, if one exists."""
    faction_id = request.args.get('faction_id')
    # NOTE(review): the filename is rebuilt from *now*, so this only finds a
    # file created in the same minute the request is made — confirm intent.
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    if not os.path.isfile(filename):
        return "No data found."
    stats = generate_statistics(pd.read_csv(filename))
    return render_template('results.html', stats=stats.to_dict())
@app.route('/analyze')
def analyze():
    """Render the analysis page."""
    return render_template('analyze.html')
@app.route('/log_viewer')
def log_viewer():
    """Render the log viewer page."""
    return render_template('log_viewer.html')
@app.route('/download_results')
def download_results():
    """List data/log files with metadata and in-use flags for the download page."""
    def describe(path):
        # One metadata row per file, as consumed by the template.
        return {
            "name": path,
            "name_display": os.path.basename(path),
            "last_modified": os.path.getmtime(path),
            "created": os.path.getctime(path),
            "size": get_size(path),
        }
    data_files_info = [describe(p) for p in glob.glob("data/*.csv")]
    log_files_info = [describe(p) for p in glob.glob("log/*.log")]
    # Mark files the app is currently writing so the UI can disable deletion.
    for info in data_files_info:
        info['active'] = True if is_data_file_in_use(info['name']) else False
    for info in log_files_info:
        info['active'] = True if is_log_file_in_use(info['name']) else False
    files = {"data": data_files_info, "log": log_files_info}
    return render_template('download_results.html', files=files)
@app.route('/download_files', methods=['POST'])
def download_files():
    """Zip the requested data/log files and send the archive to the client.

    Expects JSON {"file_paths": [...]}. Only paths under /data/ or /log/
    that exist on disk are included. Returns 400 when nothing valid remains.
    """
    delete_old_zips()  # Clean up zip files older than an hour
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    # Restrict downloads to the data/ and log/ directories.
    # Fixed: the two branches were copy-pasted duplicates; merged via a
    # startswith tuple.
    valid_file_paths = []
    for file_path in file_paths:
        if file_path.startswith(('/data/', '/log/')):
            full_path = os.path.join(app.root_path, file_path.lstrip('/'))
            if os.path.isfile(full_path):
                valid_file_paths.append(full_path)
    if not valid_file_paths:
        return jsonify({"error": "No valid files specified"}), 400
    # Timestamped name keeps concurrent downloads from colliding.
    zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
    zip_path = create_zip(valid_file_paths, zip_name)
    return send_from_directory(directory='temp', path=zip_name, as_attachment=True)
@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the requested data/log files, collecting per-file errors.

    Refuses paths outside data/ and log/, the active log file, and the CSV
    an active scrape is writing. Returns 207 with an error list when any
    file could not be deleted, otherwise 200.
    """
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    errors = []
    for file_path in file_paths:
        full_file_path = os.path.join(app.root_path, file_path.lstrip('/'))
        # Fixed: replaced three leftover debug print()s with one debug log.
        logger.debug(f"Attempting to delete: {file_path} (full path: {full_file_path})")
        # Check if the file is in either the logs or the data files folder
        if not (full_file_path.startswith(os.path.join(app.root_path, 'log')) or
                full_file_path.startswith(os.path.join(app.root_path, 'data'))):
            errors.append({"file": file_path, "error": "File not in allowed directory"})
            continue
        # Check if it's the currently active log file
        if is_log_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active log file."})
            continue
        # Check if it's an active data file
        if is_data_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active data file."})
            continue
        if not os.path.isfile(full_file_path):
            errors.append({"file": file_path, "error": "File not found"})
            continue
        try:
            os.remove(full_file_path)
        except OSError as e:
            # Narrowed from bare Exception: os.remove raises OSError subclasses.
            errors.append({"file": file_path, "error": str(e)})
    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200
@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a unix timestamp as 'YYYY-MM-DD HH:MM:SS'."""
    moment = datetime.fromtimestamp(value)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def get_size(path):
    """Return the on-disk size of *path* as a human-readable string.

    Uses binary units (1 KB = 1024 bytes); values above bytes are rounded
    to two decimal places.
    """
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size/1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size/(pow(1024, 2)), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size/(pow(1024, 3)), 2)} GB"
    # Fixed: the original chain had no final branch, so any file of
    # 1 TB or more silently returned None.
    return f"{round(size/(pow(1024, 4)), 2)} TB"
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a scraped CSV from the data/ directory."""
    return send_from_directory('data', filename)
@app.route('/log/<path:filename>')
def download_log_file(filename):
    """Serve a log file from the log/ directory."""
    return send_from_directory('log', filename)
@app.route('/config/lines_per_page')
def get_lines_per_page():
    """Expose the configured log-viewer page size as JSON."""
    # Fixed: configparser values are strings; cast so the client receives
    # a JSON number rather than e.g. "20".
    lines_per_page = int(config['LOGGING']['VIEW_PAGE_LINES'])
    return jsonify({"lines_per_page": lines_per_page})
if __name__ == '__main__':
    # Dev entrypoint; threaded=True lets the /logs SSE stream and normal
    # requests be served concurrently.
    app.run(debug=True, threaded=True)