adds docker & fly.toml

2025-02-20 14:26:39 +01:00
12 changed files with 586 additions and 435 deletions
--- a/README.md
+++ b/README.md
@@ -1,22 +1,18 @@
-# Torn User Activity Tracker
+# Torn User Activity Scraper
 > [!WARNING]  
 > **Development is still in its early stages; do not use it in production!**
 ## Features
 - Start and stop scraping user activity data
 - View real-time logs
 - Download data and log files
- View scraping results
+- View scraping results and statistics
 - Plugin based analysis system
 - Toggle between light and dark mode
 **Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
 ## Planned Features
- Additional analyses plugins
+- Additional analyses
 - Selector for Torn API data to choose which data shall be tracked
 - Improved / fixed log viewer
--- a/app/init.py
+++ b/app/init.py
@@ -7,28 +7,22 @@ from app.views import register_views
 from app.api import register_api
 from app.config import load_config
 from app.filters import register_filters
 from app.tasks import celery
 from app.logging_config import init_logger
-def create_app(config=None):
+def create_app():
    app = Flask(__name__)
    if config is None:
        config = load_config()
    app.config.update(config)
    os.environ['TZ'] = 'UTC'
    config = load_config()
    app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
    # Move bootstrap settings to root level
    for key, value in config.get('BOOTSTRAP', {}).items():
        app.config[key.upper()] = value
    # Initialize Celery
    celery.conf.update(app.config)
    bootstrap = Bootstrap5(app)
    # Store the entire config in Flask app
--- a/app/api.py
+++ b/app/api.py
@@ -1,3 +1,4 @@
 # filepath: /home/michaelb/Dokumente/TornActivityTracker/app/api.py
 from flask import jsonify, request, Response, send_from_directory, current_app
 import threading
 import os
@@ -9,7 +10,6 @@ from app.models import Scraper
 from app.util import create_zip, delete_old_zips, tail
 from app.config import load_config
 from app.forms import ScrapingForm
 from app.tasks import start_scraping_task, stop_scraping_task, get_redis
 scraping_thread = None
 scraper = None
@@ -18,40 +18,42 @@ scrape_lock = threading.Lock()
 def register_api(app):
    @app.route('/start_scraping', methods=['POST'])
    def start_scraping():
-        form = ScrapingForm()
+        global scraping_thread, scraper
-        if form.validate_on_submit():
+        with scrape_lock:
-            redis_client = get_redis()
+            scraper = current_app.config.get('SCRAPER')
-            faction_id = form.faction_id.data
+            if scraper is not None and scraper.scraping_active:
-            
+                current_app.logger.warning("Can't start scraping process: scraping already in progress")
            # Check if scraping is already active
            if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
                return jsonify({"status": "Scraping already in progress"})
-            # Convert config to a serializable dict with only needed values
+            form = ScrapingForm()
-            config_dict = {
+            if form.validate_on_submit():
-                'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
+                faction_id = form.faction_id.data
-                'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
+                fetch_interval = form.fetch_interval.data
-            }
+                run_interval = form.run_interval.data
                scraper = Scraper(faction_id, fetch_interval, run_interval, app)
                scraper.scraping_active = True
                scraping_thread = threading.Thread(target=scraper.start_scraping, args=(app,))
                scraping_thread.daemon = True
                scraping_thread.start()
                current_app.config['SCRAPER'] = scraper
                current_app.config['SCRAPING_THREAD'] = scraping_thread
            start_scraping_task.delay(
                faction_id,
                int(form.fetch_interval.data),  # Ensure this is an int
                int(form.run_interval.data),    # Ensure this is an int
                config_dict
            )
                return jsonify({"status": "Scraping started"})
            return jsonify({"status": "Invalid form data"})
    @app.route('/stop_scraping', methods=['POST'])
    def stop_scraping():
-        redis_client = get_redis()
+        scraper = current_app.config.get('SCRAPER')
-        faction_id = redis_client.get("current_faction_id")
+        if scraper is None or not scraper.scraping_active:
-        if not faction_id:
+            return jsonify({"status": "Scraping is not running"})
            return jsonify({"status": "No active scraping session"})
        stop_scraping_task.delay(faction_id)
        return jsonify({"status": "Stopping scraping"})
        scraper.stop_scraping()
        current_app.config['SCRAPING_ACTIVE'] = False
        current_app.logger.debug("Scraping stopped by user")
        return jsonify({"status": "Scraping stopped"})    
    @app.route('/logfile', methods=['GET'])
    def logfile():
        log_file_name = current_app.logger.handlers[0].baseFilename
@@ -194,37 +196,20 @@ def register_api(app):
    @app.route('/scraping_status', methods=['GET'])
    def scraping_status():
-        redis_client = get_redis()
+        if scraper is None:
-        current_faction_id = redis_client.get("current_faction_id")
+            current_app.logger.debug("Scraper is not initialized.")
        if not current_faction_id:
            return jsonify({"scraping_active": False})
-        scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
+        if scraper.scraping_active:
-        
+            current_app.logger.debug("Scraping is active.")
-        # If we have a faction_id but scraping is not active, clean up the stale state
+            return jsonify({"scraping_active": True})
-        if not scraping_active or scraping_active == "0":
+        else:
-            redis_client.delete("current_faction_id")
+            current_app.logger.debug("Scraping is not active.")
            return jsonify({"scraping_active": False})
        return jsonify({
            "scraping_active": True,
            "faction_id": current_faction_id
        })
    @app.route('/scraping_get_end_time')
    def scraping_get_end_time():
-        redis_client = get_redis()
+        if scraper is None:
-        current_faction_id = redis_client.get("current_faction_id")
+            current_app.logger.debug("Scraper is not initialized.")
-        
+            return jsonify({"scraping_active":False})
-        if not current_faction_id:
+        return jsonify({"end_time": scraper.end_time})
            return jsonify({"scraping_active": False})
        end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
        if not end_time:
            return jsonify({"scraping_active": False})
        return jsonify({
            "end_time": end_time,
            "faction_id": current_faction_id
        })
--- a/app/filters.py
+++ b/app/filters.py
@@ -4,12 +4,4 @@ from datetime import datetime
 def register_filters(app):
    @app.template_filter('datetimeformat')
    def datetimeformat(value):
-        """Convert datetime or timestamp to formatted string"""
+        return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')
        if isinstance(value, datetime):
            dt = value
        else:
            try:
                dt = datetime.fromtimestamp(float(value))
            except (ValueError, TypeError):
                return str(value)
        return dt.strftime('%Y-%m-%d %H:%M:%S')
--- a/app/models.py
+++ b/app/models.py
@@ -5,60 +5,21 @@ import os
 import time
 from datetime import datetime, timedelta
 from requests.exceptions import ConnectionError, Timeout, RequestException
 import redis
 import threading
 from flask import current_app
 class Scraper:
-    _instances = {}  # Track all instances by faction_id
+    def __init__(self, faction_id, fetch_interval, run_interval, app):
    _lock = threading.Lock()
    def __new__(cls, faction_id, *args, **kwargs):
        with cls._lock:
            # Stop any existing instance for this faction
            if faction_id in cls._instances:
                old_instance = cls._instances[faction_id]
                old_instance.stop_scraping()
            instance = super().__new__(cls)
            cls._instances[faction_id] = instance
            return instance
    def __init__(self, faction_id, fetch_interval, run_interval, config):
        # Only initialize if not already initialized
        if not hasattr(self, 'faction_id'):
            self.redis_client = redis.StrictRedis(
                host='localhost', port=6379, db=0, decode_responses=True
            )
        self.faction_id = faction_id
        self.fetch_interval = fetch_interval
        self.run_interval = run_interval
-            self.API_KEY = config['DEFAULT']['API_KEY']
+        self.end_time = datetime.now() + timedelta(days=run_interval)
-            self.data_file_name = os.path.join(
+        self.data_file_name = os.path.join(app.config['DATA']['DATA_DIR'], f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv")
-                config['DATA']['DATA_DIR'],
+        self.scraping_active = False
-                f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
+        self.API_KEY = app.config['DEFAULT']['API_KEY']
-            )
+        self.logger = app.logger
            self.end_time = datetime.now() + timedelta(days=int(run_interval))
-            # Store scraper state in Redis
+        print(self.data_file_name)
            self.redis_client.hmset(f"scraper:{faction_id}", {
                "faction_id": faction_id,
                "fetch_interval": fetch_interval,
                "run_interval": run_interval,
                "end_time": self.end_time.isoformat(),
                "data_file_name": self.data_file_name,
                "scraping_active": "0",
                "api_key": self.API_KEY
            })
    @property
    def scraping_active(self):
        return bool(int(self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active")))
    @scraping_active.setter 
    def scraping_active(self, value):
        self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
    def fetch_faction_data(self):
        url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
@@ -87,14 +48,16 @@ class Scraper:
                time.sleep(2 ** attempt)  # Exponential backoff
        return None
-    def start_scraping(self) -> None:
+    def start_scraping(self, app) -> None:
        """Starts the scraping process until the end time is reached or stopped manually."""
        self.scraping_active = True
        # Anwendungskontext explizit setzen
        with app.app_context():
            current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
            current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
-        MAX_FAILURES = 5
+            MAX_FAILURES = 5  # Stop after 5 consecutive failures
            failure_count = 0
            while datetime.now() < self.end_time and self.scraping_active:
@@ -159,18 +122,6 @@ class Scraper:
        except Exception as e:
            current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}")
    def cleanup_redis_state(self):
        """Clean up all Redis state for this scraper instance"""
        if hasattr(self, 'faction_id'):
            self.redis_client.delete(f"scraper:{self.faction_id}")
            current_id = self.redis_client.get("current_faction_id")
            if current_id and current_id == str(self.faction_id):
                self.redis_client.delete("current_faction_id")
            # Remove from instances tracking
            with self._lock:
                if self.faction_id in self._instances:
                    del self._instances[self.faction_id]
    def handle_scraping_end(self) -> None:
        """Handles cleanup and logging when scraping ends."""
        if not self.scraping_active:
@@ -182,13 +133,7 @@ class Scraper:
        current_app.logger.info("Scraping completed.")
        self.scraping_active = False
        self.cleanup_redis_state()
    def stop_scraping(self):
        self.scraping_active = False
-        self.cleanup_redis_state()
+        current_app.logger.debug("Scraping stopped by user")
        current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}")
    def __del__(self):
        """Ensure Redis cleanup on object destruction"""
        self.cleanup_redis_state()
--- a/app/static/scraper_utils.js
+++ b/app/static/scraper_utils.js
@@ -12,7 +12,6 @@ export class ScraperUtils {
        this.serverTime = null;
        this.endTime = null;
        this.pollInterval = null;  // Add this line
        this.init();
    }
@@ -21,6 +20,7 @@ export class ScraperUtils {
        this.showLoadingIndicator();
        try {
            // Ensure each function runs only once
            await Promise.all([
                this.updateServerTime(), 
                this.checkScrapingStatus()
@@ -29,12 +29,8 @@ export class ScraperUtils {
            console.error("Error during initialization:", error);
        }
-        // Start polling for status updates
+        // Ensure end time is fetched only if scraping is active
-        this.startPolling();
+        if (this.endTime === null) {
        // Only start the clock and wait for end time if scraping is active
        if (this.activityIndicator.textContent === 'Active') {
            if (!this.endTime) {
            try {
                await this.fetchEndTime();
            } catch (error) {
@@ -42,28 +38,22 @@ export class ScraperUtils {
            }
        }
        // Ensure UI is only updated once everything is ready
        if (this.serverTime && this.endTime) {
            this.startClock();
-            }
+            this.hideLoadingIndicator();
-        }
+        } else {
-        
+            console.warn("Delaying hiding the loading indicator due to missing data...");
-        // Hide loading indicator regardless of scraping status
+            const checkDataInterval = setInterval(() => {
                if (this.serverTime && this.endTime) {
                    clearInterval(checkDataInterval);
                    this.startClock();
                    this.hideLoadingIndicator();
                }
-
+            }, 500);
-    startPolling() {
+        }
        // Poll every 2 seconds
        this.pollInterval = setInterval(async () => {
            await this.checkScrapingStatus();
        }, 2000);
    }
    stopPolling() {
        if (this.pollInterval) {
            clearInterval(this.pollInterval);
            this.pollInterval = null;
        }
    }
    showLoadingIndicator() {
        this.statusContainer.classList.remove('d-none');
@@ -89,7 +79,9 @@ export class ScraperUtils {
                this.activityIndicator.classList.add('text-bg-success');
                this.activityIndicator.textContent = 'Active';
-                // Fetch end time if we don't have it yet
+                console.log(`Scraping is active until ${data.end_time} TCT`);
                // Only call fetchEndTime() if endTime is not already set
                if (!this.endTime) {
                    await this.fetchEndTime();
                }
@@ -106,9 +98,6 @@ export class ScraperUtils {
                this.endTimeElement.classList.add('d-none');
                this.timeLeftElement.classList.add('d-none');
                // Reset end time when inactive
                this.endTime = null;
            }
        } catch (error) {
            console.error('Error checking scraping status:', error);
@@ -188,16 +177,4 @@ export class ScraperUtils {
        const seconds = totalSeconds % 60;
        return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
    }
    // Release what this instance holds; currently this only stops status polling.
    cleanup() {
        this.stopPolling();
    }
 }
 // On page unload, let the global ScraperUtils instance (if any) clean up.
 window.addEventListener('unload', () => {
    const utils = window.scraperUtils;
    if (!utils) {
        return;
    }
    utils.cleanup();
 });
--- a/app/tasks.py
+++ b/app/tasks.py
@@ -1,93 +0,0 @@
 from celery import Celery
 from app.models import Scraper
 import redis
 from datetime import timedelta
 from flask import current_app
 def create_celery():
    """Build the Celery application named 'tasks'.

    The broker is the Redis URL hard-coded below; serializers are set to
    JSON and the timezone to UTC.
    """
    settings = {
        'task_serializer': 'json',
        'accept_content': ['json'],
        'result_serializer': 'json',
        'timezone': 'UTC',
    }
    celery_app = Celery('tasks', broker='redis://localhost:6379/0')
    celery_app.conf.update(**settings)
    return celery_app
 def init_celery(app):
    """Create a Celery app whose tasks run inside the Flask app context.

    Args:
        app: Flask application. Its config is merged into the Celery
            config, and its app context wraps every task invocation.

    Returns:
        A fresh instance from create_celery() whose Task base class has
        been replaced by the context-entering subclass defined here.
    """
    celery_app = create_celery()
    celery_app.conf.update(app.config)
    base_task = celery_app.Task

    # Enter app.app_context() before delegating to the task's run().
    class ContextTask(base_task):
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app.Task = ContextTask
    return celery_app
 # Module-level Celery instance that the @celery.task decorators below bind to.
 celery = create_celery()  # This will be initialized properly in app/__init__.py
 def get_redis():
    """Return a new StrictRedis client for localhost:6379, db 0.

    The client is created with decode_responses=True.
    """
    connection_options = dict(
        host='localhost',
        port=6379,
        db=0,
        decode_responses=True,
    )
    return redis.StrictRedis(**connection_options)
@celery.task
 def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
    """
    Start scraping task with serializable parameters
    Args:
        faction_id: ID of the faction to scrape
        fetch_interval: Interval between fetches in seconds
        run_interval: How long to run the scraper in days
        config_dict: Dictionary containing configuration
    """
    try:
        redis_client = get_redis()
        # Set current faction ID at task start
        redis_client.set("current_faction_id", str(faction_id))
        scraper = Scraper(
            faction_id=faction_id,
            fetch_interval=int(fetch_interval),
            run_interval=int(run_interval),
            config=config_dict
        )
        scraper.start_scraping()
        return {"status": "success"}
    except Exception as e:
        # Clean up Redis state on error
        redis_client = get_redis()
        redis_client.delete("current_faction_id")
        return {"status": "error", "message": str(e)}
@celery.task
 def stop_scraping_task(faction_id):
    """Stop scraping task and clean up Redis state"""
    try:
        redis_client = get_redis()
        # Clean up Redis state
        redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
        redis_client.delete(f"scraper:{faction_id}")
        # Clean up current_faction_id if it matches
        current_id = redis_client.get("current_faction_id")
        if current_id and current_id == str(faction_id):
            redis_client.delete("current_faction_id")
        # Revoke any running tasks for this faction
        celery.control.revoke(
            celery.current_task.request.id,
            terminate=True,
            signal='SIGTERM'
        )
        return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
    except Exception as e:
        return {"status": "error", "message": str(e)}
--- a/app/views.py
+++ b/app/views.py
@@ -2,8 +2,6 @@ import os
 import glob
 from flask import render_template, Blueprint, current_app, request
 from app.tasks import get_redis
 from app.forms import ScrapingForm
 from app.util import get_size
 from app.config import load_config
@@ -15,14 +13,6 @@ from datetime import datetime
 views_bp = Blueprint("views", __name__)
 def sizeof_fmt(num, suffix="B"):
    """Format a byte count with binary (1024-based) unit prefixes.

    Args:
        num: Size to format; repeatedly divided by 1024 until below 1024.
        suffix: Unit suffix appended after the prefix (default "B").

    Returns:
        A string such as "1.5 KiB"; values beyond the Zi range are
        reported with the Yi prefix.
    """
    binary_prefixes = ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi")
    for prefix in binary_prefixes:
        if not abs(num) < 1024.0:
            # Still too large for this prefix: scale down and try the next one.
            num /= 1024.0
            continue
        return f"{num:3.1f} {prefix}{suffix}"
    return f"{num:.1f} Yi{suffix}"
 def register_views(app):
    @app.route('/')
    def index():
@@ -39,59 +29,49 @@ def register_views(app):
    @app.route('/download_results')
    def download_results():
-        # Get the current active log file and data file from Redis and app config
+        log_file_name = os.path.abspath(app.config['LOG_FILE_NAME'])
-        redis_client = get_redis()
+        scraper = app.config.get('SCRAPER')
        current_faction_id = redis_client.get("current_faction_id")
-        active_data_file = None
+        if scraper:
-        if current_faction_id:
+            print(scraper.data_file_name)
-            active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name")
+        if not scraper:
            print("Scraper not initialized")
-        active_log_file = app.config['LOG_FILE_NAME']
+        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
-        def get_file_info(file_path, file_type='data'):
+        data_files = glob.glob(os.path.join(data_dir, "*.csv"))
-            stats = os.stat(file_path)
+        log_files = glob.glob(os.path.join(log_dir, "*.log"))
            name = os.path.basename(file_path)
            # Determine if file is active
            is_active = False
            if file_type == 'data' and active_data_file:
                is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file)
            elif file_type == 'log' and active_log_file:
                is_active = os.path.basename(file_path) == os.path.basename(active_log_file)
        def get_file_info(file_path):
            return {
-                'name': file_path,  # Full path for internal use
+                "name": file_path,
-                'name_display': name,  # Just filename for display
+                "name_display": os.path.basename(file_path),
-                'last_modified': stats.st_mtime,  # Send timestamp instead of datetime
+                "last_modified": os.path.getmtime(file_path),
-                'created': stats.st_ctime,        # Send timestamp instead of datetime
+                "created": os.path.getctime(file_path),
-                'size': sizeof_fmt(stats.st_size),
+                "size": get_size(file_path)
                'active': is_active
            }
-        data_files = []
+        data_files_info = [get_file_info(file) for file in data_files]
-        log_files = []
+        log_files_info = [get_file_info(file) for file in log_files]
-        # Get data files
+        if scraper and scraper.scraping_active:
-        data_dir = os.path.abspath(app.config['DATA']['DATA_DIR'])
+            for data_file in data_files_info:
-        if os.path.exists(data_dir):
+                if os.path.abspath(scraper.data_file_name) == data_file['name']:
-            for file in glob.glob(os.path.join(data_dir, "*.csv")):
+                    data_file['active'] = True
-                data_files.append(get_file_info(file, 'data'))
+                else:
                    data_file['active'] = False
-        # Get log files
+        for log_file in log_files_info:
-        log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR'])
+            if log_file_name == os.path.abspath(log_file['name']):
-        if os.path.exists(log_dir):
+                log_file['active'] = True
-            for file in glob.glob(os.path.join(log_dir, "*.log")):
+            else:
-                log_files.append(get_file_info(file, 'log'))
+                log_file['active'] = False
-        # Sort files by modification time, newest first
+        data_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
-        data_files.sort(key=lambda x: x['last_modified'], reverse=True)
+        log_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
        log_files.sort(key=lambda x: x['last_modified'], reverse=True)
-        files = {
+        files = {"data": data_files_info, "log": log_files_info}
            'data': data_files,
            'log': log_files
        }
        return render_template('download_results.html', files=files)
@@ -140,7 +120,7 @@ def register_views(app):
    @views_bp.route('/server_time')
    def server_time():
-        current_time = datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
+        current_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        return {'server_time': current_time}
    app.register_blueprint(views_bp)
--- a/requirements.txt
+++ b/requirements.txt
@@ -78,7 +78,6 @@ python-dateutil==2.9.0.post0
    #   pandas
 pytz==2025.1
    # via pandas
 redis==5.0.1
 requests==2.32.3
    # via -r requirements.in
 seaborn==0.13.2
@@ -95,4 +94,3 @@ wtforms==3.2.1
    # via
    #   bootstrap-flask
    #   flask-wtf
 celery==5.3.6
--- a/run.py
+++ b/run.py
@@ -1,6 +1,5 @@
 from app import create_app
 app = create_app()
 if __name__ == '__main__':
-    app.run(debug=True)
+    app = create_app()
    app.run(debug=True, threaded=True)
--- a/stop_scraping.py
+++ b/stop_scraping.py
@@ -1,50 +0,0 @@
 import redis
 import argparse
 def get_redis():
    """Return a new StrictRedis client for localhost:6379, db 0.

    The client is created with decode_responses=True.
    """
    connection_options = dict(
        host='localhost',
        port=6379,
        db=0,
        decode_responses=True,
    )
    return redis.StrictRedis(**connection_options)
 def stop_scraping(flush=False, force=False):
    """Send a stop signal to the active scraping session via Redis.

    Args:
        flush: When true, flush all Redis data and return immediately.
        force: Result to report (as a bool) when no active session exists.

    Returns:
        True after flushing or after signalling the active session;
        otherwise the truthiness of *force*.
    """
    client = get_redis()
    if flush:
        client.flushall()
        print("Flushed all Redis data")
        return True
    active_faction = client.get("current_faction_id")
    if active_faction:
        # Clearing the flag is the stop signal for that faction's scraper.
        client.hset(f"scraper:{active_faction}", "scraping_active", "0")
        print(f"Sent stop signal to scraping process for faction {active_faction}")
        return True
    print("No active scraping session found.")
    return bool(force)
 # Command-line entry point: stop the active session, optionally flushing
 # Redis or force-stopping every known scraper.
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.')
    parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found')
    parser.add_argument('--flush', action='store_true', help='Flush all Redis data (WARNING: This will clear ALL Redis data)')
    args = parser.parse_args()
    # Flushing is destructive, so require an explicit confirmation first.
    if args.flush and input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ").lower() != 'y':
        print("Operation cancelled.")
        # SystemExit works without the site-provided exit() helper.
        raise SystemExit(0)
    # Do not forward --force: stop_scraping() reports True whenever force is
    # set, which would make the fallback below unreachable. Without it, a
    # False result means "no active session was found".
    success = stop_scraping(flush=args.flush)
    if not success and args.force:
        print("Forcing stop for all potential scraping processes...")
        redis_client = get_redis()
        # Get all scraper keys
        for key in redis_client.keys("scraper:*"):
            redis_client.hset(key, "scraping_active", "0")
        print("Sent stop signal to all potential scraping processes.")
--- a/tests/analyses.ipynb
+++ b/tests/analyses.ipynb