diff --git a/app/__init__.py b/app/__init__.py index e983345..9356270 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -7,15 +7,18 @@ from app.views import register_views from app.api import register_api from app.config import load_config from app.filters import register_filters +from app.tasks import celery from app.logging_config import init_logger -def create_app(): +def create_app(config=None): app = Flask(__name__) - os.environ['TZ'] = 'UTC' + if config is None: + config = load_config() + app.config.update(config) - config = load_config() + os.environ['TZ'] = 'UTC' app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY'] @@ -23,6 +26,9 @@ def create_app(): for key, value in config.get('BOOTSTRAP', {}).items(): app.config[key.upper()] = value + # Initialize Celery + celery.conf.update(app.config) + bootstrap = Bootstrap5(app) # Store the entire config in Flask app diff --git a/app/api.py b/app/api.py index 4a6c443..a5177c4 100644 --- a/app/api.py +++ b/app/api.py @@ -1,4 +1,3 @@ -# filepath: /home/michaelb/Dokumente/TornActivityTracker/app/api.py from flask import jsonify, request, Response, send_from_directory, current_app import threading import os @@ -10,6 +9,7 @@ from app.models import Scraper from app.util import create_zip, delete_old_zips, tail from app.config import load_config from app.forms import ScrapingForm +from app.tasks import start_scraping_task, stop_scraping_task, get_redis scraping_thread = None scraper = None @@ -18,42 +18,40 @@ scrape_lock = threading.Lock() def register_api(app): @app.route('/start_scraping', methods=['POST']) def start_scraping(): - global scraping_thread, scraper - with scrape_lock: - scraper = current_app.config.get('SCRAPER') - if scraper is not None and scraper.scraping_active: - current_app.logger.warning("Can't start scraping process: scraping already in progress") + form = ScrapingForm() + if form.validate_on_submit(): + redis_client = get_redis() + faction_id = form.faction_id.data + + # Check if scraping 
is already active + if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1": return jsonify({"status": "Scraping already in progress"}) - - form = ScrapingForm() - if form.validate_on_submit(): - faction_id = form.faction_id.data - fetch_interval = form.fetch_interval.data - run_interval = form.run_interval.data - - scraper = Scraper(faction_id, fetch_interval, run_interval, app) - scraper.scraping_active = True - - scraping_thread = threading.Thread(target=scraper.start_scraping, args=(app,)) - scraping_thread.daemon = True - scraping_thread.start() - - current_app.config['SCRAPER'] = scraper - current_app.config['SCRAPING_THREAD'] = scraping_thread - - return jsonify({"status": "Scraping started"}) - return jsonify({"status": "Invalid form data"}) + + # Convert config to a serializable dict with only needed values + config_dict = { + 'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']}, + 'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']} + } + + start_scraping_task.delay( + faction_id, + int(form.fetch_interval.data), # Ensure this is an int + int(form.run_interval.data), # Ensure this is an int + config_dict + ) + return jsonify({"status": "Scraping started"}) + return jsonify({"status": "Invalid form data"}) @app.route('/stop_scraping', methods=['POST']) def stop_scraping(): - scraper = current_app.config.get('SCRAPER') - if scraper is None or not scraper.scraping_active: - return jsonify({"status": "Scraping is not running"}) + redis_client = get_redis() + faction_id = redis_client.get("current_faction_id") + if not faction_id: + return jsonify({"status": "No active scraping session"}) + + stop_scraping_task.delay(faction_id) + return jsonify({"status": "Stopping scraping"}) - scraper.stop_scraping() - current_app.config['SCRAPING_ACTIVE'] = False - current_app.logger.debug("Scraping stopped by user") - return jsonify({"status": "Scraping stopped"}) @app.route('/logfile', methods=['GET']) def logfile(): log_file_name = 
current_app.logger.handlers[0].baseFilename @@ -196,20 +194,37 @@ def register_api(app): @app.route('/scraping_status', methods=['GET']) def scraping_status(): - if scraper is None: - current_app.logger.debug("Scraper is not initialized.") + redis_client = get_redis() + current_faction_id = redis_client.get("current_faction_id") + + if not current_faction_id: return jsonify({"scraping_active": False}) - if scraper.scraping_active: - current_app.logger.debug("Scraping is active.") - return jsonify({"scraping_active": True}) - else: - current_app.logger.debug("Scraping is not active.") + scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active") + + # If we have a faction_id but scraping is not active, clean up the stale state + if not scraping_active or scraping_active == "0": + redis_client.delete("current_faction_id") return jsonify({"scraping_active": False}) + + return jsonify({ + "scraping_active": True, + "faction_id": current_faction_id + }) @app.route('/scraping_get_end_time') def scraping_get_end_time(): - if scraper is None: - current_app.logger.debug("Scraper is not initialized.") - return jsonify({"scraping_active":False}) - return jsonify({"end_time": scraper.end_time}) \ No newline at end of file + redis_client = get_redis() + current_faction_id = redis_client.get("current_faction_id") + + if not current_faction_id: + return jsonify({"scraping_active": False}) + + end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time") + if not end_time: + return jsonify({"scraping_active": False}) + + return jsonify({ + "end_time": end_time, + "faction_id": current_faction_id + }) diff --git a/app/filters.py b/app/filters.py index a5e78c6..60ded1f 100644 --- a/app/filters.py +++ b/app/filters.py @@ -4,4 +4,12 @@ from datetime import datetime def register_filters(app): @app.template_filter('datetimeformat') def datetimeformat(value): - return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S') \ No newline at end 
of file + """Convert datetime or timestamp to formatted string""" + if isinstance(value, datetime): + dt = value + else: + try: + dt = datetime.fromtimestamp(float(value)) + except (ValueError, TypeError): + return str(value) + return dt.strftime('%Y-%m-%d %H:%M:%S') \ No newline at end of file diff --git a/app/models.py b/app/models.py index 0f6e7d7..4c2647a 100644 --- a/app/models.py +++ b/app/models.py @@ -5,21 +5,60 @@ import os import time from datetime import datetime, timedelta from requests.exceptions import ConnectionError, Timeout, RequestException +import redis +import threading from flask import current_app class Scraper: - def __init__(self, faction_id, fetch_interval, run_interval, app): - self.faction_id = faction_id - self.fetch_interval = fetch_interval - self.run_interval = run_interval - self.end_time = datetime.now() + timedelta(days=run_interval) - self.data_file_name = os.path.join(app.config['DATA']['DATA_DIR'], f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv") - self.scraping_active = False - self.API_KEY = app.config['DEFAULT']['API_KEY'] - self.logger = app.logger - - print(self.data_file_name) + _instances = {} # Track all instances by faction_id + _lock = threading.Lock() + + def __new__(cls, faction_id, *args, **kwargs): + with cls._lock: + # Stop any existing instance for this faction + if faction_id in cls._instances: + old_instance = cls._instances[faction_id] + old_instance.stop_scraping() + + instance = super().__new__(cls) + cls._instances[faction_id] = instance + return instance + + def __init__(self, faction_id, fetch_interval, run_interval, config): + # Only initialize if not already initialized + if not hasattr(self, 'faction_id'): + self.redis_client = redis.StrictRedis( + host='localhost', port=6379, db=0, decode_responses=True + ) + self.faction_id = faction_id + self.fetch_interval = fetch_interval + self.run_interval = run_interval + self.API_KEY = config['DEFAULT']['API_KEY'] + self.data_file_name = 
os.path.join(
+                config['DATA']['DATA_DIR'],
+                f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
+            )
+            self.end_time = datetime.now() + timedelta(days=int(run_interval))
+
+            # Store scraper state in Redis (hset with mapping= -- hmset is deprecated in redis-py)
+            self.redis_client.hset(f"scraper:{faction_id}", mapping={
+                "faction_id": faction_id,
+                "fetch_interval": fetch_interval,
+                "run_interval": run_interval,
+                "end_time": self.end_time.isoformat(),
+                "data_file_name": self.data_file_name,
+                "scraping_active": "0",
+                "api_key": self.API_KEY
+            })
+
+    @property
+    def scraping_active(self):
+        return self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active") == "1"  # missing hash -> False, no int(None) crash
+
+    @scraping_active.setter
+    def scraping_active(self, value):
+        self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
 
     def fetch_faction_data(self):
         url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
@@ -48,40 +87,38 @@ class Scraper:
                 time.sleep(2 ** attempt)  # Exponential backoff
         return None
 
-    def start_scraping(self, app) -> None:
+    def start_scraping(self) -> None:
         """Starts the scraping process until the end time is reached or stopped manually."""
         self.scraping_active = True
+
+        current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
+        current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
 
-        # Anwendungskontext explizit setzen
-        with app.app_context():
-            current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
-            current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
+        MAX_FAILURES = 5
+        failure_count = 0
 
-            MAX_FAILURES = 5  # Stop after 5 consecutive failures
-            failure_count = 0
+        while datetime.now() < self.end_time and self.scraping_active:
+            current_app.logger.info(f"Fetching data at {datetime.now()}")
+            faction_data = self.fetch_faction_data()
 
-            while 
datetime.now() < self.end_time and self.scraping_active: - current_app.logger.info(f"Fetching data at {datetime.now()}") - faction_data = self.fetch_faction_data() - - if not faction_data or "members" not in faction_data: - current_app.logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})") - failure_count += 1 - if failure_count >= MAX_FAILURES: - current_app.logger.error(f"Max failures reached ({MAX_FAILURES}). Stopping scraping.") - break - time.sleep(self.fetch_interval) - continue - - current_app.logger.info(f"Fetched {len(faction_data['members'])} members for faction {self.faction_id}") - failure_count = 0 # Reset failure count on success - user_activity_data = self.process_faction_members(faction_data["members"]) - self.save_data(user_activity_data) - - current_app.logger.info(f"Data appended to {self.data_file_name}") + if not faction_data or "members" not in faction_data: + current_app.logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})") + failure_count += 1 + if failure_count >= MAX_FAILURES: + current_app.logger.error(f"Max failures reached ({MAX_FAILURES}). 
Stopping scraping.") + break time.sleep(self.fetch_interval) + continue - self.handle_scraping_end() + current_app.logger.info(f"Fetched {len(faction_data['members'])} members for faction {self.faction_id}") + failure_count = 0 # Reset failure count on success + user_activity_data = self.process_faction_members(faction_data["members"]) + self.save_data(user_activity_data) + + current_app.logger.info(f"Data appended to {self.data_file_name}") + time.sleep(self.fetch_interval) + + self.handle_scraping_end() def process_faction_members(self, members: Dict[str, Dict]) -> List[Dict]: @@ -122,6 +159,18 @@ class Scraper: except Exception as e: current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}") + def cleanup_redis_state(self): + """Clean up all Redis state for this scraper instance""" + if hasattr(self, 'faction_id'): + self.redis_client.delete(f"scraper:{self.faction_id}") + current_id = self.redis_client.get("current_faction_id") + if current_id and current_id == str(self.faction_id): + self.redis_client.delete("current_faction_id") + # Remove from instances tracking + with self._lock: + if self.faction_id in self._instances: + del self._instances[self.faction_id] + def handle_scraping_end(self) -> None: """Handles cleanup and logging when scraping ends.""" if not self.scraping_active: @@ -133,7 +182,13 @@ class Scraper: current_app.logger.info("Scraping completed.") self.scraping_active = False + self.cleanup_redis_state() def stop_scraping(self): self.scraping_active = False - current_app.logger.debug("Scraping stopped by user") \ No newline at end of file + self.cleanup_redis_state() + current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}") + + def __del__(self): + """Ensure Redis cleanup on object destruction""" + self.cleanup_redis_state() \ No newline at end of file diff --git a/app/static/scraper_utils.js b/app/static/scraper_utils.js index 9e0eb09..b7d137d 100644 --- a/app/static/scraper_utils.js +++ 
b/app/static/scraper_utils.js @@ -12,6 +12,7 @@ export class ScraperUtils { this.serverTime = null; this.endTime = null; + this.pollInterval = null; // Add this line this.init(); } @@ -20,7 +21,6 @@ export class ScraperUtils { this.showLoadingIndicator(); try { - // Ensure each function runs only once await Promise.all([ this.updateServerTime(), this.checkScrapingStatus() @@ -29,31 +29,41 @@ export class ScraperUtils { console.error("Error during initialization:", error); } - // Ensure end time is fetched only if scraping is active - if (this.endTime === null) { - try { - await this.fetchEndTime(); - } catch (error) { - console.error("Error fetching end time:", error); + // Start polling for status updates + this.startPolling(); + + // Only start the clock and wait for end time if scraping is active + if (this.activityIndicator.textContent === 'Active') { + if (!this.endTime) { + try { + await this.fetchEndTime(); + } catch (error) { + console.error("Error fetching end time:", error); + } + } + + if (this.serverTime && this.endTime) { + this.startClock(); } } - - // Ensure UI is only updated once everything is ready - if (this.serverTime && this.endTime) { - this.startClock(); - this.hideLoadingIndicator(); - } else { - console.warn("Delaying hiding the loading indicator due to missing data..."); - const checkDataInterval = setInterval(() => { - if (this.serverTime && this.endTime) { - clearInterval(checkDataInterval); - this.startClock(); - this.hideLoadingIndicator(); - } - }, 500); + + // Hide loading indicator regardless of scraping status + this.hideLoadingIndicator(); + } + + startPolling() { + // Poll every 2 seconds + this.pollInterval = setInterval(async () => { + await this.checkScrapingStatus(); + }, 2000); + } + + stopPolling() { + if (this.pollInterval) { + clearInterval(this.pollInterval); + this.pollInterval = null; } } - showLoadingIndicator() { this.statusContainer.classList.remove('d-none'); @@ -79,9 +89,7 @@ export class ScraperUtils { 
this.activityIndicator.classList.add('text-bg-success'); this.activityIndicator.textContent = 'Active'; - console.log(`Scraping is active until ${data.end_time} TCT`); - - // Only call fetchEndTime() if endTime is not already set + // Fetch end time if we don't have it yet if (!this.endTime) { await this.fetchEndTime(); } @@ -98,6 +106,9 @@ export class ScraperUtils { this.endTimeElement.classList.add('d-none'); this.timeLeftElement.classList.add('d-none'); + + // Reset end time when inactive + this.endTime = null; } } catch (error) { console.error('Error checking scraping status:', error); @@ -177,4 +188,16 @@ export class ScraperUtils { const seconds = totalSeconds % 60; return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`; } + + // Add cleanup method + cleanup() { + this.stopPolling(); + } } + +// Add event listener for page unload +window.addEventListener('unload', () => { + if (window.scraperUtils) { + window.scraperUtils.cleanup(); + } +}); diff --git a/app/tasks.py b/app/tasks.py new file mode 100644 index 0000000..895c18e --- /dev/null +++ b/app/tasks.py @@ -0,0 +1,93 @@ +from celery import Celery +from app.models import Scraper +import redis +from datetime import timedelta +from flask import current_app + +def create_celery(): + celery = Celery('tasks', broker='redis://localhost:6379/0') + celery.conf.update( + task_serializer='json', + accept_content=['json'], + result_serializer='json', + timezone='UTC' + ) + return celery + +def init_celery(app): + """Initialize Celery with Flask app context""" + celery = create_celery() + celery.conf.update(app.config) + + class ContextTask(celery.Task): + def __call__(self, *args, **kwargs): + with app.app_context(): + return self.run(*args, **kwargs) + + celery.Task = ContextTask + return celery + +celery = create_celery() # This will be initialized properly in app/__init__.py + +def get_redis(): + return redis.StrictRedis( + host='localhost', + port=6379, 
+        db=0,
+        decode_responses=True
+    )
+
+@celery.task
+def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
+    """
+    Start scraping task with serializable parameters
+    Args:
+        faction_id: ID of the faction to scrape
+        fetch_interval: Interval between fetches in seconds
+        run_interval: How long to run the scraper in days
+        config_dict: Dictionary containing configuration
+    """
+    try:
+        redis_client = get_redis()
+        # Set current faction ID at task start
+        redis_client.set("current_faction_id", str(faction_id))
+
+        scraper = Scraper(
+            faction_id=faction_id,
+            fetch_interval=int(fetch_interval),
+            run_interval=int(run_interval),
+            config=config_dict
+        )
+        scraper.start_scraping()
+        return {"status": "success"}
+    except Exception as e:
+        # Clean up Redis state on error
+        redis_client = get_redis()
+        redis_client.delete("current_faction_id")
+        return {"status": "error", "message": str(e)}
+
+@celery.task
+def stop_scraping_task(faction_id):
+    """Stop scraping task and clean up Redis state"""
+    try:
+        redis_client = get_redis()
+
+        # Signal the loop to stop; do NOT delete the hash here -- the running
+        # loop still reads it, and Scraper.cleanup_redis_state() removes it.
+        redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
+
+        # Clean up current_faction_id if it matches
+        current_id = redis_client.get("current_faction_id")
+        if current_id and current_id == str(faction_id):
+            redis_client.delete("current_faction_id")
+
+        # NOTE: celery.current_task inside this function is the stop task
+        # itself, so revoking celery.current_task.request.id would terminate
+        # this stop handler rather than the scraping task. Rely on the
+        # cooperative scraping_active flag set above instead; to hard-kill
+        # the worker task, persist the scraping task id when
+        # start_scraping_task is dispatched and revoke that stored id.
+
+        return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
+    except Exception as e:
+        return {"status": "error", "message": str(e)}
diff --git a/app/views.py b/app/views.py
index 63b3fbe..e89671c 100644
--- a/app/views.py
+++ b/app/views.py
@@ -2,6 +2,8 @@ import os
 import glob
 from flask import render_template, Blueprint, current_app, request
+from app.tasks import get_redis
+
 from app.forms import ScrapingForm
from app.util import get_size from app.config import load_config @@ -13,6 +15,14 @@ from datetime import datetime views_bp = Blueprint("views", __name__) +def sizeof_fmt(num, suffix="B"): + """Convert bytes to human readable format""" + for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: + if abs(num) < 1024.0: + return f"{num:3.1f} {unit}{suffix}" + num /= 1024.0 + return f"{num:.1f} Yi{suffix}" + def register_views(app): @app.route('/') def index(): @@ -29,50 +39,60 @@ def register_views(app): @app.route('/download_results') def download_results(): - log_file_name = os.path.abspath(app.config['LOG_FILE_NAME']) - scraper = app.config.get('SCRAPER') - - if scraper: - print(scraper.data_file_name) - if not scraper: - print("Scraper not initialized") + # Get the current active log file and data file from Redis and app config + redis_client = get_redis() + current_faction_id = redis_client.get("current_faction_id") - data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR']) - log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR']) - - data_files = glob.glob(os.path.join(data_dir, "*.csv")) - log_files = glob.glob(os.path.join(log_dir, "*.log")) + active_data_file = None + if current_faction_id: + active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name") - def get_file_info(file_path): + active_log_file = app.config['LOG_FILE_NAME'] + + def get_file_info(file_path, file_type='data'): + stats = os.stat(file_path) + name = os.path.basename(file_path) + + # Determine if file is active + is_active = False + if file_type == 'data' and active_data_file: + is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file) + elif file_type == 'log' and active_log_file: + is_active = os.path.basename(file_path) == os.path.basename(active_log_file) + return { - "name": file_path, - "name_display": os.path.basename(file_path), - "last_modified": os.path.getmtime(file_path), - "created": 
os.path.getctime(file_path), - "size": get_size(file_path) + 'name': file_path, # Full path for internal use + 'name_display': name, # Just filename for display + 'last_modified': stats.st_mtime, # Send timestamp instead of datetime + 'created': stats.st_ctime, # Send timestamp instead of datetime + 'size': sizeof_fmt(stats.st_size), + 'active': is_active } - - data_files_info = [get_file_info(file) for file in data_files] - log_files_info = [get_file_info(file) for file in log_files] - - if scraper and scraper.scraping_active: - for data_file in data_files_info: - if os.path.abspath(scraper.data_file_name) == data_file['name']: - data_file['active'] = True - else: - data_file['active'] = False - - for log_file in log_files_info: - if log_file_name == os.path.abspath(log_file['name']): - log_file['active'] = True - else: - log_file['active'] = False - - data_files_info.sort(key=lambda x: x['last_modified'], reverse=True) - log_files_info.sort(key=lambda x: x['last_modified'], reverse=True) - - files = {"data": data_files_info, "log": log_files_info} - + + data_files = [] + log_files = [] + + # Get data files + data_dir = os.path.abspath(app.config['DATA']['DATA_DIR']) + if os.path.exists(data_dir): + for file in glob.glob(os.path.join(data_dir, "*.csv")): + data_files.append(get_file_info(file, 'data')) + + # Get log files + log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR']) + if os.path.exists(log_dir): + for file in glob.glob(os.path.join(log_dir, "*.log")): + log_files.append(get_file_info(file, 'log')) + + # Sort files by modification time, newest first + data_files.sort(key=lambda x: x['last_modified'], reverse=True) + log_files.sort(key=lambda x: x['last_modified'], reverse=True) + + files = { + 'data': data_files, + 'log': log_files + } + return render_template('download_results.html', files=files) views_bp = Blueprint("views", __name__) @@ -120,7 +140,7 @@ def register_views(app): @views_bp.route('/server_time') def server_time(): - current_time = 
datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
+        current_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')  # FIXME: datetime.timezone is not an attribute of the datetime class; add "from datetime import timezone" and use datetime.now(timezone.utc)
         return {'server_time': current_time}
 
     app.register_blueprint(views_bp)
diff --git a/requirements.txt b/requirements.txt
index 065ab47..210deee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -78,6 +78,7 @@ python-dateutil==2.9.0.post0
     # pandas
 pytz==2025.1
     # via pandas
+redis==5.0.1
 requests==2.32.3
     # via -r requirements.in
 seaborn==0.13.2
@@ -94,3 +95,4 @@ wtforms==3.2.1
     # via
     #   bootstrap-flask
     #   flask-wtf
+celery==5.3.6
diff --git a/run.py b/run.py
index 28fb247..523d51a 100644
--- a/run.py
+++ b/run.py
@@ -1,5 +1,6 @@
 from app import create_app
 
+app = create_app()
+
 if __name__ == '__main__':
-    app = create_app()
-    app.run(debug=True, threaded=True)
\ No newline at end of file
+    app.run(debug=True)
\ No newline at end of file
diff --git a/stop_scraping.py b/stop_scraping.py
new file mode 100644
index 0000000..13192bb
--- /dev/null
+++ b/stop_scraping.py
@@ -0,0 +1,50 @@
+import redis
+import argparse
+
+def get_redis():
+    return redis.StrictRedis(
+        host='localhost',
+        port=6379,
+        db=0,
+        decode_responses=True
+    )
+
+def stop_scraping(flush=False, force=False):
+    redis_client = get_redis()
+
+    if flush:
+        redis_client.flushall()
+        print("Flushed all Redis data")
+        return True
+
+    current_faction_id = redis_client.get("current_faction_id")
+
+    if not current_faction_id:
+        print("No active scraping session found.")
+        return False  # always report failure here so the --force fallback in __main__ can run
+
+    redis_client.hset(f"scraper:{current_faction_id}", "scraping_active", "0")
+    print(f"Sent stop signal to scraping process for faction {current_faction_id}")
+    return True
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.')
+    parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found')
+    parser.add_argument('--flush', action='store_true', help='Flush all 
Redis data (WARNING: This will clear ALL Redis data)') + + args = parser.parse_args() + + if args.flush: + if input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ").lower() != 'y': + print("Operation cancelled.") + exit(0) + + success = stop_scraping(flush=args.flush, force=args.force) + + if not success and args.force: + print("Forcing stop for all potential scraping processes...") + redis_client = get_redis() + # Get all scraper keys + for key in redis_client.keys("scraper:*"): + redis_client.hset(key, "scraping_active", "0") + print("Sent stop signal to all potential scraping processes.")