18 Commits

Author SHA1 Message Date
5994d8ae7b refactors to use redis & celery 2025-02-20 19:56:37 +01:00
f68ada7204 adds docker & fly.toml 2025-02-20 14:32:09 +01:00
2217bd5855 README.md aktualisiert 2025-02-12 13:25:17 +01:00
fa5d59b069 tests/analyses.ipynb gelöscht 2025-02-12 13:15:32 +01:00
11e6348a8e updates 2025-02-11 02:49:55 +01:00
c078017b5f fucked up the config file 2025-02-11 02:18:49 +01:00
f3da58e202 adds template config variables (like app title) 2025-02-11 02:16:02 +01:00
57e969a647 adds activity indicators in header (ugly af) 2025-02-11 01:58:06 +01:00
0340dea4f8 adds status and server time indicators. fixes checkboxes 2025-02-10 18:22:06 +01:00
2b6aebdab4 moves app initialization to correct file 2025-02-10 17:45:24 +01:00
a6292d2d0f adds ending time to activity indicator 2025-02-10 17:33:37 +01:00
a44c2bfc04 seperates index and log viewer 2025-02-10 16:52:29 +01:00
33621bdec4 refactors logging and config 2025-02-10 16:34:11 +01:00
d1f562ce94 removed state file as current state will be stored in scraper class 2025-02-10 14:12:23 +01:00
5e00df4e13 Merge pull request 'feature/analysis-form' (#10) from feature/analysis-form into master
Reviewed-on: #10
2025-02-10 03:11:57 +01:00
293d3e26a6 Merge pull request 'corrects button display in download_results' (#9) from develop into master
Reviewed-on: #9
2025-02-10 03:11:34 +01:00
ae5310d764 corrects button display in download_results 2025-02-09 18:32:43 +01:00
487d59512a Merge pull request 'adds correct license' (#7) from develop into master
Reviewed-on: #7
2025-02-09 16:07:20 +01:00
39 changed files with 956 additions and 815 deletions

5
.dockerignore Normal file
View File

@@ -0,0 +1,5 @@
fly.toml
.git/
__pycache__/
.envrc
.venv/

15
Dockerfile Normal file
View File

@@ -0,0 +1,15 @@
FROM python:3.13.1 AS builder
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1
WORKDIR /app
RUN python -m venv .venv
COPY requirements.txt ./
RUN .venv/bin/pip install -r requirements.txt
FROM python:3.13.1-slim
WORKDIR /app
COPY --from=builder /app/.venv .venv/
COPY . .
CMD ["/app/.venv/bin/flask", "run", "--host=0.0.0.0", "--port=8080"]

View File

@@ -1,18 +1,22 @@
# Torn User Activity Scraper # Torn User Activity Tracker
> [!WARNING]
> **Development is still in its early stages; do not put it to productive use!**
## Features ## Features
- Start and stop scraping user activity data - Start and stop scraping user activity data
- View real-time logs - View real-time logs
- Download data and log files - Download data and log files
- View scraping results and statistics - View scraping results
- Plugin based analysis system
- Toggle between light and dark mode - Toggle between light and dark mode
**Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended. **Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
## Planned Features ## Planned Features
- Additional analyses - Additional analyses plugins
- Selector for Torn API data to choose which data shall be tracked - Selector for Torn API data to choose which data shall be tracked
- Improved / fixed log viewer - Improved / fixed log viewer

View File

@@ -0,0 +1,56 @@
import os
from flask import Flask
from flask_bootstrap import Bootstrap5
from datetime import datetime
from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
from app.tasks import celery
from app.logging_config import init_logger
def create_app(config=None):
app = Flask(__name__)
if config is None:
config = load_config()
app.config.update(config)
os.environ['TZ'] = 'UTC'
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
# Move bootstrap settings to root level
for key, value in config.get('BOOTSTRAP', {}).items():
app.config[key.upper()] = value
# Initialize Celery
celery.conf.update(app.config)
bootstrap = Bootstrap5(app)
# Store the entire config in Flask app
app.config.update(config)
# Initialize other settings
app.config['SCRAPING_ACTIVE'] = False
app.config['SCRAPING_THREAD'] = None
app.config['DATA_FILE_NAME'] = None
app.config['LOG_FILE_NAME'] = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
# Initialize logging
app.logger = init_logger(app.config)
# Register routes
register_views(app)
register_api(app)
register_filters(app)
@app.context_processor
def inject_main_config():
main_config = app.config.get('MAIN', {})
return dict(main_config=main_config)
return app

View File

@@ -2,7 +2,6 @@ import os
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
from flask import url_for
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from .base import BaseAnalysis from .base import BaseAnalysis

View File

@@ -1,7 +1,6 @@
import os import os
import pandas as pd import pandas as pd
import plotly.graph_objects as go import plotly.graph_objects as go
from flask import url_for
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from .base import BaseAnalysis from .base import BaseAnalysis

View File

@@ -4,13 +4,9 @@ import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for from flask import current_app, url_for
from app.logging_config import get_logger
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
logger = get_logger()
class PlotTopActiveUsers(BasePlotAnalysis): class PlotTopActiveUsers(BasePlotAnalysis):
""" """
Class for analyzing the most active users and generating a bar chart. Class for analyzing the most active users and generating a bar chart.

View File

@@ -3,7 +3,6 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')

View File

@@ -4,13 +4,9 @@ import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for from flask import current_app, url_for
from app.logging_config import get_logger
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
logger = get_logger()
class PlotLineActivityAllUsers(BasePlotAnalysis): class PlotLineActivityAllUsers(BasePlotAnalysis):
""" """
Class for analyzing user activity trends over multiple days and generating a line graph. Class for analyzing user activity trends over multiple days and generating a line graph.

View File

@@ -5,10 +5,6 @@ import plotly.graph_objects as go
from .basePlotlyAnalysis import BasePlotlyAnalysis from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for from flask import current_app, url_for
from app.logging_config import get_logger
logger = get_logger()
class PlotlyActivityHeatmap(BasePlotlyAnalysis): class PlotlyActivityHeatmap(BasePlotlyAnalysis):
""" """
Class for analyzing user activity trends over multiple days and generating an interactive heatmap. Class for analyzing user activity trends over multiple days and generating an interactive heatmap.

View File

@@ -4,10 +4,6 @@ from plotly.subplots import make_subplots
from .basePlotlyAnalysis import BasePlotlyAnalysis from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for from flask import current_app, url_for
from app.logging_config import get_logger
logger = get_logger()
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
""" """
Class for analyzing user activity trends over multiple days and generating an interactive line graph. Class for analyzing user activity trends over multiple days and generating an interactive line graph.

View File

@@ -1,4 +1,3 @@
# filepath: /home/michaelb/Dokumente/TornActivityTracker/app/api.py
from flask import jsonify, request, Response, send_from_directory, current_app from flask import jsonify, request, Response, send_from_directory, current_app
import threading import threading
import os import os
@@ -7,14 +6,10 @@ from datetime import datetime
import pandas as pd import pandas as pd
from app.models import Scraper from app.models import Scraper
from app.util import create_zip, delete_old_zips, tail, get_size from app.util import create_zip, delete_old_zips, tail
from app.config import load_config from app.config import load_config
from app.logging_config import get_logger
from app.forms import ScrapingForm from app.forms import ScrapingForm
from app.tasks import start_scraping_task, stop_scraping_task, get_redis
config = load_config()
logger = get_logger()
log_file_name = logger.handlers[0].baseFilename
scraping_thread = None scraping_thread = None
scraper = None scraper = None
@@ -23,52 +18,53 @@ scrape_lock = threading.Lock()
def register_api(app): def register_api(app):
@app.route('/start_scraping', methods=['POST']) @app.route('/start_scraping', methods=['POST'])
def start_scraping(): def start_scraping():
with scrape_lock: form = ScrapingForm()
scraper = current_app.config.get('SCRAPER') if form.validate_on_submit():
if scraper is not None and scraper.scraping_active: redis_client = get_redis()
logger.warning("Can't start scraping process: scraping already in progress") faction_id = form.faction_id.data
# Check if scraping is already active
if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
return jsonify({"status": "Scraping already in progress"}) return jsonify({"status": "Scraping already in progress"})
form = ScrapingForm() # Convert config to a serializable dict with only needed values
if form.validate_on_submit(): config_dict = {
faction_id = form.faction_id.data 'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
fetch_interval = form.fetch_interval.data 'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
run_interval = form.run_interval.data }
scraper = Scraper(faction_id, fetch_interval, run_interval, current_app) start_scraping_task.delay(
scraper.scraping_active = True faction_id,
int(form.fetch_interval.data), # Ensure this is an int
scraping_thread = threading.Thread(target=scraper.start_scraping) int(form.run_interval.data), # Ensure this is an int
scraping_thread.daemon = True config_dict
scraping_thread.start() )
return jsonify({"status": "Scraping started"})
current_app.config['SCRAPER'] = scraper return jsonify({"status": "Invalid form data"})
current_app.config['SCRAPING_THREAD'] = scraping_thread
return jsonify({"status": "Scraping started"})
return jsonify({"status": "Invalid form data"})
@app.route('/stop_scraping', methods=['POST']) @app.route('/stop_scraping', methods=['POST'])
def stop_scraping(): def stop_scraping():
scraper = current_app.config.get('SCRAPER') redis_client = get_redis()
if scraper is None or not scraper.scraping_active: faction_id = redis_client.get("current_faction_id")
return jsonify({"status": "Scraping is not running"}) if not faction_id:
return jsonify({"status": "No active scraping session"})
stop_scraping_task.delay(faction_id)
return jsonify({"status": "Stopping scraping"})
scraper.stop_scraping()
current_app.config['SCRAPING_ACTIVE'] = False
logger.debug("Scraping stopped by user")
return jsonify({"status": "Scraping stopped"})
@app.route('/logfile', methods=['GET']) @app.route('/logfile', methods=['GET'])
def logfile(): def logfile():
log_file_name = current_app.logger.handlers[0].baseFilename
page = int(request.args.get('page', 0)) # Page number page = int(request.args.get('page', 0)) # Page number
lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page lines_per_page = int(request.args.get('lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page
log_file_path = log_file_name # Path to the current log file log_file_path = log_file_name # Path to the current log file
if not os.path.isfile(log_file_path): if not os.path.isfile(log_file_path):
logger.error("Log file not found") current_app.logger.error("Log file not found")
return jsonify({"error": "Log file not found"}), 404 return jsonify({"error": "Log file not found"}), 404
log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES'])) log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES']))
log_lines = log_lines[::-1] # Reverse the list log_lines = log_lines[::-1] # Reverse the list
@@ -123,14 +119,15 @@ def register_api(app):
@app.route('/delete_files', methods=['POST']) @app.route('/delete_files', methods=['POST'])
def delete_files(): def delete_files():
log_file_name = current_app.logger.handlers[0].baseFilename
file_paths = request.json.get('file_paths', []) file_paths = request.json.get('file_paths', [])
if not file_paths: if not file_paths:
return jsonify({"error": "No files specified"}), 400 return jsonify({"error": "No files specified"}), 400
errors = [] errors = []
data_dir = os.path.abspath(config['DATA']['DATA_DIR']) data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR']) log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
for file_path in file_paths: for file_path in file_paths:
if file_path.startswith('/data/'): if file_path.startswith('/data/'):
@@ -171,40 +168,63 @@ def register_api(app):
@app.route('/data/<path:filename>') @app.route('/data/<path:filename>')
def download_data_file(filename): def download_data_file(filename):
data_dir = os.path.abspath(config['DATA']['DATA_DIR']) data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
file_path = os.path.join(data_dir, filename) file_path = os.path.join(data_dir, filename)
return send_from_directory(directory=data_dir, path=filename, as_attachment=True) return send_from_directory(directory=data_dir, path=filename, as_attachment=True)
@app.route('/log/<path:filename>') @app.route('/log/<path:filename>')
def download_log_file(filename): def download_log_file(filename):
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR']) log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
file_path = os.path.join(log_dir, filename) file_path = os.path.join(log_dir, filename)
return send_from_directory(directory=log_dir, path=filename, as_attachment=True) return send_from_directory(directory=log_dir, path=filename, as_attachment=True)
@app.route('/tmp/<path:filename>') @app.route('/tmp/<path:filename>')
def download_tmp_file(filename): def download_tmp_file(filename):
tmp_dir = os.path.abspath(config['TEMP']['TEMP_DIR']) tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
file_path = os.path.join(tmp_dir, filename) file_path = os.path.join(tmp_dir, filename)
return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True) return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)
@app.route('/config/lines_per_page') @app.route('/config/lines_per_page')
def get_lines_per_page(): def get_lines_per_page():
lines_per_page = config['LOGGING']['VIEW_PAGE_LINES'] lines_per_page = current_app.config['LOGGING']['VIEW_PAGE_LINES']
return jsonify({"lines_per_page": lines_per_page}) return jsonify({"lines_per_page": lines_per_page})
@app.route('/scraping_status', methods=['GET']) @app.route('/scraping_status', methods=['GET'])
def scraping_status(): def scraping_status():
if scraper is None: redis_client = get_redis()
logger.debug("Scraper is not initialized.") current_faction_id = redis_client.get("current_faction_id")
if not current_faction_id:
return jsonify({"scraping_active": False}) return jsonify({"scraping_active": False})
if scraper.scraping_active: scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
logger.debug("Scraping is active.")
return jsonify({"scraping_active": True}) # If we have a faction_id but scraping is not active, clean up the stale state
else: if not scraping_active or scraping_active == "0":
logger.debug("Scraping is not active.") redis_client.delete("current_faction_id")
return jsonify({"scraping_active": False}) return jsonify({"scraping_active": False})
return jsonify({
"scraping_active": True,
"faction_id": current_faction_id
})
@app.route('/scraping_get_end_time')
def scraping_get_end_time():
redis_client = get_redis()
current_faction_id = redis_client.get("current_faction_id")
if not current_faction_id:
return jsonify({"scraping_active": False})
end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
if not end_time:
return jsonify({"scraping_active": False})
return jsonify({
"end_time": end_time,
"faction_id": current_faction_id
})

View File

@@ -1,41 +0,0 @@
from flask import Flask
from flask_bootstrap import Bootstrap5
from datetime import datetime
from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
def init_app():
config = load_config()
# Initialize app
app = Flask(__name__)
# Load configuration
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
app.config['API_KEY'] = config['DEFAULT']['API_KEY']
app.config['DATA'] = config['DATA']
app.config['TEMP'] = config['TEMP']
app.config['LOGGING'] = config['LOGGING']
# Move bootstrap settings to root level
for key in config['BOOTSTRAP']:
app.config[key.upper()] = config['BOOTSTRAP'][key]
bootstrap = Bootstrap5(app)
# Initialize global variables
app.config['SCRAPING_ACTIVE'] = False
app.config['SCRAPING_THREAD'] = None
app.config['DATA_FILE_NAME'] = None
app.config['LOG_FILE_NAME'] = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
# Register routes
register_views(app)
register_api(app)
register_filters(app)
return app

View File

@@ -1,7 +1,8 @@
import configparser from configobj import ConfigObj
import os import os
def load_config(): def load_config():
config = configparser.ConfigParser() config_path = os.path.join(os.path.dirname(__file__), '..', 'config.ini')
config.read(os.path.join(os.path.dirname(__file__), '..', 'config.ini'))
return config # Load config while preserving sections as nested dicts
return ConfigObj(config_path)

View File

@@ -4,4 +4,12 @@ from datetime import datetime
def register_filters(app): def register_filters(app):
@app.template_filter('datetimeformat') @app.template_filter('datetimeformat')
def datetimeformat(value): def datetimeformat(value):
return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S') """Convert datetime or timestamp to formatted string"""
if isinstance(value, datetime):
dt = value
else:
try:
dt = datetime.fromtimestamp(float(value))
except (ValueError, TypeError):
return str(value)
return dt.strftime('%Y-%m-%d %H:%M:%S')

View File

@@ -4,36 +4,31 @@ from queue import Queue
import os import os
from datetime import datetime from datetime import datetime
from app.config import load_config from flask import current_app
config = load_config() def init_logger(config):
LOG_DIR = config.get('LOGGING', {}).get('LOG_DIR', 'log')
# Define the log directory and ensure it exists if not os.path.exists(LOG_DIR):
LOG_DIR = config['LOGGING']['LOG_DIR'] os.makedirs(LOG_DIR)
if not os.path.exists(LOG_DIR):
os.makedirs(LOG_DIR)
# Generate the log filename dynamically log_file_name = os.path.join(LOG_DIR, datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log')
log_file_name = os.path.join(LOG_DIR, datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log')
# Initialize the logger logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG)
logger.setLevel(logging.DEBUG)
# File handler file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler = logging.FileHandler(log_file_name, mode='w') file_handler.setLevel(logging.DEBUG)
file_handler.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
datefmt='%m/%d/%Y %I:%M:%S %p') file_handler.setFormatter(formatter)
file_handler.setFormatter(formatter) logger.addHandler(file_handler)
logger.addHandler(file_handler)
# Queue handler for real-time logging log_queue = Queue()
log_queue = Queue() queue_handler = QueueHandler(log_queue)
queue_handler = QueueHandler(log_queue) queue_handler.setLevel(logging.DEBUG)
queue_handler.setLevel(logging.DEBUG) logger.addHandler(queue_handler)
logger.addHandler(queue_handler)
logger.debug("Logger initialized")
# Function to get logger in other modules
def get_logger():
return logger return logger

View File

@@ -5,37 +5,71 @@ import os
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from requests.exceptions import ConnectionError, Timeout, RequestException from requests.exceptions import ConnectionError, Timeout, RequestException
import redis
import threading
from app.logging_config import get_logger from flask import current_app
from app.config import load_config
config = load_config()
API_KEY = config['DEFAULT']['API_KEY']
logger = get_logger()
class Scraper: class Scraper:
def __init__(self, faction_id, fetch_interval, run_interval, app): _instances = {} # Track all instances by faction_id
self.faction_id = faction_id _lock = threading.Lock()
self.fetch_interval = fetch_interval
self.run_interval = run_interval
self.end_time = datetime.now() + timedelta(days=run_interval)
self.data_file_name = os.path.join(app.config['DATA']['DATA_DIR'], f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv")
self.scraping_active = False
print(self.data_file_name) def __new__(cls, faction_id, *args, **kwargs):
with cls._lock:
# Stop any existing instance for this faction
if faction_id in cls._instances:
old_instance = cls._instances[faction_id]
old_instance.stop_scraping()
instance = super().__new__(cls)
cls._instances[faction_id] = instance
return instance
def __init__(self, faction_id, fetch_interval, run_interval, config):
# Only initialize if not already initialized
if not hasattr(self, 'faction_id'):
self.redis_client = redis.StrictRedis(
host='localhost', port=6379, db=0, decode_responses=True
)
self.faction_id = faction_id
self.fetch_interval = fetch_interval
self.run_interval = run_interval
self.API_KEY = config['DEFAULT']['API_KEY']
self.data_file_name = os.path.join(
config['DATA']['DATA_DIR'],
f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
)
self.end_time = datetime.now() + timedelta(days=int(run_interval))
# Store scraper state in Redis
self.redis_client.hmset(f"scraper:{faction_id}", {
"faction_id": faction_id,
"fetch_interval": fetch_interval,
"run_interval": run_interval,
"end_time": self.end_time.isoformat(),
"data_file_name": self.data_file_name,
"scraping_active": "0",
"api_key": self.API_KEY
})
@property
def scraping_active(self):
return bool(int(self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active")))
@scraping_active.setter
def scraping_active(self, value):
self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
def fetch_faction_data(self): def fetch_faction_data(self):
url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={API_KEY}" url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
response = requests.get(url) response = requests.get(url)
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}. Response: {response.text}") current_app.logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}. Response: {response.text}")
return None return None
def fetch_user_activity(self, user_id): def fetch_user_activity(self, user_id):
url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}" url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={self.API_KEY}"
retries = 3 retries = 3
for attempt in range(retries): for attempt in range(retries):
try: try:
@@ -43,46 +77,50 @@ class Scraper:
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
except ConnectionError as e: except ConnectionError as e:
logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}") current_app.logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
except Timeout as e: except Timeout as e:
logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}") current_app.logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
except RequestException as e: except RequestException as e:
logger.error(f"Error while fetching user activity for user ID {user_id}: {e}") current_app.logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
if attempt < retries - 1: if attempt < retries - 1:
current_app.logger.debug(f"Retrying {attempt + 1}/{retries} for user {user_id}")
time.sleep(2 ** attempt) # Exponential backoff time.sleep(2 ** attempt) # Exponential backoff
return None return None
def start_scraping(self) -> None: def start_scraping(self) -> None:
"""Starts the scraping process until the end time is reached or stopped manually.""" """Starts the scraping process until the end time is reached or stopped manually."""
self.scraping_active = True self.scraping_active = True
logger.info(f"Starting scraping for faction ID {self.faction_id}")
logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
MAX_FAILURES = 5 # Stop after 5 consecutive failures current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
MAX_FAILURES = 5
failure_count = 0 failure_count = 0
while datetime.now() < self.end_time and self.scraping_active: while datetime.now() < self.end_time and self.scraping_active:
logger.info(f"Fetching data at {datetime.now()}") current_app.logger.info(f"Fetching data at {datetime.now()}")
faction_data = self.fetch_faction_data() faction_data = self.fetch_faction_data()
if not faction_data or "members" not in faction_data: if not faction_data or "members" not in faction_data:
logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})") current_app.logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})")
failure_count += 1 failure_count += 1
if failure_count >= MAX_FAILURES: if failure_count >= MAX_FAILURES:
logger.error(f"Max failures reached ({MAX_FAILURES}). Stopping scraping.") current_app.logger.error(f"Max failures reached ({MAX_FAILURES}). Stopping scraping.")
break break
time.sleep(self.fetch_interval) time.sleep(self.fetch_interval)
continue continue
current_app.logger.info(f"Fetched {len(faction_data['members'])} members for faction {self.faction_id}")
failure_count = 0 # Reset failure count on success failure_count = 0 # Reset failure count on success
user_activity_data = self.process_faction_members(faction_data["members"]) user_activity_data = self.process_faction_members(faction_data["members"])
self.save_data(user_activity_data) self.save_data(user_activity_data)
logger.info(f"Data appended to {self.data_file_name}") current_app.logger.info(f"Data appended to {self.data_file_name}")
time.sleep(self.fetch_interval) time.sleep(self.fetch_interval)
self.handle_scraping_end() self.handle_scraping_end()
def process_faction_members(self, members: Dict[str, Dict]) -> List[Dict]: def process_faction_members(self, members: Dict[str, Dict]) -> List[Dict]:
"""Processes and retrieves user activity for all faction members.""" """Processes and retrieves user activity for all faction members."""
user_activity_data = [] user_activity_data = []
@@ -96,16 +134,16 @@ class Scraper:
"status": user_activity.get("status", {}).get("state", ""), "status": user_activity.get("status", {}).get("state", ""),
"timestamp": datetime.now().timestamp(), "timestamp": datetime.now().timestamp(),
}) })
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})") current_app.logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
else: else:
logger.warning(f"Failed to fetch data for user {user_id}") current_app.logger.warning(f"Failed to fetch data for user {user_id}")
return user_activity_data return user_activity_data
def save_data(self, user_activity_data: List[Dict]) -> None: def save_data(self, user_activity_data: List[Dict]) -> None:
"""Saves user activity data to a CSV file.""" """Saves user activity data to a CSV file."""
if not user_activity_data: if not user_activity_data:
logger.warning("No data to save.") current_app.logger.warning("No data to save.")
return return
df = pd.DataFrame(user_activity_data) df = pd.DataFrame(user_activity_data)
@@ -117,22 +155,40 @@ class Scraper:
try: try:
with open(self.data_file_name, "a" if file_exists else "w") as f: with open(self.data_file_name, "a" if file_exists else "w") as f:
df.to_csv(f, mode="a" if file_exists else "w", header=not file_exists, index=False) df.to_csv(f, mode="a" if file_exists else "w", header=not file_exists, index=False)
logger.info(f"Data successfully saved to {self.data_file_name}") current_app.logger.info(f"Data successfully saved to {self.data_file_name}")
except Exception as e: except Exception as e:
logger.error(f"Error saving data to {self.data_file_name}: {e}") current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}")
def cleanup_redis_state(self):
"""Clean up all Redis state for this scraper instance"""
if hasattr(self, 'faction_id'):
self.redis_client.delete(f"scraper:{self.faction_id}")
current_id = self.redis_client.get("current_faction_id")
if current_id and current_id == str(self.faction_id):
self.redis_client.delete("current_faction_id")
# Remove from instances tracking
with self._lock:
if self.faction_id in self._instances:
del self._instances[self.faction_id]
def handle_scraping_end(self) -> None: def handle_scraping_end(self) -> None:
"""Handles cleanup and logging when scraping ends.""" """Handles cleanup and logging when scraping ends."""
if not self.scraping_active: if not self.scraping_active:
logger.warning(f"Scraping stopped manually at {datetime.now()}") current_app.logger.warning(f"Scraping stopped manually at {datetime.now()}")
elif datetime.now() >= self.end_time: elif datetime.now() >= self.end_time:
logger.warning(f"Scraping stopped due to timeout at {datetime.now()} (Run interval: {self.run_interval} days)") current_app.logger.warning(f"Scraping stopped due to timeout at {datetime.now()} (Run interval: {self.run_interval} days)")
else: else:
logger.error(f"Unexpected stop at {datetime.now()}") current_app.logger.error(f"Unexpected stop at {datetime.now()}")
logger.info("Scraping completed.") current_app.logger.info("Scraping completed.")
self.scraping_active = False self.scraping_active = False
self.cleanup_redis_state()
def stop_scraping(self): def stop_scraping(self):
self.scraping_active = False self.scraping_active = False
logger.debug("Scraping stopped by user") self.cleanup_redis_state()
current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}")
def __del__(self):
"""Ensure Redis cleanup on object destruction"""
self.cleanup_redis_state()

View File

@@ -1,2 +0,0 @@
data_file_name = None
log_file_name = None

View File

@@ -1,7 +1,38 @@
function checkAllCheckboxes(tableId, checkAllCheckboxId) { import { ScraperUtils } from './scraper_utils.js';
const table = document.getElementById(tableId);
const checkboxes = table.querySelectorAll('input[type="checkbox"]');
const checkAllCheckbox = document.getElementById(checkAllCheckboxId);
checkboxes.forEach(checkbox => checkbox.checked = checkAllCheckbox.checked); class Common {
constructor() {
this.utils = new ScraperUtils();
this.addEventListeners();
this.scheduleUpdates();
}
scheduleUpdates() {
// Ensure server time updates every minute but only after initial fetch
setTimeout(() => {
setInterval(() => this.utils.updateServerTime(), 60000);
}, 5000); // Delay first scheduled update to prevent duplicate initial request
}
addEventListeners() {
if (this.utils.stopButton) {
this.utils.stopButton.addEventListener('click', () => this.utils.checkScrapingStatus());
}
}
} }
document.addEventListener('DOMContentLoaded', () => {
new Common();
});
// Toggle every enabled checkbox in the table `tableId` to match the state
// of the master checkbox `checkAllId`. Exposed on `window` so the inline
// onclick handlers in the templates can reach it from module scope.
window.checkAllCheckboxes = (tableId, checkAllId) => {
    const masterState = document.getElementById(checkAllId).checked;
    const boxes = document
        .getElementById(tableId)
        .querySelectorAll('input[type="checkbox"]');
    for (const box of boxes) {
        if (!box.disabled) {
            box.checked = masterState;
        }
    }
};

View File

@@ -1,91 +1,21 @@
class LogScraperApp { import { ScraperUtils } from './scraper_utils.js';
class ScraperApp {
constructor() { constructor() {
this.utils = new ScraperUtils();
this.form = document.getElementById('scrapingForm'); this.form = document.getElementById('scrapingForm');
this.stopButton = document.getElementById('stopButton'); this.stopButton = document.getElementById('stopButton');
this.logsElement = document.getElementById('logs');
this.prevPageButton = document.getElementById('prevPage');
this.nextPageButton = document.getElementById('nextPage');
this.pageInfo = document.getElementById('pageInfo');
this.startButton = document.getElementById('startButton'); this.startButton = document.getElementById('startButton');
this.currentPage = 0;
this.linesPerPage = null;
this.autoRefreshInterval = null;
this.init(); this.init();
} }
async init() { init() {
await this.fetchConfig(); this.utils.checkScrapingStatus();
await this.checkScrapingStatus();
this.addEventListeners(); this.addEventListeners();
} }
async fetchConfig() {
try {
const response = await fetch('/config/lines_per_page');
const data = await response.json();
this.linesPerPage = data.lines_per_page;
this.fetchLogs(this.currentPage);
} catch (error) {
console.error('Error fetching config:', error);
}
}
async fetchLogs(page) {
try {
const response = await fetch(`/logfile?page=${page}&lines_per_page=${this.linesPerPage}`);
const data = await response.json();
if (data.error) {
this.logsElement.textContent = data.error;
} else {
this.logsElement.innerHTML = data.log.map((line, index) => {
const lineNumber = data.start_line - index;
return `<span class="line-number">${lineNumber}</span> ${line}`;
}).join('');
this.updatePagination(data.total_lines);
}
} catch (error) {
console.error('Error fetching logs:', error);
}
}
updatePagination(totalLines) {
this.prevPageButton.disabled = this.currentPage === 0;
this.nextPageButton.disabled = (this.currentPage + 1) * this.linesPerPage >= totalLines;
this.pageInfo.textContent = `Page ${this.currentPage + 1} of ${Math.ceil(totalLines / this.linesPerPage)}`;
}
startAutoRefresh() {
this.autoRefreshInterval = setInterval(() => this.fetchLogs(this.currentPage), 5000);
}
stopAutoRefresh() {
clearInterval(this.autoRefreshInterval);
}
async checkScrapingStatus() {
try {
const response = await fetch('/scraping_status');
const data = await response.json();
if (data.scraping_active) {
this.startButton.disabled = true;
this.stopButton.disabled = false;
this.startAutoRefresh();
} else {
this.startButton.disabled = false;
this.stopButton.disabled = true;
}
this.fetchLogs(this.currentPage);
} catch (error) {
console.error('Error checking scraping status:', error);
}
}
async startScraping(event) { async startScraping(event) {
event.preventDefault(); event.preventDefault(); // Prevent default form submission
const formData = new FormData(this.form); const formData = new FormData(this.form);
try { try {
const response = await fetch('/start_scraping', { const response = await fetch('/start_scraping', {
@@ -93,12 +23,8 @@ class LogScraperApp {
body: formData body: formData
}); });
const data = await response.json(); const data = await response.json();
console.log(data);
if (data.status === "Scraping started") { if (data.status === "Scraping started") {
this.startButton.disabled = true; this.utils.checkScrapingStatus(); // Update UI
this.stopButton.disabled = false;
this.startAutoRefresh();
} }
} catch (error) { } catch (error) {
console.error('Error starting scraping:', error); console.error('Error starting scraping:', error);
@@ -107,14 +33,12 @@ class LogScraperApp {
async stopScraping() { async stopScraping() {
try { try {
const response = await fetch('/stop_scraping', { method: 'POST' }); const response = await fetch('/stop_scraping', {
method: 'POST'
});
const data = await response.json(); const data = await response.json();
console.log(data);
if (data.status === "Scraping stopped") { if (data.status === "Scraping stopped") {
this.startButton.disabled = false; this.utils.checkScrapingStatus(); // Update UI
this.stopButton.disabled = true;
this.stopAutoRefresh();
} }
} catch (error) { } catch (error) {
console.error('Error stopping scraping:', error); console.error('Error stopping scraping:', error);
@@ -122,23 +46,11 @@ class LogScraperApp {
} }
addEventListeners() { addEventListeners() {
this.prevPageButton.addEventListener('click', () => {
if (this.currentPage > 0) {
this.currentPage--;
this.fetchLogs(this.currentPage);
}
});
this.nextPageButton.addEventListener('click', () => {
this.currentPage++;
this.fetchLogs(this.currentPage);
});
this.form.addEventListener('submit', (event) => this.startScraping(event)); this.form.addEventListener('submit', (event) => this.startScraping(event));
this.stopButton.addEventListener('click', () => this.stopScraping()); this.stopButton.addEventListener('click', () => this.stopScraping());
} }
} }
// Initialize the application when DOM is fully loaded document.addEventListener('DOMContentLoaded', () => {
document.addEventListener('DOMContentLoaded', () => new LogScraperApp()); new ScraperApp();
});

97
app/static/log_viewer.js Normal file
View File

@@ -0,0 +1,97 @@
/**
 * Standalone log viewer page: fetches paginated log lines from the backend
 * and auto-refreshes the current page while a scrape is active.
 */
class LogViewerApp {
    constructor() {
        // DOM handles for the log pane and its pagination controls.
        this.logsElement = document.getElementById('logs');
        this.prevPageButton = document.getElementById('prevPage');
        this.nextPageButton = document.getElementById('nextPage');
        this.pageInfo = document.getElementById('pageInfo');
        // Pagination state; page size is fetched from the server config.
        this.currentPage = 0;
        this.linesPerPage = null;
        this.autoRefreshInterval = null;
        this.init();
    }

    // Fetch the page size first, then the scraping status (which decides
    // whether auto-refresh starts), then wire up the pagination buttons.
    async init() {
        await this.fetchConfig();
        await this.checkScrapingStatus();
        this.addEventListeners();
    }

    // Load lines-per-page from the server, then render the first page.
    async fetchConfig() {
        try {
            const response = await fetch('/config/lines_per_page');
            const data = await response.json();
            this.linesPerPage = data.lines_per_page;
            this.fetchLogs(this.currentPage);
        } catch (error) {
            console.error('Error fetching config:', error);
        }
    }

    // Fetch and render one page of log lines. Line numbers count down from
    // data.start_line, so the newest line carries the highest number.
    async fetchLogs(page) {
        try {
            const response = await fetch(`/logfile?page=${page}&lines_per_page=${this.linesPerPage}`);
            const data = await response.json();
            if (data.error) {
                this.logsElement.textContent = data.error;
            } else {
                this.logsElement.innerHTML = data.log.map((line, index) => {
                    const lineNumber = data.start_line - index;
                    return `<span class="line-number">${lineNumber}</span> ${line}`;
                }).join('');
                this.updatePagination(data.total_lines);
            }
        } catch (error) {
            console.error('Error fetching logs:', error);
        }
    }

    // Enable/disable prev/next and refresh the "Page X of Y" label.
    updatePagination(totalLines) {
        this.prevPageButton.disabled = this.currentPage === 0;
        this.nextPageButton.disabled = (this.currentPage + 1) * this.linesPerPage >= totalLines;
        this.pageInfo.textContent = `Page ${this.currentPage + 1} of ${Math.ceil(totalLines / this.linesPerPage)}`;
    }

    // Re-fetch the current page every 5 seconds while scraping is active.
    startAutoRefresh() {
        this.autoRefreshInterval = setInterval(() => this.fetchLogs(this.currentPage), 5000);
    }

    // Safe to call when no interval is running (clearInterval ignores null).
    stopAutoRefresh() {
        clearInterval(this.autoRefreshInterval);
    }

    // Toggle auto-refresh based on the backend's scraping status, then
    // refresh the visible page either way.
    async checkScrapingStatus() {
        try {
            const response = await fetch('/scraping_status');
            const data = await response.json();
            if (data.scraping_active) {
                this.startAutoRefresh();
            } else {
                this.stopAutoRefresh();
            }
            this.fetchLogs(this.currentPage);
        } catch (error) {
            console.error('Error checking scraping status:', error);
        }
    }

    addEventListeners() {
        this.prevPageButton.addEventListener('click', () => {
            if (this.currentPage > 0) {
                this.currentPage--;
                this.fetchLogs(this.currentPage);
            }
        });
        // NOTE(review): "next" is not clamped client-side; this relies on
        // updatePagination() disabling the button at the last page.
        this.nextPageButton.addEventListener('click', () => {
            this.currentPage++;
            this.fetchLogs(this.currentPage);
        });
    }
}

// Initialize the application when DOM is fully loaded
document.addEventListener('DOMContentLoaded', () => new LogViewerApp());

203
app/static/scraper_utils.js Normal file
View File

@@ -0,0 +1,203 @@
/**
 * Shared front-end helpers for the scraper status header: polls the backend
 * for scraping state, displays server time (TCT) and a countdown to the
 * scrape's end time, and toggles the start/stop buttons accordingly.
 */
export class ScraperUtils {
    constructor() {
        // Status badges / time displays rendered by the navigation include.
        this.activityIndicator = document.getElementById('activity_indicator');
        this.endTimeElement = document.getElementById('end_time');
        this.serverTimeElement = document.getElementById('server_time');
        this.timeLeftElement = document.getElementById('time-left'); // New element for countdown
        // Optional controls: pages without the scraping form have neither
        // button, so these may be null (guarded before use).
        this.stopButton = document.getElementById('stopButton');
        this.startButton = document.getElementById('startButton');
        this.statusContainer = document.getElementById('status_container');
        this.loadingIndicator = document.getElementById('loading_indicator');
        this.statusContent = document.querySelectorAll('#status_content');
        // Clock state, populated by updateServerTime() / fetchEndTime().
        this.serverTime = null;
        this.endTime = null;
        this.pollInterval = null; // Add this line
        this.init();
    }

    // Fetch server time and scraping status in parallel, start the status
    // poll, and only start the countdown clock when scraping is active.
    async init() {
        this.showLoadingIndicator();

        try {
            await Promise.all([
                this.updateServerTime(),
                this.checkScrapingStatus()
            ]);
        } catch (error) {
            console.error("Error during initialization:", error);
        }

        // Start polling for status updates
        this.startPolling();

        // Only start the clock and wait for end time if scraping is active
        if (this.activityIndicator.textContent === 'Active') {
            if (!this.endTime) {
                try {
                    await this.fetchEndTime();
                } catch (error) {
                    console.error("Error fetching end time:", error);
                }
            }
            if (this.serverTime && this.endTime) {
                this.startClock();
            }
        }

        // Hide loading indicator regardless of scraping status
        this.hideLoadingIndicator();
    }

    startPolling() {
        // Poll every 2 seconds
        this.pollInterval = setInterval(async () => {
            await this.checkScrapingStatus();
        }, 2000);
    }

    stopPolling() {
        if (this.pollInterval) {
            clearInterval(this.pollInterval);
            this.pollInterval = null;
        }
    }

    // Swap the status badges for the "Loading..." placeholder.
    showLoadingIndicator() {
        this.statusContainer.classList.remove('d-none');
        this.loadingIndicator.classList.remove('d-none');
        this.statusContent.forEach(element => element.classList.add('d-none'));
    }

    hideLoadingIndicator() {
        this.loadingIndicator.classList.add('d-none');
        this.statusContent.forEach(element => element.classList.remove('d-none'));
    }

    // Sync badges, buttons, and end-time display with the backend status.
    async checkScrapingStatus() {
        try {
            const response = await fetch('/scraping_status');
            const data = await response.json();

            if (data.scraping_active) {
                if (this.startButton) this.startButton.disabled = true;
                if (this.stopButton) this.stopButton.disabled = false;

                this.activityIndicator.classList.remove('text-bg-danger');
                this.activityIndicator.classList.add('text-bg-success');
                this.activityIndicator.textContent = 'Active';

                // Fetch end time if we don't have it yet
                if (!this.endTime) {
                    await this.fetchEndTime();
                }

                this.endTimeElement.classList.remove('d-none');
                this.timeLeftElement.classList.remove('d-none');
            } else {
                if (this.startButton) this.startButton.disabled = false;
                if (this.stopButton) this.stopButton.disabled = true;

                this.activityIndicator.classList.remove('text-bg-success');
                this.activityIndicator.classList.add('text-bg-danger');
                this.activityIndicator.textContent = 'Inactive';

                this.endTimeElement.classList.add('d-none');
                this.timeLeftElement.classList.add('d-none');

                // Reset end time when inactive
                this.endTime = null;
            }
        } catch (error) {
            console.error('Error checking scraping status:', error);
        }
    }

    // Fetch the authoritative server time and display it (HH:MM:SS).
    async updateServerTime() {
        try {
            const response = await fetch('/server_time');
            const data = await response.json();
            this.serverTime = new Date(data.server_time.replace(' ', 'T'));
            this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`;
        } catch (error) {
            console.error('Error fetching server time:', error);
        }
    }

    // Fetch the scrape's scheduled end time once; cached in this.endTime.
    async fetchEndTime() {
        if (this.endTime) return;

        try {
            const response = await fetch('/scraping_get_end_time');
            const data = await response.json();
            if (data.end_time) {
                this.endTime = new Date(data.end_time);
                this.endTimeElement.textContent = `Running until ${this.formatDateToYYYYMMDDHHMMSS(this.endTime)} TCT`;
            }
        } catch (error) {
            this.endTimeElement.textContent = 'Error fetching end time';
            console.error('Error fetching end time:', error);
        }
    }

    // Advance the cached server time locally every second and update the
    // countdown. NOTE(review): the interval id is not stored, so this clock
    // cannot be stopped once started -- confirm whether that is intended.
    startClock() {
        const updateClock = () => {
            if (this.serverTime) {
                this.serverTime.setSeconds(this.serverTime.getSeconds() + 1);
                this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`;
            }
            if (this.endTime && this.serverTime) {
                const timeLeft = this.endTime - this.serverTime;
                this.timeLeftElement.textContent = `Time Left: ${timeLeft > 0 ? this.formatMillisecondsToHHMMSS(timeLeft) : '00:00:00'}`;
            }
        };

        // Immediately update the clock
        updateClock();

        // Continue updating every second
        setInterval(updateClock, 1000);
    }

    // Format a Date as "YYYY-MM-DD HH:MM:SS"; returns '' on invalid input.
    formatDateToYYYYMMDDHHMMSS(date) {
        if (!(date instanceof Date) || isNaN(date)) {
            console.error('Invalid date:', date);
            return '';
        }
        return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ` +
            `${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`;
    }

    // Format a Date as "HH:MM:SS"; returns '' on invalid input.
    formatDateToHHMMSS(date) {
        if (!(date instanceof Date) || isNaN(date)) {
            console.error('Invalid date:', date);
            return '';
        }
        return `${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`;
    }

    // Format a millisecond duration as "HH:MM:SS" (floors sub-second part).
    formatMillisecondsToHHMMSS(ms) {
        const totalSeconds = Math.floor(ms / 1000);
        const hours = Math.floor(totalSeconds / 3600);
        const minutes = Math.floor((totalSeconds % 3600) / 60);
        const seconds = totalSeconds % 60;
        return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
    }

    // Add cleanup method
    cleanup() {
        this.stopPolling();
    }
}

// Add event listener for page unload
// NOTE(review): nothing in this file assigns window.scraperUtils, so this
// cleanup only runs if a consumer sets it -- verify against the pages that
// instantiate ScraperUtils.
window.addEventListener('unload', () => {
    if (window.scraperUtils) {
        window.scraperUtils.cleanup();
    }
});

93
app/tasks.py Normal file
View File

@@ -0,0 +1,93 @@
from celery import Celery
from app.models import Scraper
import redis
from datetime import timedelta
from flask import current_app
def create_celery(broker_url='redis://localhost:6379/0'):
    """Create a Celery app configured for JSON serialization.

    Args:
        broker_url: Broker connection URL. Defaults to the local Redis
            instance used in development; deployments (Docker/fly.io) can
            pass their own URL instead of editing this module.

    Returns:
        A configured :class:`celery.Celery` instance named ``tasks``.
    """
    celery = Celery('tasks', broker=broker_url)
    celery.conf.update(
        task_serializer='json',
        accept_content=['json'],
        result_serializer='json',
        timezone='UTC',
    )
    return celery
def init_celery(app):
    """Bind a Celery instance to *app* so every task runs inside its Flask
    application context (current_app, extensions, config all available)."""
    celery_app = create_celery()
    celery_app.conf.update(app.config)

    class ContextTask(celery_app.Task):
        # Push the Flask app context around each task invocation.
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app.Task = ContextTask
    return celery_app
celery = create_celery() # This will be initialized properly in app/__init__.py
def get_redis():
    """Return a Redis client for scraper state, decoding responses to str.

    Connection details default to the local development instance but may be
    overridden via the REDIS_HOST / REDIS_PORT / REDIS_DB environment
    variables -- needed when Redis is not on localhost (e.g. the Docker /
    fly.io deployment).
    """
    import os  # local import to keep this module's top-level imports unchanged
    return redis.StrictRedis(
        host=os.environ.get('REDIS_HOST', 'localhost'),
        port=int(os.environ.get('REDIS_PORT', '6379')),
        db=int(os.environ.get('REDIS_DB', '0')),
        decode_responses=True,
    )
@celery.task
def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
    """
    Start scraping task with serializable parameters

    Args:
        faction_id: ID of the faction to scrape
        fetch_interval: Interval between fetches in seconds
        run_interval: How long to run the scraper in days
        config_dict: Dictionary containing configuration

    Returns:
        dict with "status" ("success" or "error") and, on error, a "message".
    """
    try:
        redis_client = get_redis()
        # Mark this faction as the currently-running scrape before starting.
        redis_client.set("current_faction_id", str(faction_id))

        scraper = Scraper(
            faction_id=faction_id,
            fetch_interval=int(fetch_interval),
            run_interval=int(run_interval),
            config=config_dict
        )

        scraper.start_scraping()
        return {"status": "success"}
    except Exception as e:
        # Clean up Redis state on error -- but only clear current_faction_id
        # if it still refers to *this* faction; another task may have claimed
        # it in the meantime (same guard as stop_scraping_task).
        redis_client = get_redis()
        current_id = redis_client.get("current_faction_id")
        if current_id == str(faction_id):
            redis_client.delete("current_faction_id")
        return {"status": "error", "message": str(e)}
@celery.task
def stop_scraping_task(faction_id):
    """Stop scraping task and clean up Redis state.

    Signals the running scraper to stop by clearing its Redis flag and hash;
    the scraper loop observes "scraping_active" and shuts itself down (the
    same mechanism the stop_scraping.py CLI helper uses).

    Args:
        faction_id: ID of the faction whose scrape should stop.

    Returns:
        dict with "status" and a human-readable "message".
    """
    try:
        redis_client = get_redis()

        # Flip the active flag first so a scraper polling mid-cleanup sees
        # it, then drop the whole per-faction hash.
        redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
        redis_client.delete(f"scraper:{faction_id}")

        # Clear the global pointer only if it still refers to this faction.
        current_id = redis_client.get("current_faction_id")
        if current_id and current_id == str(faction_id):
            redis_client.delete("current_faction_id")

        # NOTE: the previous implementation revoked celery.current_task here,
        # but inside this task that is stop_scraping_task's *own* request id,
        # so it SIGTERM'd the stop task itself rather than the scraper. The
        # Redis flag above is the supported stop signal.

        return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
    except Exception as e:
        return {"status": "error", "message": str(e)}

View File

@@ -22,6 +22,9 @@
{% block content %} {% block content %}
{% endblock %} {% endblock %}
</main> </main>
<footer>
{% include 'includes/footer.html' %}
</footer>
{% block scripts %} {% block scripts %}
{% include 'includes/scripts.html' %} {% include 'includes/scripts.html' %}
{% endblock %} {% endblock %}

View File

@@ -7,7 +7,7 @@
<div class="col"> <div class="col">
<h2>Data Files</h2> <h2>Data Files</h2>
</div> </div>
<div class="col"> <div class="col text-end">
<div class="btn-group btn-group-sm"> <div class="btn-group btn-group-sm">
<button class="btn btn-warning" onclick="deleteSelectedFiles()">Delete Selected Files</button> <button class="btn btn-warning" onclick="deleteSelectedFiles()">Delete Selected Files</button>
<button class="btn btn-success" onclick="downloadSelectedFiles()">Download Selected Files</button> <button class="btn btn-success" onclick="downloadSelectedFiles()">Download Selected Files</button>
@@ -67,7 +67,7 @@
<table id="logFilesTable" class="table table-striped table-bordered table-hover"> <table id="logFilesTable" class="table table-striped table-bordered table-hover">
<thead> <thead>
<tr> <tr>
<th width="2%"><input type="checkbox" id="checkAllLog" onclick="checkAllCheckboxes('logFilesTable', 'checkAllLog')"></th> <th width="2%"><input type="checkbox" id="checkAllLog" class="form-check-input" onclick="checkAllCheckboxes('logFilesTable', 'checkAllLog')"></th>
<th onclick="sortTable(1, 'logFilesTable')">File Name</th> <th onclick="sortTable(1, 'logFilesTable')">File Name</th>
<th onclick="sortTable(2, 'logFilesTable')">Last Modified</th> <th onclick="sortTable(2, 'logFilesTable')">Last Modified</th>
<th onclick="sortTable(3, 'logFilesTable')">Created</th> <th onclick="sortTable(3, 'logFilesTable')">Created</th>

View File

View File

@@ -1,7 +1,6 @@
<!-- app/templates/includes/navigation.html -->
<nav class="navbar navbar-nav navbar-expand-md bg-primary"> <nav class="navbar navbar-nav navbar-expand-md bg-primary">
<div class="container-fluid"> <div class="container-fluid">
<a class="navbar-brand" href="/">Torn User Activity Scraper</a> <a class="navbar-brand" href="/">{{ main_config.APP_TITLE }}</a>
{% from 'bootstrap4/nav.html' import render_nav_item %} {% from 'bootstrap4/nav.html' import render_nav_item %}
{{ render_nav_item('views.analyze', 'Data Visualization') }} {{ render_nav_item('views.analyze', 'Data Visualization') }}
{{ render_nav_item('download_results', 'Files') }} {{ render_nav_item('download_results', 'Files') }}
@@ -15,3 +14,26 @@
</div> </div>
</div> </div>
</nav> </nav>
<div id="status_container" class="container-fluid d-flex justify-content-center">
<div class="container-md my-1 shadow p-4 pb-0 m-1 w-50" id="status_badges">
<div id="loading_indicator" class="alert alert-info">Loading...</div>
<div id="status_content">
<div class="row justify-content-center">
<div class="col col-6 p-1">
<div id="activity_indicator" class="alert alert-danger fw-bolder">Inactive</div>
</div>
<div class="col col-6 p-1">
<div id="server_time" class="alert alert-primary">Server Time (TCT):</div>
</div>
</div>
<div class="row justify-content-center">
<div class="col col-6 p-1">
<div id="end_time" class="alert alert-info">Running until:</div>
</div>
<div class="col p-1">
<div id="time-left" class="alert alert-info">Time Left:</div>
</div>
</div>
</div>
</div>
</div>

View File

@@ -1,3 +1,3 @@
{{ bootstrap.load_js() }} {{ bootstrap.load_js() }}
<script src="{{url_for('static', filename='color_mode.js')}}"></script> <script src="{{url_for('static', filename='color_mode.js')}}"></script>
<script src="{{ url_for('static', filename='common.js') }}"></script> <script type="module" src="{{ url_for('static', filename='common.js') }}"></script>

View File

@@ -2,7 +2,13 @@
{% block content %} {% block content %}
<section id="scrapingFormContainer" class="container-fluid d-flex justify-content-center"> <section id="scrapingFormContainer" class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 "> <div class="container-md my-5 mx-2 shadow-lg p-4 ">
<h2>Scraper <span id="activity_indicator" class="badge text-bg-danger">Inactive</span></h2> <div class="row">
<div class="col">
<h2>Scraper</h2>
</div>
<div class="col text-end">
</div>
</div>
<form id="scrapingForm" method="POST" action="{{ url_for('start_scraping') }}"> <form id="scrapingForm" method="POST" action="{{ url_for('start_scraping') }}">
{{ form.hidden_tag() }} {{ form.hidden_tag() }}
<div class="form-group"> <div class="form-group">
@@ -24,23 +30,5 @@
</div> </div>
</div> </div>
</section> </section>
<section id="resultsContainer" class="container-fluid d-flex justify-content-center"> <script type="module" src="{{url_for('static', filename='index.js')}}"></script>
<div class="container-md my-5 mx-2 shadow-lg p-4" style="height: 500px;">
<div class="row">
<div class="col-8">
<h2>Logs</h2>
<pre id="logs" class="pre-scrollable" style="height: 350px; overflow:scroll; "><code></code></pre>
<div class="btn-group btn-group-sm">
<button class="btn btn-primary" id="prevPage">Previous</button>
<button class="btn btn-primary" id="pageInfo" disabled>Page 1 of 1</button>
<button class="btn btn-primary" id="nextPage">Next</button>
</div>
</div>
<div class="col">
<h2>Stats</h2>
</div>
</div>
</div>
</section>
<script src="{{url_for('static', filename='index.js')}}"></script>
{% endblock content %} {% endblock content %}

View File

@@ -1,3 +1,22 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
<section id="resultsContainer" class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4" style="height: 500px;">
<div class="row">
<div class="col-8">
<h2>Logs</h2>
<pre id="logs" class="pre-scrollable" style="height: 350px; overflow:scroll;"><code></code></pre>
<div class="btn-group btn-group-sm">
<button class="btn btn-primary" id="prevPage">Previous</button>
<button class="btn btn-primary" id="pageInfo" disabled>Page 1 of 1</button>
<button class="btn btn-primary" id="nextPage">Next</button>
</div>
</div>
<div class="col">
<h2>Stats</h2>
</div>
</div>
</div>
</section>
<script src="{{url_for('static', filename='log_viewer.js')}}"></script>
{% endblock content %} {% endblock content %}

View File

@@ -1,13 +1,10 @@
import os import os
import zipfile import zipfile
from datetime import datetime, timedelta from datetime import datetime, timedelta
from flask import current_app
from app.state import data_file_name, log_file_name
from app.config import load_config from app.config import load_config
config = load_config()
def create_zip(file_paths, zip_name, app): def create_zip(file_paths, zip_name, app):
temp_dir = os.path.abspath(app.config['TEMP']['TEMP_DIR']) temp_dir = os.path.abspath(app.config['TEMP']['TEMP_DIR'])
zip_path = os.path.join(temp_dir, zip_name) zip_path = os.path.join(temp_dir, zip_name)
@@ -18,7 +15,7 @@ def create_zip(file_paths, zip_name, app):
return zip_path return zip_path
def delete_old_zips(): def delete_old_zips():
temp_dir = os.path.abspath(config['TEMP']['TEMP_DIR']) temp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
now = datetime.now() now = datetime.now()
for filename in os.listdir(temp_dir): for filename in os.listdir(temp_dir):
if filename.endswith('.zip'): if filename.endswith('.zip'):
@@ -33,7 +30,7 @@ def tail(filename, n):
yield '' yield ''
return return
page_size = int(config['LOGGING']['TAIL_PAGE_SIZE']) page_size = int(current_app.config['LOGGING']['TAIL_PAGE_SIZE'])
offsets = [] offsets = []
count = _n = n if n >= 0 else -n count = _n = n if n >= 0 else -n

View File

@@ -2,24 +2,27 @@ import os
import glob import glob
from flask import render_template, Blueprint, current_app, request from flask import render_template, Blueprint, current_app, request
from app.tasks import get_redis
from app.forms import ScrapingForm from app.forms import ScrapingForm
from app.util import get_size from app.util import get_size
from app.config import load_config from app.config import load_config
from app.api import scraper as scraper from app.api import scraper as scraper
from app.logging_config import get_logger
from app.analysis import load_data, load_analysis_modules from app.analysis import load_data, load_analysis_modules
from datetime import datetime
from app.state import log_file_name
print(f"A imported log_file_name: {log_file_name}")
config = load_config()
logger = get_logger()
views_bp = Blueprint("views", __name__) views_bp = Blueprint("views", __name__)
def sizeof_fmt(num, suffix="B"):
"""Convert bytes to human readable format"""
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
if abs(num) < 1024.0:
return f"{num:3.1f} {unit}{suffix}"
num /= 1024.0
return f"{num:.1f} Yi{suffix}"
def register_views(app): def register_views(app):
@app.route('/') @app.route('/')
def index(): def index():
@@ -36,49 +39,59 @@ def register_views(app):
@app.route('/download_results') @app.route('/download_results')
def download_results(): def download_results():
log_file_name = os.path.abspath(app.config['LOG_FILE_NAME']) # Get the current active log file and data file from Redis and app config
scraper = app.config.get('SCRAPER') redis_client = get_redis()
current_faction_id = redis_client.get("current_faction_id")
if scraper: active_data_file = None
print(scraper.data_file_name) if current_faction_id:
if not scraper: active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name")
print("Scraper not initialized")
data_dir = os.path.abspath(config['DATA']['DATA_DIR']) active_log_file = app.config['LOG_FILE_NAME']
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
data_files = glob.glob(os.path.join(data_dir, "*.csv")) def get_file_info(file_path, file_type='data'):
log_files = glob.glob(os.path.join(log_dir, "*.log")) stats = os.stat(file_path)
name = os.path.basename(file_path)
# Determine if file is active
is_active = False
if file_type == 'data' and active_data_file:
is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file)
elif file_type == 'log' and active_log_file:
is_active = os.path.basename(file_path) == os.path.basename(active_log_file)
def get_file_info(file_path):
return { return {
"name": file_path, 'name': file_path, # Full path for internal use
"name_display": os.path.basename(file_path), 'name_display': name, # Just filename for display
"last_modified": os.path.getmtime(file_path), 'last_modified': stats.st_mtime, # Send timestamp instead of datetime
"created": os.path.getctime(file_path), 'created': stats.st_ctime, # Send timestamp instead of datetime
"size": get_size(file_path) 'size': sizeof_fmt(stats.st_size),
'active': is_active
} }
data_files_info = [get_file_info(file) for file in data_files] data_files = []
log_files_info = [get_file_info(file) for file in log_files] log_files = []
if scraper and scraper.scraping_active: # Get data files
for data_file in data_files_info: data_dir = os.path.abspath(app.config['DATA']['DATA_DIR'])
if os.path.abspath(scraper.data_file_name) == data_file['name']: if os.path.exists(data_dir):
data_file['active'] = True for file in glob.glob(os.path.join(data_dir, "*.csv")):
else: data_files.append(get_file_info(file, 'data'))
data_file['active'] = False
for log_file in log_files_info: # Get log files
if log_file_name == os.path.abspath(log_file['name']): log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR'])
log_file['active'] = True if os.path.exists(log_dir):
else: for file in glob.glob(os.path.join(log_dir, "*.log")):
log_file['active'] = False log_files.append(get_file_info(file, 'log'))
data_files_info.sort(key=lambda x: x['last_modified'], reverse=True) # Sort files by modification time, newest first
log_files_info.sort(key=lambda x: x['last_modified'], reverse=True) data_files.sort(key=lambda x: x['last_modified'], reverse=True)
log_files.sort(key=lambda x: x['last_modified'], reverse=True)
files = {"data": data_files_info, "log": log_files_info} files = {
'data': data_files,
'log': log_files
}
return render_template('download_results.html', files=files) return render_template('download_results.html', files=files)
@@ -124,4 +137,10 @@ def register_views(app):
context["results"] = results context["results"] = results
return render_template("analyze.html", **context) return render_template("analyze.html", **context)
@views_bp.route('/server_time')
def server_time():
    """Return the current UTC server time as JSON: {'server_time': 'YYYY-MM-DD HH:MM:SS'}."""
    # This module does `from datetime import datetime`, so the previous
    # `datetime.now(datetime.timezone.utc)` raised AttributeError on every
    # request; import timezone locally to avoid touching module imports.
    from datetime import timezone
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    return {'server_time': current_time}
app.register_blueprint(views_bp) app.register_blueprint(views_bp)

View File

@@ -1,3 +1,7 @@
# All main config options will be passed to template engine
[MAIN]
APP_TITLE = 'Torn User Activity Grabber'
[DEFAULT] [DEFAULT]
SECRET_KEY = your_secret_key SECRET_KEY = your_secret_key
API_KEY = your_api_key API_KEY = your_api_key

20
fly.toml Normal file
View File

@@ -0,0 +1,20 @@
# fly.toml app configuration file generated for tornactivitytracker on 2025-02-11T02:59:23+01:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'tornactivitytracker'
primary_region = 'fra'
[build]
[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = 'stop'
auto_start_machines = true
min_machines_running = 0
processes = ['app']
[[vm]]
size = 'shared-cpu-2x'

View File

@@ -7,3 +7,5 @@ requests
matplotlib matplotlib
seaborn seaborn
configparser configparser
plotly
configobj

View File

@@ -14,6 +14,8 @@ charset-normalizer==3.4.1
# via requests # via requests
click==8.1.8 click==8.1.8
# via flask # via flask
configobj==5.0.9
# via -r requirements.in
configparser==7.1.0 configparser==7.1.0
# via -r requirements.in # via -r requirements.in
contourpy==1.3.1 contourpy==1.3.1
@@ -48,6 +50,8 @@ matplotlib==3.10.0
# via # via
# -r requirements.in # -r requirements.in
# seaborn # seaborn
narwhals==1.26.0
# via plotly
numpy==2.2.2 numpy==2.2.2
# via # via
# contourpy # contourpy
@@ -55,13 +59,17 @@ numpy==2.2.2
# pandas # pandas
# seaborn # seaborn
packaging==24.2 packaging==24.2
# via matplotlib # via
# matplotlib
# plotly
pandas==2.2.3 pandas==2.2.3
# via # via
# -r requirements.in # -r requirements.in
# seaborn # seaborn
pillow==11.1.0 pillow==11.1.0
# via matplotlib # via matplotlib
plotly==6.0.0
# via -r requirements.in
pyparsing==3.2.1 pyparsing==3.2.1
# via matplotlib # via matplotlib
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
@@ -70,6 +78,7 @@ python-dateutil==2.9.0.post0
# pandas # pandas
pytz==2025.1 pytz==2025.1
# via pandas # via pandas
redis==5.0.1
requests==2.32.3 requests==2.32.3
# via -r requirements.in # via -r requirements.in
seaborn==0.13.2 seaborn==0.13.2
@@ -86,3 +95,4 @@ wtforms==3.2.1
# via # via
# bootstrap-flask # bootstrap-flask
# flask-wtf # flask-wtf
celery==5.3.6

7
run.py
View File

@@ -1,5 +1,6 @@
from app.app import init_app from app import create_app
app = create_app()
if __name__ == '__main__': if __name__ == '__main__':
app = init_app() app.run(debug=True)
app.run(debug=True, threaded=True)

50
stop_scraping.py Normal file
View File

@@ -0,0 +1,50 @@
import redis
import argparse
def get_redis():
    """Build a Redis client for the local instance, decoding responses to str."""
    connection_kwargs = {
        'host': 'localhost',
        'port': 6379,
        'db': 0,
        'decode_responses': True,
    }
    return redis.StrictRedis(**connection_kwargs)
def stop_scraping(flush=False, force=False):
    """Signal the currently active scraper (if any) to stop via Redis.

    Args:
        flush: If True, wipe ALL Redis data instead of a targeted stop.
        force: If True, report success even when no active session exists.

    Returns:
        True when a stop signal was sent (or the flush was performed);
        otherwise whether `force` was requested.
    """
    client = get_redis()

    if flush:
        client.flushall()
        print("Flushed all Redis data")
        return True

    faction_id = client.get("current_faction_id")
    if faction_id:
        client.hset(f"scraper:{faction_id}", "scraping_active", "0")
        print(f"Sent stop signal to scraping process for faction {faction_id}")
        return True

    print("No active scraping session found.")
    return bool(force)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.')
    parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found')
    parser.add_argument('--flush', action='store_true', help='Flush all Redis data (WARNING: This will clear ALL Redis data)')
    args = parser.parse_args()

    if args.flush:
        if input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ").lower() != 'y':
            print("Operation cancelled.")
            exit(0)

    # Do NOT pass force here: stop_scraping(force=True) reports success even
    # when no session exists, which made the force-all fallback below
    # unreachable dead code.
    success = stop_scraping(flush=args.flush)

    if not success and args.force:
        print("Forcing stop for all potential scraping processes...")
        redis_client = get_redis()
        # Flip the active flag on every scraper hash still present in Redis.
        for key in redis_client.keys("scraper:*"):
            redis_client.hset(key, "scraping_active", "0")
        print("Sent stop signal to all potential scraping processes.")

File diff suppressed because one or more lines are too long