Compare commits
5 Commits
production
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| adc002b38d | |||
| 5994d8ae7b | |||
| f68ada7204 | |||
| 2217bd5855 | |||
| fa5d59b069 |
25
README.md
25
README.md
@@ -1,18 +1,22 @@
|
|||||||
# Torn User Activity Scraper
|
# Torn User Activity Tracker
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> **Development is still in its early stages; do not use it in production!**
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Start and stop scraping user activity data
|
- Start and stop scraping user activity data
|
||||||
- View real-time logs
|
- View real-time logs
|
||||||
- Download data and log files
|
- Download data and log files
|
||||||
- View scraping results and statistics
|
- View scraping results
|
||||||
|
- Plugin based analysis system
|
||||||
- Toggle between light and dark mode
|
- Toggle between light and dark mode
|
||||||
|
|
||||||
**Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
|
**Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
|
||||||
|
|
||||||
## Planned Features
|
## Planned Features
|
||||||
|
|
||||||
- Additional analyses
|
- Additional analysis plugins
|
||||||
- Selector for Torn API data to choose which data shall be tracked
|
- Selector for Torn API data to choose which data shall be tracked
|
||||||
- Improved / fixed log viewer
|
- Improved / fixed log viewer
|
||||||
|
|
||||||
@@ -24,6 +28,21 @@
|
|||||||
- Flask-WTF
|
- Flask-WTF
|
||||||
- Pandas
|
- Pandas
|
||||||
- Requests
|
- Requests
|
||||||
|
- Redis
|
||||||
|
- Celery
|
||||||
|
|
||||||
|
Redis currently has to run locally, but this will change in the future. Until then, to connect to a different Redis instance, adjust the connection settings in tasks.py:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# tasks.py
|
||||||
|
def get_redis():
|
||||||
|
return redis.StrictRedis(
|
||||||
|
host='localhost',
|
||||||
|
port=6379,
|
||||||
|
db=0,
|
||||||
|
decode_responses=True
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -7,15 +7,18 @@ from app.views import register_views
|
|||||||
from app.api import register_api
|
from app.api import register_api
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.filters import register_filters
|
from app.filters import register_filters
|
||||||
|
from app.tasks import celery
|
||||||
|
|
||||||
from app.logging_config import init_logger
|
from app.logging_config import init_logger
|
||||||
|
|
||||||
def create_app():
|
def create_app(config=None):
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
os.environ['TZ'] = 'UTC'
|
if config is None:
|
||||||
|
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
app.config.update(config)
|
||||||
|
|
||||||
|
os.environ['TZ'] = 'UTC'
|
||||||
|
|
||||||
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
|
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
|
||||||
|
|
||||||
@@ -23,6 +26,9 @@ def create_app():
|
|||||||
for key, value in config.get('BOOTSTRAP', {}).items():
|
for key, value in config.get('BOOTSTRAP', {}).items():
|
||||||
app.config[key.upper()] = value
|
app.config[key.upper()] = value
|
||||||
|
|
||||||
|
# Initialize Celery
|
||||||
|
celery.conf.update(app.config)
|
||||||
|
|
||||||
bootstrap = Bootstrap5(app)
|
bootstrap = Bootstrap5(app)
|
||||||
|
|
||||||
# Store the entire config in Flask app
|
# Store the entire config in Flask app
|
||||||
|
|||||||
85
app/api.py
85
app/api.py
@@ -1,4 +1,3 @@
|
|||||||
# filepath: /home/michaelb/Dokumente/TornActivityTracker/app/api.py
|
|
||||||
from flask import jsonify, request, Response, send_from_directory, current_app
|
from flask import jsonify, request, Response, send_from_directory, current_app
|
||||||
import threading
|
import threading
|
||||||
import os
|
import os
|
||||||
@@ -10,6 +9,7 @@ from app.models import Scraper
|
|||||||
from app.util import create_zip, delete_old_zips, tail
|
from app.util import create_zip, delete_old_zips, tail
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.forms import ScrapingForm
|
from app.forms import ScrapingForm
|
||||||
|
from app.tasks import start_scraping_task, stop_scraping_task, get_redis
|
||||||
|
|
||||||
scraping_thread = None
|
scraping_thread = None
|
||||||
scraper = None
|
scraper = None
|
||||||
@@ -18,42 +18,40 @@ scrape_lock = threading.Lock()
|
|||||||
def register_api(app):
|
def register_api(app):
|
||||||
@app.route('/start_scraping', methods=['POST'])
|
@app.route('/start_scraping', methods=['POST'])
|
||||||
def start_scraping():
|
def start_scraping():
|
||||||
global scraping_thread, scraper
|
|
||||||
with scrape_lock:
|
|
||||||
scraper = current_app.config.get('SCRAPER')
|
|
||||||
if scraper is not None and scraper.scraping_active:
|
|
||||||
current_app.logger.warning("Can't start scraping process: scraping already in progress")
|
|
||||||
return jsonify({"status": "Scraping already in progress"})
|
|
||||||
|
|
||||||
form = ScrapingForm()
|
form = ScrapingForm()
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
|
redis_client = get_redis()
|
||||||
faction_id = form.faction_id.data
|
faction_id = form.faction_id.data
|
||||||
fetch_interval = form.fetch_interval.data
|
|
||||||
run_interval = form.run_interval.data
|
|
||||||
|
|
||||||
scraper = Scraper(faction_id, fetch_interval, run_interval, app)
|
# Check if scraping is already active
|
||||||
scraper.scraping_active = True
|
if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
|
||||||
|
return jsonify({"status": "Scraping already in progress"})
|
||||||
|
|
||||||
scraping_thread = threading.Thread(target=scraper.start_scraping, args=(app,))
|
# Convert config to a serializable dict with only needed values
|
||||||
scraping_thread.daemon = True
|
config_dict = {
|
||||||
scraping_thread.start()
|
'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
|
||||||
|
'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
|
||||||
current_app.config['SCRAPER'] = scraper
|
}
|
||||||
current_app.config['SCRAPING_THREAD'] = scraping_thread
|
|
||||||
|
|
||||||
|
start_scraping_task.delay(
|
||||||
|
faction_id,
|
||||||
|
int(form.fetch_interval.data), # Ensure this is an int
|
||||||
|
int(form.run_interval.data), # Ensure this is an int
|
||||||
|
config_dict
|
||||||
|
)
|
||||||
return jsonify({"status": "Scraping started"})
|
return jsonify({"status": "Scraping started"})
|
||||||
return jsonify({"status": "Invalid form data"})
|
return jsonify({"status": "Invalid form data"})
|
||||||
|
|
||||||
@app.route('/stop_scraping', methods=['POST'])
|
@app.route('/stop_scraping', methods=['POST'])
|
||||||
def stop_scraping():
|
def stop_scraping():
|
||||||
scraper = current_app.config.get('SCRAPER')
|
redis_client = get_redis()
|
||||||
if scraper is None or not scraper.scraping_active:
|
faction_id = redis_client.get("current_faction_id")
|
||||||
return jsonify({"status": "Scraping is not running"})
|
if not faction_id:
|
||||||
|
return jsonify({"status": "No active scraping session"})
|
||||||
|
|
||||||
|
stop_scraping_task.delay(faction_id)
|
||||||
|
return jsonify({"status": "Stopping scraping"})
|
||||||
|
|
||||||
scraper.stop_scraping()
|
|
||||||
current_app.config['SCRAPING_ACTIVE'] = False
|
|
||||||
current_app.logger.debug("Scraping stopped by user")
|
|
||||||
return jsonify({"status": "Scraping stopped"})
|
|
||||||
@app.route('/logfile', methods=['GET'])
|
@app.route('/logfile', methods=['GET'])
|
||||||
def logfile():
|
def logfile():
|
||||||
log_file_name = current_app.logger.handlers[0].baseFilename
|
log_file_name = current_app.logger.handlers[0].baseFilename
|
||||||
@@ -196,20 +194,37 @@ def register_api(app):
|
|||||||
|
|
||||||
@app.route('/scraping_status', methods=['GET'])
|
@app.route('/scraping_status', methods=['GET'])
|
||||||
def scraping_status():
|
def scraping_status():
|
||||||
if scraper is None:
|
redis_client = get_redis()
|
||||||
current_app.logger.debug("Scraper is not initialized.")
|
current_faction_id = redis_client.get("current_faction_id")
|
||||||
|
|
||||||
|
if not current_faction_id:
|
||||||
return jsonify({"scraping_active": False})
|
return jsonify({"scraping_active": False})
|
||||||
|
|
||||||
if scraper.scraping_active:
|
scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
|
||||||
current_app.logger.debug("Scraping is active.")
|
|
||||||
return jsonify({"scraping_active": True})
|
# If we have a faction_id but scraping is not active, clean up the stale state
|
||||||
else:
|
if not scraping_active or scraping_active == "0":
|
||||||
current_app.logger.debug("Scraping is not active.")
|
redis_client.delete("current_faction_id")
|
||||||
return jsonify({"scraping_active": False})
|
return jsonify({"scraping_active": False})
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"scraping_active": True,
|
||||||
|
"faction_id": current_faction_id
|
||||||
|
})
|
||||||
|
|
||||||
@app.route('/scraping_get_end_time')
|
@app.route('/scraping_get_end_time')
|
||||||
def scraping_get_end_time():
|
def scraping_get_end_time():
|
||||||
if scraper is None:
|
redis_client = get_redis()
|
||||||
current_app.logger.debug("Scraper is not initialized.")
|
current_faction_id = redis_client.get("current_faction_id")
|
||||||
|
|
||||||
|
if not current_faction_id:
|
||||||
return jsonify({"scraping_active": False})
|
return jsonify({"scraping_active": False})
|
||||||
return jsonify({"end_time": scraper.end_time})
|
|
||||||
|
end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
|
||||||
|
if not end_time:
|
||||||
|
return jsonify({"scraping_active": False})
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"end_time": end_time,
|
||||||
|
"faction_id": current_faction_id
|
||||||
|
})
|
||||||
|
|||||||
@@ -4,4 +4,12 @@ from datetime import datetime
|
|||||||
def register_filters(app):
|
def register_filters(app):
|
||||||
@app.template_filter('datetimeformat')
|
@app.template_filter('datetimeformat')
|
||||||
def datetimeformat(value):
|
def datetimeformat(value):
|
||||||
return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')
|
"""Convert datetime or timestamp to formatted string"""
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
dt = value
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
dt = datetime.fromtimestamp(float(value))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return str(value)
|
||||||
|
return dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||||
@@ -5,21 +5,60 @@ import os
|
|||||||
import time
|
import time
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from requests.exceptions import ConnectionError, Timeout, RequestException
|
from requests.exceptions import ConnectionError, Timeout, RequestException
|
||||||
|
import redis
|
||||||
|
import threading
|
||||||
|
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
|
|
||||||
class Scraper:
|
class Scraper:
|
||||||
def __init__(self, faction_id, fetch_interval, run_interval, app):
|
_instances = {} # Track all instances by faction_id
|
||||||
|
_lock = threading.Lock()
|
||||||
|
|
||||||
|
def __new__(cls, faction_id, *args, **kwargs):
|
||||||
|
with cls._lock:
|
||||||
|
# Stop any existing instance for this faction
|
||||||
|
if faction_id in cls._instances:
|
||||||
|
old_instance = cls._instances[faction_id]
|
||||||
|
old_instance.stop_scraping()
|
||||||
|
|
||||||
|
instance = super().__new__(cls)
|
||||||
|
cls._instances[faction_id] = instance
|
||||||
|
return instance
|
||||||
|
|
||||||
|
def __init__(self, faction_id, fetch_interval, run_interval, config):
|
||||||
|
# Only initialize if not already initialized
|
||||||
|
if not hasattr(self, 'faction_id'):
|
||||||
|
self.redis_client = redis.StrictRedis(
|
||||||
|
host='localhost', port=6379, db=0, decode_responses=True
|
||||||
|
)
|
||||||
self.faction_id = faction_id
|
self.faction_id = faction_id
|
||||||
self.fetch_interval = fetch_interval
|
self.fetch_interval = fetch_interval
|
||||||
self.run_interval = run_interval
|
self.run_interval = run_interval
|
||||||
self.end_time = datetime.now() + timedelta(days=run_interval)
|
self.API_KEY = config['DEFAULT']['API_KEY']
|
||||||
self.data_file_name = os.path.join(app.config['DATA']['DATA_DIR'], f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv")
|
self.data_file_name = os.path.join(
|
||||||
self.scraping_active = False
|
config['DATA']['DATA_DIR'],
|
||||||
self.API_KEY = app.config['DEFAULT']['API_KEY']
|
f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
|
||||||
self.logger = app.logger
|
)
|
||||||
|
self.end_time = datetime.now() + timedelta(days=int(run_interval))
|
||||||
|
|
||||||
print(self.data_file_name)
|
# Store scraper state in Redis
|
||||||
|
self.redis_client.hmset(f"scraper:{faction_id}", {
|
||||||
|
"faction_id": faction_id,
|
||||||
|
"fetch_interval": fetch_interval,
|
||||||
|
"run_interval": run_interval,
|
||||||
|
"end_time": self.end_time.isoformat(),
|
||||||
|
"data_file_name": self.data_file_name,
|
||||||
|
"scraping_active": "0",
|
||||||
|
"api_key": self.API_KEY
|
||||||
|
})
|
||||||
|
|
||||||
|
@property
|
||||||
|
def scraping_active(self):
|
||||||
|
return bool(int(self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active")))
|
||||||
|
|
||||||
|
@scraping_active.setter
|
||||||
|
def scraping_active(self, value):
|
||||||
|
self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
|
||||||
|
|
||||||
def fetch_faction_data(self):
|
def fetch_faction_data(self):
|
||||||
url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
|
url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
|
||||||
@@ -48,16 +87,14 @@ class Scraper:
|
|||||||
time.sleep(2 ** attempt) # Exponential backoff
|
time.sleep(2 ** attempt) # Exponential backoff
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def start_scraping(self, app) -> None:
|
def start_scraping(self) -> None:
|
||||||
"""Starts the scraping process until the end time is reached or stopped manually."""
|
"""Starts the scraping process until the end time is reached or stopped manually."""
|
||||||
self.scraping_active = True
|
self.scraping_active = True
|
||||||
|
|
||||||
# Anwendungskontext explizit setzen
|
|
||||||
with app.app_context():
|
|
||||||
current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
|
current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
|
||||||
current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
|
current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
|
||||||
|
|
||||||
MAX_FAILURES = 5 # Stop after 5 consecutive failures
|
MAX_FAILURES = 5
|
||||||
failure_count = 0
|
failure_count = 0
|
||||||
|
|
||||||
while datetime.now() < self.end_time and self.scraping_active:
|
while datetime.now() < self.end_time and self.scraping_active:
|
||||||
@@ -122,6 +159,18 @@ class Scraper:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}")
|
current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}")
|
||||||
|
|
||||||
|
def cleanup_redis_state(self):
|
||||||
|
"""Clean up all Redis state for this scraper instance"""
|
||||||
|
if hasattr(self, 'faction_id'):
|
||||||
|
self.redis_client.delete(f"scraper:{self.faction_id}")
|
||||||
|
current_id = self.redis_client.get("current_faction_id")
|
||||||
|
if current_id and current_id == str(self.faction_id):
|
||||||
|
self.redis_client.delete("current_faction_id")
|
||||||
|
# Remove from instances tracking
|
||||||
|
with self._lock:
|
||||||
|
if self.faction_id in self._instances:
|
||||||
|
del self._instances[self.faction_id]
|
||||||
|
|
||||||
def handle_scraping_end(self) -> None:
|
def handle_scraping_end(self) -> None:
|
||||||
"""Handles cleanup and logging when scraping ends."""
|
"""Handles cleanup and logging when scraping ends."""
|
||||||
if not self.scraping_active:
|
if not self.scraping_active:
|
||||||
@@ -133,7 +182,13 @@ class Scraper:
|
|||||||
|
|
||||||
current_app.logger.info("Scraping completed.")
|
current_app.logger.info("Scraping completed.")
|
||||||
self.scraping_active = False
|
self.scraping_active = False
|
||||||
|
self.cleanup_redis_state()
|
||||||
|
|
||||||
def stop_scraping(self):
|
def stop_scraping(self):
|
||||||
self.scraping_active = False
|
self.scraping_active = False
|
||||||
current_app.logger.debug("Scraping stopped by user")
|
self.cleanup_redis_state()
|
||||||
|
current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}")
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
"""Ensure Redis cleanup on object destruction"""
|
||||||
|
self.cleanup_redis_state()
|
||||||
@@ -12,6 +12,7 @@ export class ScraperUtils {
|
|||||||
|
|
||||||
this.serverTime = null;
|
this.serverTime = null;
|
||||||
this.endTime = null;
|
this.endTime = null;
|
||||||
|
this.pollInterval = null; // Add this line
|
||||||
|
|
||||||
this.init();
|
this.init();
|
||||||
}
|
}
|
||||||
@@ -20,7 +21,6 @@ export class ScraperUtils {
|
|||||||
this.showLoadingIndicator();
|
this.showLoadingIndicator();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Ensure each function runs only once
|
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
this.updateServerTime(),
|
this.updateServerTime(),
|
||||||
this.checkScrapingStatus()
|
this.checkScrapingStatus()
|
||||||
@@ -29,8 +29,12 @@ export class ScraperUtils {
|
|||||||
console.error("Error during initialization:", error);
|
console.error("Error during initialization:", error);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure end time is fetched only if scraping is active
|
// Start polling for status updates
|
||||||
if (this.endTime === null) {
|
this.startPolling();
|
||||||
|
|
||||||
|
// Only start the clock and wait for end time if scraping is active
|
||||||
|
if (this.activityIndicator.textContent === 'Active') {
|
||||||
|
if (!this.endTime) {
|
||||||
try {
|
try {
|
||||||
await this.fetchEndTime();
|
await this.fetchEndTime();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -38,22 +42,28 @@ export class ScraperUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure UI is only updated once everything is ready
|
|
||||||
if (this.serverTime && this.endTime) {
|
if (this.serverTime && this.endTime) {
|
||||||
this.startClock();
|
this.startClock();
|
||||||
this.hideLoadingIndicator();
|
|
||||||
} else {
|
|
||||||
console.warn("Delaying hiding the loading indicator due to missing data...");
|
|
||||||
const checkDataInterval = setInterval(() => {
|
|
||||||
if (this.serverTime && this.endTime) {
|
|
||||||
clearInterval(checkDataInterval);
|
|
||||||
this.startClock();
|
|
||||||
this.hideLoadingIndicator();
|
|
||||||
}
|
|
||||||
}, 500);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hide loading indicator regardless of scraping status
|
||||||
|
this.hideLoadingIndicator();
|
||||||
|
}
|
||||||
|
|
||||||
|
startPolling() {
|
||||||
|
// Poll every 2 seconds
|
||||||
|
this.pollInterval = setInterval(async () => {
|
||||||
|
await this.checkScrapingStatus();
|
||||||
|
}, 2000);
|
||||||
|
}
|
||||||
|
|
||||||
|
stopPolling() {
|
||||||
|
if (this.pollInterval) {
|
||||||
|
clearInterval(this.pollInterval);
|
||||||
|
this.pollInterval = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
showLoadingIndicator() {
|
showLoadingIndicator() {
|
||||||
this.statusContainer.classList.remove('d-none');
|
this.statusContainer.classList.remove('d-none');
|
||||||
@@ -79,9 +89,7 @@ export class ScraperUtils {
|
|||||||
this.activityIndicator.classList.add('text-bg-success');
|
this.activityIndicator.classList.add('text-bg-success');
|
||||||
this.activityIndicator.textContent = 'Active';
|
this.activityIndicator.textContent = 'Active';
|
||||||
|
|
||||||
console.log(`Scraping is active until ${data.end_time} TCT`);
|
// Fetch end time if we don't have it yet
|
||||||
|
|
||||||
// Only call fetchEndTime() if endTime is not already set
|
|
||||||
if (!this.endTime) {
|
if (!this.endTime) {
|
||||||
await this.fetchEndTime();
|
await this.fetchEndTime();
|
||||||
}
|
}
|
||||||
@@ -98,6 +106,9 @@ export class ScraperUtils {
|
|||||||
|
|
||||||
this.endTimeElement.classList.add('d-none');
|
this.endTimeElement.classList.add('d-none');
|
||||||
this.timeLeftElement.classList.add('d-none');
|
this.timeLeftElement.classList.add('d-none');
|
||||||
|
|
||||||
|
// Reset end time when inactive
|
||||||
|
this.endTime = null;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error checking scraping status:', error);
|
console.error('Error checking scraping status:', error);
|
||||||
@@ -177,4 +188,16 @@ export class ScraperUtils {
|
|||||||
const seconds = totalSeconds % 60;
|
const seconds = totalSeconds % 60;
|
||||||
return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
|
return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Release resources held by this instance (stops status polling)
|
||||||
|
cleanup() {
|
||||||
|
this.stopPolling();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add event listener for page unload
|
||||||
|
window.addEventListener('unload', () => {
|
||||||
|
if (window.scraperUtils) {
|
||||||
|
window.scraperUtils.cleanup();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|||||||
93
app/tasks.py
Normal file
93
app/tasks.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
from celery import Celery
|
||||||
|
from app.models import Scraper
|
||||||
|
import redis
|
||||||
|
from datetime import timedelta
|
||||||
|
from flask import current_app
|
||||||
|
|
||||||
|
def create_celery():
|
||||||
|
celery = Celery('tasks', broker='redis://localhost:6379/0')
|
||||||
|
celery.conf.update(
|
||||||
|
task_serializer='json',
|
||||||
|
accept_content=['json'],
|
||||||
|
result_serializer='json',
|
||||||
|
timezone='UTC'
|
||||||
|
)
|
||||||
|
return celery
|
||||||
|
|
||||||
|
def init_celery(app):
|
||||||
|
"""Initialize Celery with Flask app context"""
|
||||||
|
celery = create_celery()
|
||||||
|
celery.conf.update(app.config)
|
||||||
|
|
||||||
|
class ContextTask(celery.Task):
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
with app.app_context():
|
||||||
|
return self.run(*args, **kwargs)
|
||||||
|
|
||||||
|
celery.Task = ContextTask
|
||||||
|
return celery
|
||||||
|
|
||||||
|
celery = create_celery() # This will be initialized properly in app/__init__.py
|
||||||
|
|
||||||
|
def get_redis():
|
||||||
|
return redis.StrictRedis(
|
||||||
|
host='localhost',
|
||||||
|
port=6379,
|
||||||
|
db=0,
|
||||||
|
decode_responses=True
|
||||||
|
)
|
||||||
|
|
||||||
|
@celery.task
|
||||||
|
def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
|
||||||
|
"""
|
||||||
|
Start scraping task with serializable parameters
|
||||||
|
Args:
|
||||||
|
faction_id: ID of the faction to scrape
|
||||||
|
fetch_interval: Interval between fetches in seconds
|
||||||
|
run_interval: How long to run the scraper in days
|
||||||
|
config_dict: Dictionary containing configuration
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
redis_client = get_redis()
|
||||||
|
# Set current faction ID at task start
|
||||||
|
redis_client.set("current_faction_id", str(faction_id))
|
||||||
|
|
||||||
|
scraper = Scraper(
|
||||||
|
faction_id=faction_id,
|
||||||
|
fetch_interval=int(fetch_interval),
|
||||||
|
run_interval=int(run_interval),
|
||||||
|
config=config_dict
|
||||||
|
)
|
||||||
|
scraper.start_scraping()
|
||||||
|
return {"status": "success"}
|
||||||
|
except Exception as e:
|
||||||
|
# Clean up Redis state on error
|
||||||
|
redis_client = get_redis()
|
||||||
|
redis_client.delete("current_faction_id")
|
||||||
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
|
@celery.task
|
||||||
|
def stop_scraping_task(faction_id):
|
||||||
|
"""Stop scraping task and clean up Redis state"""
|
||||||
|
try:
|
||||||
|
redis_client = get_redis()
|
||||||
|
|
||||||
|
# Clean up Redis state
|
||||||
|
redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
|
||||||
|
redis_client.delete(f"scraper:{faction_id}")
|
||||||
|
|
||||||
|
# Clean up current_faction_id if it matches
|
||||||
|
current_id = redis_client.get("current_faction_id")
|
||||||
|
if current_id and current_id == str(faction_id):
|
||||||
|
redis_client.delete("current_faction_id")
|
||||||
|
|
||||||
|
# Revoke any running tasks for this faction
|
||||||
|
celery.control.revoke(
|
||||||
|
celery.current_task.request.id,
|
||||||
|
terminate=True,
|
||||||
|
signal='SIGTERM'
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
|
||||||
|
except Exception as e:
|
||||||
|
return {"status": "error", "message": str(e)}
|
||||||
86
app/views.py
86
app/views.py
@@ -2,6 +2,8 @@ import os
|
|||||||
import glob
|
import glob
|
||||||
from flask import render_template, Blueprint, current_app, request
|
from flask import render_template, Blueprint, current_app, request
|
||||||
|
|
||||||
|
from app.tasks import get_redis
|
||||||
|
|
||||||
from app.forms import ScrapingForm
|
from app.forms import ScrapingForm
|
||||||
from app.util import get_size
|
from app.util import get_size
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
@@ -13,6 +15,14 @@ from datetime import datetime
|
|||||||
|
|
||||||
views_bp = Blueprint("views", __name__)
|
views_bp = Blueprint("views", __name__)
|
||||||
|
|
||||||
|
def sizeof_fmt(num, suffix="B"):
|
||||||
|
"""Convert bytes to human readable format"""
|
||||||
|
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
|
||||||
|
if abs(num) < 1024.0:
|
||||||
|
return f"{num:3.1f} {unit}{suffix}"
|
||||||
|
num /= 1024.0
|
||||||
|
return f"{num:.1f} Yi{suffix}"
|
||||||
|
|
||||||
def register_views(app):
|
def register_views(app):
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
def index():
|
def index():
|
||||||
@@ -29,49 +39,59 @@ def register_views(app):
|
|||||||
|
|
||||||
@app.route('/download_results')
|
@app.route('/download_results')
|
||||||
def download_results():
|
def download_results():
|
||||||
log_file_name = os.path.abspath(app.config['LOG_FILE_NAME'])
|
# Get the current active log file and data file from Redis and app config
|
||||||
scraper = app.config.get('SCRAPER')
|
redis_client = get_redis()
|
||||||
|
current_faction_id = redis_client.get("current_faction_id")
|
||||||
|
|
||||||
if scraper:
|
active_data_file = None
|
||||||
print(scraper.data_file_name)
|
if current_faction_id:
|
||||||
if not scraper:
|
active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name")
|
||||||
print("Scraper not initialized")
|
|
||||||
|
|
||||||
data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
|
active_log_file = app.config['LOG_FILE_NAME']
|
||||||
log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
|
|
||||||
|
|
||||||
data_files = glob.glob(os.path.join(data_dir, "*.csv"))
|
def get_file_info(file_path, file_type='data'):
|
||||||
log_files = glob.glob(os.path.join(log_dir, "*.log"))
|
stats = os.stat(file_path)
|
||||||
|
name = os.path.basename(file_path)
|
||||||
|
|
||||||
|
# Determine if file is active
|
||||||
|
is_active = False
|
||||||
|
if file_type == 'data' and active_data_file:
|
||||||
|
is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file)
|
||||||
|
elif file_type == 'log' and active_log_file:
|
||||||
|
is_active = os.path.basename(file_path) == os.path.basename(active_log_file)
|
||||||
|
|
||||||
def get_file_info(file_path):
|
|
||||||
return {
|
return {
|
||||||
"name": file_path,
|
'name': file_path, # Full path for internal use
|
||||||
"name_display": os.path.basename(file_path),
|
'name_display': name, # Just filename for display
|
||||||
"last_modified": os.path.getmtime(file_path),
|
'last_modified': stats.st_mtime, # Send timestamp instead of datetime
|
||||||
"created": os.path.getctime(file_path),
|
'created': stats.st_ctime, # Send timestamp instead of datetime
|
||||||
"size": get_size(file_path)
|
'size': sizeof_fmt(stats.st_size),
|
||||||
|
'active': is_active
|
||||||
}
|
}
|
||||||
|
|
||||||
data_files_info = [get_file_info(file) for file in data_files]
|
data_files = []
|
||||||
log_files_info = [get_file_info(file) for file in log_files]
|
log_files = []
|
||||||
|
|
||||||
if scraper and scraper.scraping_active:
|
# Get data files
|
||||||
for data_file in data_files_info:
|
data_dir = os.path.abspath(app.config['DATA']['DATA_DIR'])
|
||||||
if os.path.abspath(scraper.data_file_name) == data_file['name']:
|
if os.path.exists(data_dir):
|
||||||
data_file['active'] = True
|
for file in glob.glob(os.path.join(data_dir, "*.csv")):
|
||||||
else:
|
data_files.append(get_file_info(file, 'data'))
|
||||||
data_file['active'] = False
|
|
||||||
|
|
||||||
for log_file in log_files_info:
|
# Get log files
|
||||||
if log_file_name == os.path.abspath(log_file['name']):
|
log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR'])
|
||||||
log_file['active'] = True
|
if os.path.exists(log_dir):
|
||||||
else:
|
for file in glob.glob(os.path.join(log_dir, "*.log")):
|
||||||
log_file['active'] = False
|
log_files.append(get_file_info(file, 'log'))
|
||||||
|
|
||||||
data_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
|
# Sort files by modification time, newest first
|
||||||
log_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
|
data_files.sort(key=lambda x: x['last_modified'], reverse=True)
|
||||||
|
log_files.sort(key=lambda x: x['last_modified'], reverse=True)
|
||||||
|
|
||||||
files = {"data": data_files_info, "log": log_files_info}
|
files = {
|
||||||
|
'data': data_files,
|
||||||
|
'log': log_files
|
||||||
|
}
|
||||||
|
|
||||||
return render_template('download_results.html', files=files)
|
return render_template('download_results.html', files=files)
|
||||||
|
|
||||||
@@ -120,7 +140,7 @@ def register_views(app):
|
|||||||
|
|
||||||
@views_bp.route('/server_time')
|
@views_bp.route('/server_time')
|
||||||
def server_time():
    """Return the current server time in UTC.

    Returns:
        dict: ``{'server_time': 'YYYY-MM-DD HH:MM:SS'}`` with the timestamp
        expressed in UTC.
    """
    # This module imports the *class* (``from datetime import datetime``), so
    # ``datetime.timezone`` does not exist and would raise AttributeError.
    # Import the UTC tzinfo explicitly to get a timezone-aware "now".
    from datetime import timezone

    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    return {'server_time': current_time}
|
||||||
|
|
||||||
app.register_blueprint(views_bp)
|
app.register_blueprint(views_bp)
|
||||||
|
|||||||
@@ -9,3 +9,5 @@ seaborn
|
|||||||
configparser
|
configparser
|
||||||
plotly
|
plotly
|
||||||
configobj
|
configobj
|
||||||
|
redis
|
||||||
|
celery
|
||||||
@@ -4,16 +4,33 @@
|
|||||||
#
|
#
|
||||||
# pip-compile requirements.in
|
# pip-compile requirements.in
|
||||||
#
|
#
|
||||||
|
amqp==5.3.1
|
||||||
|
# via kombu
|
||||||
|
billiard==4.2.1
|
||||||
|
# via celery
|
||||||
blinker==1.9.0
|
blinker==1.9.0
|
||||||
# via flask
|
# via flask
|
||||||
bootstrap-flask==2.4.1
|
bootstrap-flask==2.4.1
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
|
celery==5.4.0
|
||||||
|
# via -r requirements.in
|
||||||
certifi==2025.1.31
|
certifi==2025.1.31
|
||||||
# via requests
|
# via requests
|
||||||
charset-normalizer==3.4.1
|
charset-normalizer==3.4.1
|
||||||
# via requests
|
# via requests
|
||||||
click==8.1.8
|
click==8.1.8
|
||||||
# via flask
|
# via
|
||||||
|
# celery
|
||||||
|
# click-didyoumean
|
||||||
|
# click-plugins
|
||||||
|
# click-repl
|
||||||
|
# flask
|
||||||
|
click-didyoumean==0.3.1
|
||||||
|
# via celery
|
||||||
|
click-plugins==1.1.1
|
||||||
|
# via celery
|
||||||
|
click-repl==0.3.0
|
||||||
|
# via celery
|
||||||
configobj==5.0.9
|
configobj==5.0.9
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
configparser==7.1.0
|
configparser==7.1.0
|
||||||
@@ -41,6 +58,8 @@ jinja2==3.1.5
|
|||||||
# via flask
|
# via flask
|
||||||
kiwisolver==1.4.8
|
kiwisolver==1.4.8
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
|
kombu==5.4.2
|
||||||
|
# via celery
|
||||||
markupsafe==3.0.2
|
markupsafe==3.0.2
|
||||||
# via
|
# via
|
||||||
# jinja2
|
# jinja2
|
||||||
@@ -50,9 +69,9 @@ matplotlib==3.10.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements.in
|
# -r requirements.in
|
||||||
# seaborn
|
# seaborn
|
||||||
narwhals==1.26.0
|
narwhals==1.27.1
|
||||||
# via plotly
|
# via plotly
|
||||||
numpy==2.2.2
|
numpy==2.2.3
|
||||||
# via
|
# via
|
||||||
# contourpy
|
# contourpy
|
||||||
# matplotlib
|
# matplotlib
|
||||||
@@ -70,14 +89,19 @@ pillow==11.1.0
|
|||||||
# via matplotlib
|
# via matplotlib
|
||||||
plotly==6.0.0
|
plotly==6.0.0
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
|
prompt-toolkit==3.0.50
|
||||||
|
# via click-repl
|
||||||
pyparsing==3.2.1
|
pyparsing==3.2.1
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
python-dateutil==2.9.0.post0
|
python-dateutil==2.9.0.post0
|
||||||
# via
|
# via
|
||||||
|
# celery
|
||||||
# matplotlib
|
# matplotlib
|
||||||
# pandas
|
# pandas
|
||||||
pytz==2025.1
|
pytz==2025.1
|
||||||
# via pandas
|
# via pandas
|
||||||
|
redis==5.2.1
|
||||||
|
# via -r requirements.in
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
seaborn==0.13.2
|
seaborn==0.13.2
|
||||||
@@ -85,9 +109,19 @@ seaborn==0.13.2
|
|||||||
six==1.17.0
|
six==1.17.0
|
||||||
# via python-dateutil
|
# via python-dateutil
|
||||||
tzdata==2025.1
|
tzdata==2025.1
|
||||||
# via pandas
|
# via
|
||||||
|
# celery
|
||||||
|
# kombu
|
||||||
|
# pandas
|
||||||
urllib3==2.3.0
|
urllib3==2.3.0
|
||||||
# via requests
|
# via requests
|
||||||
|
vine==5.1.0
|
||||||
|
# via
|
||||||
|
# amqp
|
||||||
|
# celery
|
||||||
|
# kombu
|
||||||
|
wcwidth==0.2.13
|
||||||
|
# via prompt-toolkit
|
||||||
werkzeug==3.1.3
|
werkzeug==3.1.3
|
||||||
# via flask
|
# via flask
|
||||||
wtforms==3.2.1
|
wtforms==3.2.1
|
||||||
|
|||||||
5
run.py
5
run.py
@@ -1,5 +1,6 @@
|
|||||||
from app import create_app
|
from app import create_app
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
app = create_app()
|
app = create_app()
|
||||||
app.run(debug=True, threaded=True)
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(debug=True)
|
||||||
50
stop_scraping.py
Normal file
50
stop_scraping.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import redis
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def get_redis(host='localhost', port=6379, db=0):
    """Create a Redis client for the scraper control keys.

    Args:
        host: Hostname of the Redis server. Defaults to ``'localhost'``.
        port: TCP port of the Redis server. Defaults to ``6379``.
        db: Index of the Redis logical database. Defaults to ``0``.

    Returns:
        redis.StrictRedis: A client created with ``decode_responses=True``,
        so replies are returned as ``str`` instead of ``bytes``.
    """
    return redis.StrictRedis(
        host=host,
        port=port,
        db=db,
        decode_responses=True,
    )
|
||||||
|
|
||||||
|
def stop_scraping(flush=False, force=False):
    """Signal the running scraper to stop, or wipe Redis entirely.

    Args:
        flush: When true, call ``FLUSHALL`` on the Redis server and return
            immediately without sending a stop signal.
        force: Only affects the return value when no session is found; in
            that case the function returns ``True`` instead of ``False``.

    Returns:
        bool: ``True`` after a flush or after a stop signal was written;
        otherwise the truthiness of ``force``.
    """
    client = get_redis()

    if flush:
        client.flushall()
        print("Flushed all Redis data")
        return True

    current_faction_id = client.get("current_faction_id")

    if current_faction_id:
        # The scraper state lives in a hash keyed by faction id; writing "0"
        # to ``scraping_active`` is the stop signal.
        client.hset(f"scraper:{current_faction_id}", "scraping_active", "0")
        print(f"Sent stop signal to scraping process for faction {current_faction_id}")
        return True

    print("No active scraping session found.")
    return bool(force)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse flags, optionally confirm a full flush,
    # send the stop signal, and fall back to signalling every scraper hash
    # when --force is given and no active session was found.
    parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.')
    parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found')
    parser.add_argument('--flush', action='store_true', help='Flush all Redis data (WARNING: This will clear ALL Redis data)')

    args = parser.parse_args()

    if args.flush:
        answer = input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ")
        # Tolerate surrounding whitespace in the answer; anything but "y" aborts.
        if answer.strip().lower() != 'y':
            print("Operation cancelled.")
            # ``exit`` is injected by the ``site`` module and is not guaranteed
            # to exist; SystemExit always is.
            raise SystemExit(0)

    # ``force`` is deliberately not forwarded: stop_scraping(force=True)
    # returns True even when no session exists, which made the fallback below
    # unreachable.  Without it, the return value reflects whether a flush or a
    # stop signal actually happened.
    success = stop_scraping(flush=args.flush)

    if not success and args.force:
        print("Forcing stop for all potential scraping processes...")
        redis_client = get_redis()
        # Iterate with SCAN instead of KEYS so a large keyspace does not block
        # the Redis server while matching.
        for key in redis_client.scan_iter(match="scraper:*"):
            redis_client.hset(key, "scraping_active", "0")
        print("Sent stop signal to all potential scraping processes.")
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user