full refactor fml

Michael Beck
2025-02-07 04:13:17 +01:00
parent ceebbafed5
commit 9076c5ed7a
11 changed files with 561 additions and 461 deletions

210
app/api.py Normal file

@@ -0,0 +1,210 @@
# filepath: /home/michaelb/Dokumente/TornActivityTracker/app/api.py
from flask import jsonify, request, Response, send_from_directory, current_app
import threading
import os
import glob
from datetime import datetime
import pandas as pd
from app.models import Scraper, generate_statistics
from app.util import create_zip, delete_old_zips, tail, get_size
from app.config import load_config
from app.logging_config import get_logger
from app.forms import ScrapingForm
config = load_config()
logger = get_logger()
log_file_name = logger.handlers[0].baseFilename
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()
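# Module-level placeholders; the running Scraper instance and its thread are shared across
# requests via current_app.config['SCRAPER'] / current_app.config['SCRAPING_THREAD'], while
# scrape_lock serializes /start_scraping so only one scraping thread is started at a time.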
def register_api(app):
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
with scrape_lock:
scraper = current_app.config.get('SCRAPER')
if scraper is not None and scraper.scraping_active:
logger.warning("Can't start scraping process: scraping already in progress")
return jsonify({"status": "Scraping already in progress"})
form = ScrapingForm()
if form.validate_on_submit():
faction_id = form.faction_id.data
fetch_interval = form.fetch_interval.data
run_interval = form.run_interval.data
scraper = Scraper(faction_id, fetch_interval, run_interval, current_app)
scraper.scraping_active = True
scraping_thread = threading.Thread(target=scraper.start_scraping)
scraping_thread.daemon = True
scraping_thread.start()
current_app.config['SCRAPER'] = scraper
current_app.config['SCRAPING_THREAD'] = scraping_thread
return jsonify({"status": "Scraping started"})
return jsonify({"status": "Invalid form data"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
scraper = current_app.config.get('SCRAPER')
if scraper is None or not scraper.scraping_active:
return jsonify({"status": "Scraping is not running"})
scraper.stop_scraping()
current_app.config['SCRAPING_ACTIVE'] = False
logger.debug("Scraping stopped by user")
return jsonify({"status": "Scraping stopped"})
@app.route('/logfile', methods=['GET'])
def logfile():
page = int(request.args.get('page', 0)) # Page number
lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page
log_file_path = log_file_name # Path to the current log file
if not os.path.isfile(log_file_path):
logger.error("Log file not found")
return jsonify({"error": "Log file not found"}), 404
log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
log_lines = log_lines[::-1] # Reverse the list
start = page * lines_per_page
end = start + lines_per_page
paginated_lines = log_lines[start:end] if start < len(log_lines) else []
return jsonify({
"log": paginated_lines,
"total_lines": len(log_lines),
"pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
"start_line": len(log_lines) - start
})
@app.route('/download_files', methods=['POST'])
def download_files():
delete_old_zips() # Clean up old zip files
file_paths = request.json.get('file_paths')
if not file_paths:
return jsonify({"error": "No files specified"}), 400
# Get the absolute path of the parent directory
parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))
# Validate and correct file paths
valid_file_paths = []
for file_path in file_paths:
if file_path.startswith('/data/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(parent_dir, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
elif file_path.startswith('/log/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(parent_dir, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
if not valid_file_paths:
return jsonify({"error": "No valid files specified"}), 400
# Create a unique zip file name
zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
zip_path = create_zip(valid_file_paths, zip_name)
# Log the directory and file path for debugging
current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
return download_tmp_file(zip_name)
@app.route('/delete_files', methods=['POST'])
def delete_files():
file_paths = request.json.get('file_paths', [])
if not file_paths:
return jsonify({"error": "No files specified"}), 400
errors = []
scraper = current_app.config.get('SCRAPER')  # active Scraper, if any (set by /start_scraping)
data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
for file_path in file_paths:
if file_path.startswith('/data/'):
full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
elif file_path.startswith('/log/'):
full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
else:
errors.append({"file": file_path, "error": "File not in allowed directory"})
continue
# Check if the file is in either the logs or the data files folder
#if not (full_file_path.startswith(data_dir) or full_file_path.startswith(log_dir)):
# errors.append({"file": file_path, "error": "File not in allowed directory"})
# continue
# Check if it's the currently active log file
if full_file_path == log_file_name:
errors.append({"file": file_path, "error": "Cannot delete active log file."})
continue
# Check if it's an active data file
if scraper and scraper.data_file_name == full_file_path:
errors.append({"file": file_path, "error": "Cannot delete active data file."})
continue
if not os.path.isfile(full_file_path):
errors.append({"file": file_path, "error": "File not found"})
continue
try:
os.remove(full_file_path)
except Exception as e:
errors.append({"file": file_path, "error": str(e)})
if errors:
return jsonify({"errors": errors}), 207 # Multi-Status response
return jsonify({"success": True}), 200
@app.route('/data/<path:filename>')
def download_data_file(filename):
data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
file_path = os.path.join(data_dir, filename)
return send_from_directory(directory=data_dir, path=filename, as_attachment=True)
@app.route('/log/<path:filename>')
def download_log_file(filename):
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
file_path = os.path.join(log_dir, filename)
return send_from_directory(directory=log_dir, path=filename, as_attachment=True)
@app.route('/tmp/<path:filename>')
def download_tmp_file(filename):
tmp_dir = os.path.abspath(config['TEMP']['TEMP_DIR'])
file_path = os.path.join(tmp_dir, filename)
return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)
@app.route('/config/lines_per_page')
def get_lines_per_page():
lines_per_page = config['LOGGING']['VIEW_PAGE_LINES']
return jsonify({"lines_per_page": lines_per_page})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
scraper = current_app.config.get('SCRAPER')
if scraper is None:
logger.debug("Scraper is not initialized.")
return jsonify({"scraping_active": False})
if scraper.scraping_active:
logger.debug("Scraping is active.")
return jsonify({"scraping_active": True})
else:
logger.debug("Scraping is not active.")
return jsonify({"scraping_active": False})
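
A minimal sketch of exercising these endpoints with Flask's built-in test client, assuming the init_app() factory defined in app/app.py below and a valid config.ini:
from app.app import init_app

app = init_app()
client = app.test_client()

# Both endpoints are read-only and safe to poll.
print(client.get('/scraping_status').get_json())        # {"scraping_active": false} until a scrape is started
print(client.get('/config/lines_per_page').get_json())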

app/app.py

@@ -1,470 +1,41 @@
from flask import Flask, request, render_template, Response, jsonify, url_for
from flask_bootstrap import Bootstrap5  # from the bootstrap-flask package
from app.forms import ScrapingForm
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import threading
import logging
from logging.handlers import QueueHandler
from queue import Queue
import os
import glob
from flask import Flask
from flask_bootstrap import Bootstrap5
from datetime import datetime
from flask import send_from_directory
import configparser
import zipfile
import os
from datetime import timedelta
from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
def init_app():
config = load_config()
# Initialize app
app = Flask(__name__)
# Load configuration
config = configparser.ConfigParser()
config.read('config.ini')
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']
app.config['API_KEY'] = config['DEFAULT']['API_KEY']
app.config['DATA'] = config['DATA']
app.config['TEMP'] = config['TEMP']
app.config['LOGGING'] = config['LOGGING']
# Move bootstrap settings to root level
for key in config['BOOTSTRAP']:
app.config[key.upper()] = config['BOOTSTRAP'][key]
bootstrap = Bootstrap5(app)
# Move every setting from config['BOOTSTRAP'] to the root level of config
for key in config['BOOTSTRAP']:
key = key.upper()
app.config[key] = config['BOOTSTRAP'][key]
if key == 'SECRET_KEY':
continue
elif key == 'API_KEY':
continue
print(f"Loaded config: {key} = {app.config[key]}")
# Global state
scraping_active = False
scraping_thread = None
data_file_name = None
log_file_name = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG) # Adjust as needed
# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG) # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
# 2) QUEUE HANDLER
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
def create_zip(file_paths, zip_name):
zip_path = os.path.join(app.root_path, 'temp', zip_name)
with zipfile.ZipFile(zip_path, 'w') as zipf:
for file_path in file_paths:
arcname = os.path.basename(file_path)
zipf.write(file_path, arcname)
return zip_path
def delete_old_zips():
temp_dir = os.path.join(app.root_path, 'temp')
now = datetime.now()
for filename in os.listdir(temp_dir):
if filename.endswith('.zip'):
file_path = os.path.join(temp_dir, filename)
file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
if now - file_time > timedelta(hours=1):
os.remove(file_path)
logger.info(f"Deleted old zip file: {filename}")
def fetch_faction_data(faction_id):
url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
response = requests.get(url)
if response.status_code == 200:
logger.info(f"Fetched data for faction ID {faction_id}")
return response.json()
else:
logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
return None
def fetch_user_activity(user_id):
url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
response = requests.get(url)
if response.status_code == 200:
return response.json()
else:
logger.error(f"Failed to fetch user activity for user ID {user_id}")
return None
def scrape_data(faction_id, fetch_interval, run_interval):
global scraping_active
global data_file_name
end_time = datetime.now() + timedelta(days=run_interval)
data_file_name = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
while datetime.now() < end_time and scraping_active:
logger.info(f"Fetching data at {datetime.now()}")
faction_data = fetch_faction_data(faction_id)
if faction_data and 'members' in faction_data:
user_activity_data = []
for user_id, user_info in faction_data['members'].items():
user_activity = fetch_user_activity(user_id)
if user_activity:
user_activity_data.append({
'user_id': user_id,
'name': user_activity.get('name', ''),
'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
'stadata_file_nametus': user_activity.get('status', {}).get('state', ''),
'timestamp': datetime.now().timestamp()
})
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
# Append data to the file
df = pd.DataFrame(user_activity_data)
df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
if not os.path.isfile(data_file_name):
df.to_csv(data_file_name, index=False)
else:
df.to_csv(data_file_name, mode='a', header=False, index=False)
logger.info(f"Data appended to {data_file_name}")
time.sleep(fetch_interval)
else:
if datetime.now() < end_time:
logger.warning(f"Scraping stopped at {datetime.now()}")
elif scraping_active == False:
logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
else:
logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
logger.info("Scraping completed.")
scraping_active = False
def generate_statistics(df):
df['hour'] = df['timestamp'].dt.hour
activity_by_hour = df.groupby('hour').size()
return activity_by_hour
# Taken from:
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
def tail(filename, n):
stat = os.stat(filename)
n = int(n)
if stat.st_size == 0 or n == 0:
yield ''
return
page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
offsets = []
count = _n = n if n >= 0 else -n
last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
with open(filename, 'r') as f:
while count > 0:
starting_byte = last_byte_read - page_size
if last_byte_read == 0:
offsets.append(0)
break
elif starting_byte < 0:
f.seek(0)
text = f.read(last_byte_read)
else:
f.seek(starting_byte)
text = f.read(page_size)
for i in range(-1, -1*len(text)-1, -1):
last_byte_read -= 1
if text[i] == '\n':
last_nl_byte = last_byte_read
starting_offset = last_nl_byte + 1
offsets.append(starting_offset)
count -= 1
offsets = offsets[len(offsets)-_n:]
offsets.reverse()
with open(filename, 'r') as f:
for i, offset in enumerate(offsets):
f.seek(offset)
if i == len(offsets) - 1:
yield f.read()
else:
bytes_to_read = offsets[i+1] - offset
yield f.read(bytes_to_read)
def is_data_file_in_use(filename):
if(data_file_name == None):
return False
if os.path.join(app.root_path, filename.lstrip('/')) == os.path.join(app.root_path, data_file_name.lstrip('/')) and scraping_active:
return True
return False
@app.route('/is_data_file_in_use/<path:filename>')
def is_data_file_in_use_json(filename):
return jsonify(is_data_file_in_use(filename))
def is_log_file_in_use(filename):
if(log_file_name == None):
return False
if os.path.join(app.root_path, filename.lstrip('/')) == os.path.join(app.root_path, log_file_name.lstrip('/')):
return True
return False
@app.route('/is_log_file_in_use/<path:filename>')
def is_log_file_in_use_json(filename):
print(filename)
return jsonify(is_log_file_in_use(filename))
@app.route('/')
def index():
form = ScrapingForm()
return render_template('index.html', form=form)
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
global scraping_active, scraping_thread
form = ScrapingForm()
if form.validate_on_submit():
if scraping_active:
logger.warning("Can't start scraping process: scraping already in progress")
return jsonify({"status": "Scraping already in progress"})
scraping_active = True
faction_id = form.faction_id.data
fetch_interval = form.fetch_interval.data
run_interval = form.run_interval.data
# Start scraping in a separate thread
scraping_thread = threading.Thread(target=scrape_data, args=(faction_id, fetch_interval, run_interval))
scraping_thread.daemon = True
scraping_thread.start()
return jsonify({"status": "Scraping started"})
return jsonify({"status": "Invalid form data"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
global scraping_active
if not scraping_active:
return jsonify({"status": "No scraping in progress"})
scraping_active = False
logger.debug("scraping_active set to False")
return jsonify({"status": "Scraping stopped"})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
global scraping_active
logger.debug(f"scraping_status called: scraping_active = {scraping_active}")
return jsonify({"scraping_active": scraping_active})
@app.route('/logs')
def logs():
def generate():
while True:
if not log_queue.empty():
log = log_queue.get().getMessage()
yield f"data: {log}\n\n"
time.sleep(0.1)
return Response(generate(), mimetype='text/event-stream')
@app.route('/logfile', methods=['GET'])
def logfile():
page = int(request.args.get('page', 0)) # Page number
lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page
log_file_path = log_file_name # Path to the current log file
if not os.path.isfile(log_file_path):
logging.error("Log file not found")
return jsonify({"error": "Log file not found"}), 404
log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
log_lines = log_lines[::-1] # Reverse the list
start = page * lines_per_page
end = start + lines_per_page
paginated_lines = log_lines[start:end] if start < len(log_lines) else []
return jsonify({
"log": paginated_lines,
"total_lines": len(log_lines),
"pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
"start_line": len(log_lines) - start # Starting line number for the current page
})
@app.route('/results')
def results():
# Assuming the scraping is done and data is saved somewhere
faction_id = request.args.get('faction_id')
filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
if os.path.isfile(filename):
df = pd.read_csv(filename)
stats = generate_statistics(df)
return render_template('results.html', stats=stats.to_dict())
else:
return "No data found."
@app.route('/analyze')
def analyze():
return render_template('analyze.html');
@app.route('/log_viewer')
def log_viewer():
return render_template('log_viewer.html');
@app.route('/download_results')
def download_results():
data_files = glob.glob("data/*.csv")
log_files = glob.glob("log/*.log")
def get_file_info(file_path):
return {
"name": file_path,
"name_display": os.path.basename(file_path),
"last_modified": os.path.getmtime(file_path),
"created": os.path.getctime(file_path),
"size": get_size(file_path)
}
data_files_info = [get_file_info(file) for file in data_files]
log_files_info = [get_file_info(file) for file in log_files]
for data_file in data_files_info:
if is_data_file_in_use(data_file['name']):
data_file['active'] = True
else:
data_file['active'] = False
for log_file in log_files_info:
if is_log_file_in_use(log_file['name']):
log_file['active'] = True
else:
log_file['active'] = False
files = {"data": data_files_info, "log": log_files_info}
return render_template('download_results.html', files=files)
@app.route('/download_files', methods=['POST'])
def download_files():
delete_old_zips() # Clean up old zip files
file_paths = request.json.get('file_paths', [])
if not file_paths:
return jsonify({"error": "No files specified"}), 400
# Validate and correct file paths
valid_file_paths = []
for file_path in file_paths:
if file_path.startswith('/data/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(app.root_path, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
elif file_path.startswith('/log/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(app.root_path, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
if not valid_file_paths:
return jsonify({"error": "No valid files specified"}), 400
# Create a unique zip file name
zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
zip_path = create_zip(valid_file_paths, zip_name)
return send_from_directory(directory='temp', path=zip_name, as_attachment=True)
@app.route('/delete_files', methods=['POST'])
def delete_files():
file_paths = request.json.get('file_paths', [])
if not file_paths:
return jsonify({"error": "No files specified"}), 400
errors = []
for file_path in file_paths:
full_file_path = os.path.join(app.root_path, file_path.lstrip('/'))
print(f"Attempting to delete: {file_path}") # Debugging line
print(f"Full path: {full_file_path}") # Debugging line
print(f"file_path: {file_path}") # Debugging line
# Check if the file is in either the logs or the data files folder
if not (full_file_path.startswith(os.path.join(app.root_path, 'log')) or
full_file_path.startswith(os.path.join(app.root_path, 'data'))):
errors.append({"file": file_path, "error": "File not in allowed directory"})
continue
# Check if it's the currently active log file
if is_log_file_in_use(file_path):
errors.append({"file": file_path, "error": "Cannot delete active log file."})
continue
# Check if it's an active data file
if is_data_file_in_use(file_path):
errors.append({"file": file_path, "error": "Cannot delete active data file."})
continue
if not os.path.isfile(full_file_path):
errors.append({"file": file_path, "error": "File not found"})
continue
try:
os.remove(full_file_path)
except Exception as e:
errors.append({"file": file_path, "error": str(e)})
if errors:
return jsonify({"errors": errors}), 207 # Multi-Status response
return jsonify({"success": True}), 200
@app.template_filter('datetimeformat')
def datetimeformat(value):
return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')
def get_size(path):
size = os.path.getsize(path)
if size < 1024:
return f"{size} bytes"
elif size < pow(1024,2):
return f"{round(size/1024, 2)} KB"
elif size < pow(1024,3):
return f"{round(size/(pow(1024,2)), 2)} MB"
elif size < pow(1024,4):
return f"{round(size/(pow(1024,3)), 2)} GB"
@app.route('/data/<path:filename>')
def download_data_file(filename):
return send_from_directory('data', filename)
@app.route('/log/<path:filename>')
def download_log_file(filename):
return send_from_directory('log', filename)
@app.route('/config/lines_per_page')
def get_lines_per_page():
lines_per_page = config['LOGGING']['VIEW_PAGE_LINES']
return jsonify({"lines_per_page": lines_per_page})
if __name__ == '__main__':
app.run(debug=True, threaded=True)
# Initialize global variables
app.config['SCRAPING_ACTIVE'] = False
app.config['SCRAPING_THREAD'] = None
app.config['DATA_FILE_NAME'] = None
app.config['LOG_FILE_NAME'] = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
# Register routes
register_views(app)
register_api(app)
register_filters(app)
return app

7
app/config.py Normal file

@@ -0,0 +1,7 @@
import configparser
import os
def load_config():
config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), '..', 'config.ini'))
return config
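
For reference, a sketch of the config.ini layout this loader and the rest of the commit expect; section and key names are taken from the code, the values are only placeholders:
; config.ini (sample -- placeholder values)
[DEFAULT]
SECRET_KEY = change-me
API_KEY = your-torn-api-key

[DATA]
DATA_DIR = data

[TEMP]
TEMP_DIR = temp

[LOGGING]
LOG_DIR = log
VIEW_PAGE_LINES = 50
VIEW_MAX_LINES = 1000
TAIL_PAGE_SIZE = 4096

[BOOTSTRAP]
; every key in this section is copied (upper-cased) to the root of app.config by init_app()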

7
app/filters.py Normal file

@@ -0,0 +1,7 @@
from flask import Blueprint, request, jsonify
from datetime import datetime
def register_filters(app):
@app.template_filter('datetimeformat')
def datetimeformat(value):
return datetime.fromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')

39
app/logging_config.py Normal file

@@ -0,0 +1,39 @@
import logging
from logging.handlers import QueueHandler
from queue import Queue
import os
from datetime import datetime
from app.config import load_config
config = load_config()
# Define the log directory and ensure it exists
LOG_DIR = config['LOGGING']['LOG_DIR']
if not os.path.exists(LOG_DIR):
os.makedirs(LOG_DIR)
# Generate the log filename dynamically
log_file_name = os.path.join(LOG_DIR, datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log')
# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# File handler
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
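# Keep the file handler first: app.api locates the current log file via logger.handlers[0].baseFilename.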
# Queue handler for real-time logging
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
# Function to get logger in other modules
def get_logger():
return logger

96
app/models.py Normal file

@@ -0,0 +1,96 @@
import requests
import pandas as pd
import os
import time
from datetime import datetime, timedelta
from app.logging_config import get_logger
from app.config import load_config
config = load_config()
API_KEY = config['DEFAULT']['API_KEY']
logger = get_logger()
class Scraper:
def __init__(self, faction_id, fetch_interval, run_interval, app):
self.faction_id = faction_id
self.fetch_interval = fetch_interval
self.run_interval = run_interval
self.end_time = datetime.now() + timedelta(days=run_interval)
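# run_interval is interpreted as days (end_time above); fetch_interval as seconds (time.sleep in start_scraping)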
self.data_file_name = os.path.join(app.config['DATA']['DATA_DIR'], f"{self.faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv")
self.scraping_active = False
print(self.data_file_name)
def fetch_faction_data(self):
url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={API_KEY}"
response = requests.get(url)
if response.status_code == 200:
return response.json()
logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}")
return None
def fetch_user_activity(self, user_id):
url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
response = requests.get(url)
if response.status_code == 200:
return response.json()
logger.error(f"Failed to fetch user activity for user ID {user_id}")
return None
def start_scraping(self):
self.scraping_active = True
logger.info(f"Starting scraping process for faction ID {self.faction_id}")
logger.debug(f"Fetch interval: {self.fetch_interval}")
logger.debug(f"Run interval: {self.run_interval}")
logger.debug(f"End time: {self.end_time}")
while datetime.now() < self.end_time and self.scraping_active:
logger.info(f"Fetching data at {datetime.now()}")
faction_data = self.fetch_faction_data()
if faction_data and 'members' in faction_data:
user_activity_data = []
for user_id, user_info in faction_data['members'].items():
user_activity = self.fetch_user_activity(user_id)
if user_activity:
user_activity_data.append({
'user_id': user_id,
'name': user_activity.get('name', ''),
'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
'status': user_activity.get('status', {}).get('state', ''),
'timestamp': datetime.now().timestamp()
})
logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
# Append data to the file
df = pd.DataFrame(user_activity_data)
df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
if not os.path.isfile(self.data_file_name):
df.to_csv(self.data_file_name, index=False)
else:
df.to_csv(self.data_file_name, mode='a', header=False, index=False)
logger.info(f"Data appended to {self.data_file_name}")
time.sleep(self.fetch_interval)
else:
if datetime.now() < self.end_time:
logger.warning(f"Scraping stopped at {datetime.now()} because of timeout ({self.run_interval} days, end time: {self.end_time})")
elif not self.scraping_active:
logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
else:
logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
logger.info("Scraping completed.")
self.scraping_active = False
def stop_scraping(self):
self.scraping_active = False
logger.debug("Scraping stopped by user")
def generate_statistics(df):
df['hour'] = df['timestamp'].dt.hour # No need to convert timestamp again
return df.groupby('hour').size() # Activity by hour
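
A small offline sketch (the CSV path is a placeholder, and a valid config.ini is assumed so the module imports cleanly) for turning one of the scraped data files into the per-hour activity counts produced by generate_statistics:
import pandas as pd
from app.models import generate_statistics

# Columns written by Scraper.start_scraping: user_id, name, last_action, status, timestamp
df = pd.read_csv('data/12345-2025-02-07-04-13.csv', parse_dates=['last_action', 'timestamp'])
print(generate_statistics(df))  # pandas Series indexed by hour of day (0-23)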

2
app/state.py Normal file

@@ -0,0 +1,2 @@
data_file_name = None
log_file_name = None
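# Shared module-level file names; imported by app.util and app.views.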

app/templates/download_results.html

@@ -30,7 +30,7 @@
<tbody>
{% for file in files.data %}
<tr>
<td><input type="checkbox" name="fileCheckbox" value="{{ url_for('download_log_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
<td><input type="checkbox" name="fileCheckbox" value="{{ url_for('download_data_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
<td><a href="{{ url_for('download_data_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
<td>{{ file.last_modified | datetimeformat }}</td>
<td>{{ file.created | datetimeformat }}</td>

85
app/util.py Normal file

@@ -0,0 +1,85 @@
import os
import zipfile
from datetime import datetime, timedelta
from app.state import data_file_name, log_file_name
from app.config import load_config
config = load_config()
def create_zip(file_paths, zip_name):
temp_dir = os.path.abspath(config['TEMP']['TEMP_DIR'])
zip_path = os.path.join(temp_dir, zip_name)
with zipfile.ZipFile(zip_path, 'w') as zipf:
for file_path in file_paths:
zipf.write(file_path, os.path.basename(file_path))
print(f"Zip file created: {zip_path}")
return zip_path
def delete_old_zips():
temp_dir = os.path.abspath(config['TEMP']['TEMP_DIR'])
now = datetime.now()
for filename in os.listdir(temp_dir):
if filename.endswith('.zip'):
file_path = os.path.join(temp_dir, filename)
if now - datetime.fromtimestamp(os.path.getmtime(file_path)) > timedelta(hours=1):
os.remove(file_path)
def tail(filename, n):
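# Read the file backwards in TAIL_PAGE_SIZE chunks, record the byte offsets of the last n
# line starts, then yield those lines in their original (oldest-first) order.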
stat = os.stat(filename)
n = int(n)
if stat.st_size == 0 or n == 0:
yield ''
return
page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
offsets = []
count = _n = n if n >= 0 else -n
last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
with open(filename, 'r') as f:
while count > 0:
starting_byte = last_byte_read - page_size
if last_byte_read == 0:
offsets.append(0)
break
elif starting_byte < 0:
f.seek(0)
text = f.read(last_byte_read)
else:
f.seek(starting_byte)
text = f.read(page_size)
for i in range(-1, -1*len(text)-1, -1):
last_byte_read -= 1
if text[i] == '\n':
last_nl_byte = last_byte_read
starting_offset = last_nl_byte + 1
offsets.append(starting_offset)
count -= 1
offsets = offsets[len(offsets)-_n:]
offsets.reverse()
with open(filename, 'r') as f:
for i, offset in enumerate(offsets):
f.seek(offset)
if i == len(offsets) - 1:
yield f.read()
else:
bytes_to_read = offsets[i+1] - offset
yield f.read(bytes_to_read)
def get_size(path):
size = os.path.getsize(path)
if size < 1024:
return f"{size} bytes"
elif size < pow(1024,2):
return f"{round(size/1024, 2)} KB"
elif size < pow(1024,3):
return f"{round(size/(pow(1024,2)), 2)} MB"
elif size < pow(1024,4):
return f"{round(size/(pow(1024,3)), 2)} GB"
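
A quick sketch of using the stand-alone helpers (the log path is a placeholder; a valid config.ini is assumed):
from app.util import tail, get_size

log_path = 'log/2025-02-07-04-13.log'
print(get_size(log_path))            # e.g. "12.4 KB"
for line in tail(log_path, 20):      # yields the last 20 lines, oldest first
    print(line, end='')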

82
app/views.py Normal file

@@ -0,0 +1,82 @@
import os
import glob
from flask import render_template
from app.forms import ScrapingForm
from app.util import get_size
from app.config import load_config
from app.api import scraper  # Import the scraper instance
from app.logging_config import get_logger
from app.state import log_file_name
print(f"A imported log_file_name: {log_file_name}")
config = load_config()
logger = get_logger()
def register_views(app):
@app.route('/')
def index():
form = ScrapingForm()
return render_template('index.html', form=form)
@app.route('/results')
def results():
return render_template('results.html')
@app.route('/analyze')
def analyze():
return render_template('analyze.html')
@app.route('/log_viewer')
def log_viewer():
return render_template('log_viewer.html')
@app.route('/download_results')
def download_results():
log_file_name = os.path.abspath(app.config['LOG_FILE_NAME'])
scraper = app.config.get('SCRAPER')
if scraper:
print(scraper.data_file_name)
if not scraper:
print("Scraper not initialized")
data_dir = os.path.abspath(config['DATA']['DATA_DIR'])
log_dir = os.path.abspath(config['LOGGING']['LOG_DIR'])
data_files = glob.glob(os.path.join(data_dir, "*.csv"))
log_files = glob.glob(os.path.join(log_dir, "*.log"))
def get_file_info(file_path):
return {
"name": file_path,
"name_display": os.path.basename(file_path),
"last_modified": os.path.getmtime(file_path),
"created": os.path.getctime(file_path),
"size": get_size(file_path)
}
data_files_info = [get_file_info(file) for file in data_files]
log_files_info = [get_file_info(file) for file in log_files]
if scraper and scraper.scraping_active:
for data_file in data_files_info:
if os.path.abspath(scraper.data_file_name) == data_file['name']:
data_file['active'] = True
else:
data_file['active'] = False
for log_file in log_files_info:
if log_file_name == os.path.abspath(log_file['name']):
log_file['active'] = True
else:
log_file['active'] = False
data_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
log_files_info.sort(key=lambda x: x['last_modified'], reverse=True)
files = {"data": data_files_info, "log": log_files_info}
return render_template('download_results.html', files=files)

3
run.py

@@ -1,4 +1,5 @@
from app.app import app
from app.app import init_app
if __name__ == '__main__':
app = init_app()
app.run(debug=True, threaded=True)