First restructure

This commit is contained in:
Michael Beck
2025-02-06 23:27:30 +01:00
parent f552601c4b
commit ceebbafed5
24 changed files with 56 additions and 198 deletions

470
app/app.py Normal file
View File

@@ -0,0 +1,470 @@
from flask import Flask, request, render_template, Response, jsonify, url_for
from flask_bootstrap import Bootstrap5 # from package boostrap_flask
from app.forms import ScrapingForm
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import threading
import logging
from logging.handlers import QueueHandler
from queue import Queue
import os
import glob
from datetime import datetime
from flask import send_from_directory
import configparser
import zipfile
import os
from datetime import timedelta
app = Flask(__name__)

# Load configuration from config.ini in the working directory.
config = configparser.ConfigParser()
config.read('config.ini')
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# Move every setting from config['BOOTSTRAP'] to the root level of app.config.
for key in config['BOOTSTRAP']:
    key = key.upper()
    # Fixed: the skip checks previously ran AFTER the assignment, so
    # SECRET_KEY/API_KEY from [BOOTSTRAP] were still copied into app.config
    # (the `continue` only skipped the print). Skip them up front.
    if key in ('SECRET_KEY', 'API_KEY'):
        continue
    app.config[key] = config['BOOTSTRAP'][key]
    print(f"Loaded config: {key} = {app.config[key]}")
# Global state shared between the Flask request handlers and the scraper thread.
scraping_active = False      # flag the scraper loop polls to know when to stop
scraping_thread = None       # handle to the background scraping thread
data_file_name = None        # CSV the active scrape writes (None until a scrape starts)
# One log file per process start, named by startup timestamp.
log_file_name = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'

# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER — persists everything to log_file_name.
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# 2) QUEUE HANDLER — records pushed here are drained by the /logs SSE endpoint.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
def create_zip(file_paths, zip_name):
    """Bundle *file_paths* into temp/<zip_name> and return the archive path."""
    zip_path = os.path.join(app.root_path, 'temp', zip_name)
    with zipfile.ZipFile(zip_path, 'w') as archive:
        for source in file_paths:
            # Store each entry by basename so the zip has a flat layout.
            archive.write(source, os.path.basename(source))
    return zip_path
def delete_old_zips():
    """Delete zip archives in temp/ that are older than one hour."""
    temp_dir = os.path.join(app.root_path, 'temp')
    now = datetime.now()
    for filename in os.listdir(temp_dir):
        if filename.endswith('.zip'):
            file_path = os.path.join(temp_dir, filename)
            file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if now - file_time > timedelta(hours=1):
                os.remove(file_path)
                # Fixed: the f-string had no placeholder, so every deletion
                # logged the literal text "(unknown)".
                logger.info(f"Deleted old zip file: {filename}")
def fetch_faction_data(faction_id):
    """Fetch the faction payload (including members) from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    try:
        # Fixed: no timeout meant a stalled request could hang the scraper
        # thread forever. Network failures are treated like a bad response.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
        return None
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    else:
        logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
        return None
def fetch_user_activity(user_id):
    """Fetch one user's basic+profile data from the Torn API.

    Returns the decoded JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    try:
        # Fixed: no timeout meant a stalled request could hang the scraper
        # thread forever. Network failures are treated like a bad response.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        logger.error(f"Failed to fetch user activity for user ID {user_id}")
        return None
    if response.status_code == 200:
        return response.json()
    else:
        logger.error(f"Failed to fetch user activity for user ID {user_id}")
        return None
def scrape_data(faction_id, fetch_interval, run_interval):
    """Poll the Torn API for faction member activity and append rows to a CSV.

    Runs until `run_interval` days elapse or `scraping_active` is cleared
    by the user. Each cycle writes one row per member to `data_file_name`.

    Args:
        faction_id: Torn faction whose members are scraped.
        fetch_interval: seconds to sleep between fetch cycles.
        run_interval: total runtime in days.
    """
    global scraping_active
    global data_file_name
    end_time = datetime.now() + timedelta(days=run_interval)
    data_file_name = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id, user_info in faction_data['members'].items():
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        # Fixed: the key was garbled as 'stadata_file_nametus',
                        # producing a nonsense CSV column name.
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
            # Guard: an empty frame has no columns, so the conversions below
            # would raise KeyError if every user fetch failed this cycle.
            if user_activity_data:
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
                # Write the header only on the first write.
                if not os.path.isfile(data_file_name):
                    df.to_csv(data_file_name, index=False)
                else:
                    df.to_csv(data_file_name, mode='a', header=False, index=False)
                logger.info(f"Data appended to {data_file_name}")
            time.sleep(fetch_interval)
        else:
            # Faction fetch failed: log why the loop is in this state.
            # NOTE(review): this branch only logs — the loop keeps running
            # without sleeping; confirm whether a break/sleep was intended.
            if datetime.now() < end_time:
                logger.warning(f"Scraping stopped at {datetime.now()}")
            elif not scraping_active:
                logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
            else:
                logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
    logger.info("Scraping completed.")
    scraping_active = False
def generate_statistics(df):
    """Return activity counts per hour of day.

    Args:
        df: DataFrame with a datetime64 'timestamp' column.

    Returns:
        pandas.Series indexed by hour (0-23) with the number of rows
        whose timestamp falls in that hour.
    """
    # Group on a derived hour key instead of mutating the caller's frame
    # (the previous version wrote a throwaway 'hour' column into df).
    hours = df['timestamp'].dt.hour.rename('hour')
    return df.groupby(hours).size()
# Taken from:
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
def tail(filename, n):
    """Yield the last *n* lines of *filename*, oldest of those first.

    Scans the file backwards one page at a time, recording the byte offset
    just after each newline (i.e. the start of a line), then replays those
    offsets in file order to yield each line's text. Yields a single empty
    string when the file is empty or n == 0.
    """
    stat = os.stat(filename)
    n = int(n)
    if stat.st_size == 0 or n == 0:
        yield ''
        return
    # Bytes to read per backwards step; configured in config.ini.
    page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
    offsets = []  # line-start offsets, collected newest-first
    count = _n = n if n >= 0 else -n
    last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
    with open(filename, 'r') as f:
        while count > 0:
            starting_byte = last_byte_read - page_size
            if last_byte_read == 0:
                # Reached the start of the file: the first line begins at 0.
                offsets.append(0)
                break
            elif starting_byte < 0:
                # Less than one full page remains; read from the beginning.
                f.seek(0)
                text = f.read(last_byte_read)
            else:
                f.seek(starting_byte)
                text = f.read(page_size)
            # Walk this page backwards looking for line breaks.
            for i in range(-1, -1*len(text)-1, -1):
                last_byte_read -= 1
                if text[i] == '\n':
                    last_nl_byte = last_byte_read
                    starting_offset = last_nl_byte + 1
                    offsets.append(starting_offset)
                    count -= 1
    # Keep only the newest _n offsets, then put them into file order.
    offsets = offsets[len(offsets)-_n:]
    offsets.reverse()
    with open(filename, 'r') as f:
        for i, offset in enumerate(offsets):
            f.seek(offset)
            if i == len(offsets) - 1:
                # Last offset: read through to end of file.
                yield f.read()
            else:
                bytes_to_read = offsets[i+1] - offset
                yield f.read(bytes_to_read)
def is_data_file_in_use(filename):
    """Return True when *filename* is the CSV an active scrape is writing."""
    if data_file_name is None:
        return False
    requested = os.path.join(app.root_path, filename.lstrip('/'))
    current = os.path.join(app.root_path, data_file_name.lstrip('/'))
    return requested == current and scraping_active
@app.route('/is_data_file_in_use/<path:filename>')
def is_data_file_in_use_json(filename):
    """JSON wrapper around is_data_file_in_use for the frontend."""
    in_use = is_data_file_in_use(filename)
    return jsonify(in_use)
def is_log_file_in_use(filename):
    """Return True when *filename* resolves to the log file this process writes."""
    if log_file_name is None:
        return False
    requested = os.path.join(app.root_path, filename.lstrip('/'))
    current = os.path.join(app.root_path, log_file_name.lstrip('/'))
    return requested == current
@app.route('/is_log_file_in_use/<path:filename>')
def is_log_file_in_use_json(filename):
    """JSON wrapper around is_log_file_in_use for the frontend."""
    # Fixed: removed a leftover debug print of the requested filename.
    return jsonify(is_log_file_in_use(filename))
@app.route('/')
def index():
    """Render the landing page with the scraping configuration form."""
    scraping_form = ScrapingForm()
    return render_template('index.html', form=scraping_form)
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the form and launch the scraper in a background thread."""
    global scraping_active, scraping_thread
    form = ScrapingForm()
    # Guard clauses: bad form data, then an already-running scrape.
    if not form.validate_on_submit():
        return jsonify({"status": "Invalid form data"})
    if scraping_active:
        logger.warning("Can't start scraping process: scraping already in progress")
        return jsonify({"status": "Scraping already in progress"})
    scraping_active = True
    # Daemon thread so a running scrape never blocks interpreter shutdown.
    scraping_thread = threading.Thread(
        target=scrape_data,
        args=(form.faction_id.data, form.fetch_interval.data, form.run_interval.data),
    )
    scraping_thread.daemon = True
    scraping_thread.start()
    return jsonify({"status": "Scraping started"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Ask the scraper loop to stop; report whether one was running."""
    global scraping_active
    if scraping_active:
        scraping_active = False
        logger.debug("scraping_active set to False")
        return jsonify({"status": "Scraping stopped"})
    return jsonify({"status": "No scraping in progress"})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether the scraper thread is currently active."""
    # Read-only access to the module-level flag; no `global` needed.
    state = scraping_active
    logger.debug(f"scraping_status called: scraping_active = {state}")
    return jsonify({"scraping_active": state})
@app.route('/logs')
def logs():
    """Stream queued log records to the browser as server-sent events."""
    def event_stream():
        # Poll the queue; each record becomes one SSE "data:" frame.
        while True:
            if not log_queue.empty():
                record = log_queue.get()
                yield f"data: {record.getMessage()}\n\n"
            time.sleep(0.1)
    return Response(event_stream(), mimetype='text/event-stream')
@app.route('/logfile', methods=['GET'])
def logfile():
    """Return one JSON page of the current log file, newest lines first.

    Query args:
        page: zero-based page number.
        lines_per_page: page size (defaults to LOGGING.VIEW_PAGE_LINES).
    """
    page = int(request.args.get('page', 0))  # Page number
    lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES']))  # Lines per page
    log_file_path = log_file_name  # Path to the current log file
    if not os.path.isfile(log_file_path):
        # Fixed: this used the root `logging` module instead of the module
        # logger, bypassing the app's file and SSE queue handlers.
        logger.error("Log file not found")
        return jsonify({"error": "Log file not found"}), 404
    log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
    log_lines = log_lines[::-1]  # Reverse the list so newest lines come first
    start = page * lines_per_page
    end = start + lines_per_page
    paginated_lines = log_lines[start:end] if start < len(log_lines) else []
    return jsonify({
        "log": paginated_lines,
        "total_lines": len(log_lines),
        "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
        "start_line": len(log_lines) - start  # Starting line number for the current page
    })
@app.route('/results')
def results():
    """Render hourly activity statistics for a faction's CSV, if one exists."""
    faction_id = request.args.get('faction_id')
    # NOTE(review): the filename is rebuilt from *now*, so this only finds a
    # file created in the same minute the request is made — confirm intent.
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
    if not os.path.isfile(filename):
        return "No data found."
    stats = generate_statistics(pd.read_csv(filename))
    return render_template('results.html', stats=stats.to_dict())
@app.route('/analyze')
def analyze():
    """Render the analysis page."""
    return render_template('analyze.html')
@app.route('/log_viewer')
def log_viewer():
    """Render the log viewer page."""
    return render_template('log_viewer.html')
@app.route('/download_results')
def download_results():
    """List data/log files with metadata and in-use flags for the download page."""
    def describe(path):
        # One metadata row per file, as consumed by the template.
        return {
            "name": path,
            "name_display": os.path.basename(path),
            "last_modified": os.path.getmtime(path),
            "created": os.path.getctime(path),
            "size": get_size(path),
        }
    data_files_info = [describe(p) for p in glob.glob("data/*.csv")]
    log_files_info = [describe(p) for p in glob.glob("log/*.log")]
    # Mark files the app is currently writing so the UI can disable deletion.
    for info in data_files_info:
        info['active'] = True if is_data_file_in_use(info['name']) else False
    for info in log_files_info:
        info['active'] = True if is_log_file_in_use(info['name']) else False
    files = {"data": data_files_info, "log": log_files_info}
    return render_template('download_results.html', files=files)
@app.route('/download_files', methods=['POST'])
def download_files():
    """Zip the requested data/log files and send the archive to the client.

    Expects JSON {"file_paths": [...]}. Only paths under /data/ or /log/
    that exist on disk are included. Returns 400 when nothing valid remains.
    """
    delete_old_zips()  # Clean up zip files older than an hour
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    # Restrict downloads to the data/ and log/ directories.
    # Fixed: the two branches were copy-pasted duplicates; merged via a
    # startswith tuple.
    valid_file_paths = []
    for file_path in file_paths:
        if file_path.startswith(('/data/', '/log/')):
            full_path = os.path.join(app.root_path, file_path.lstrip('/'))
            if os.path.isfile(full_path):
                valid_file_paths.append(full_path)
    if not valid_file_paths:
        return jsonify({"error": "No valid files specified"}), 400
    # Timestamped name keeps concurrent downloads from colliding.
    zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
    zip_path = create_zip(valid_file_paths, zip_name)
    return send_from_directory(directory='temp', path=zip_name, as_attachment=True)
@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the requested data/log files, collecting per-file errors.

    Refuses paths outside data/ and log/, the active log file, and the CSV
    an active scrape is writing. Returns 207 with an error list when any
    file could not be deleted, otherwise 200.
    """
    file_paths = request.json.get('file_paths', [])
    if not file_paths:
        return jsonify({"error": "No files specified"}), 400
    errors = []
    for file_path in file_paths:
        full_file_path = os.path.join(app.root_path, file_path.lstrip('/'))
        # Fixed: replaced three leftover debug print()s with one debug log.
        logger.debug(f"Attempting to delete: {file_path} (full path: {full_file_path})")
        # Check if the file is in either the logs or the data files folder
        if not (full_file_path.startswith(os.path.join(app.root_path, 'log')) or
                full_file_path.startswith(os.path.join(app.root_path, 'data'))):
            errors.append({"file": file_path, "error": "File not in allowed directory"})
            continue
        # Check if it's the currently active log file
        if is_log_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active log file."})
            continue
        # Check if it's an active data file
        if is_data_file_in_use(file_path):
            errors.append({"file": file_path, "error": "Cannot delete active data file."})
            continue
        if not os.path.isfile(full_file_path):
            errors.append({"file": file_path, "error": "File not found"})
            continue
        try:
            os.remove(full_file_path)
        except OSError as e:
            # Narrowed from bare Exception: os.remove raises OSError subclasses.
            errors.append({"file": file_path, "error": str(e)})
    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200
@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a unix timestamp as 'YYYY-MM-DD HH:MM:SS'."""
    moment = datetime.fromtimestamp(value)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def get_size(path):
    """Return the on-disk size of *path* as a human-readable string.

    Uses binary units (1 KB = 1024 bytes); values above bytes are rounded
    to two decimal places.
    """
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size/1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size/(pow(1024, 2)), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size/(pow(1024, 3)), 2)} GB"
    # Fixed: the original chain had no final branch, so any file of
    # 1 TB or more silently returned None.
    return f"{round(size/(pow(1024, 4)), 2)} TB"
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a scraped CSV from the data/ directory."""
    return send_from_directory('data', filename)
@app.route('/log/<path:filename>')
def download_log_file(filename):
    """Serve a log file from the log/ directory."""
    return send_from_directory('log', filename)
@app.route('/config/lines_per_page')
def get_lines_per_page():
    """Expose the configured log-viewer page size as JSON."""
    # Fixed: configparser values are strings; cast so the client receives
    # a JSON number rather than e.g. "20".
    lines_per_page = int(config['LOGGING']['VIEW_PAGE_LINES'])
    return jsonify({"lines_per_page": lines_per_page})
if __name__ == '__main__':
    # Dev entrypoint; threaded=True lets the /logs SSE stream and normal
    # requests be served concurrently.
    app.run(debug=True, threaded=True)