# Torn faction activity scraper — Flask web application.
from flask import Flask, request, render_template, Response, jsonify, url_for
|
|
from flask_bootstrap import Bootstrap5 # from package bootstrap-flask
|
|
from forms import ScrapingForm
|
|
import requests
|
|
import pandas as pd
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
import threading
|
|
import logging
|
|
from logging.handlers import QueueHandler
|
|
from queue import Queue
|
|
import os
|
|
import glob
|
|
from datetime import datetime
|
|
from flask import send_from_directory
|
|
import configparser
|
|
|
|
import zipfile
|
|
import os
|
|
from datetime import timedelta
|
|
|
|
|
|
app = Flask(__name__)

# Load configuration
config = configparser.ConfigParser()
config.read('config.ini')

app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
API_KEY = config['DEFAULT']['API_KEY']

bootstrap = Bootstrap5(app)

# Move every setting from config['BOOTSTRAP'] to the root level of app.config.
# configparser propagates the [DEFAULT] section into every other section, so
# SECRET_KEY and API_KEY show up in this iteration too.  Skip them BEFORE
# assigning or printing: the original assigned first and only skipped the
# print, which copied the raw API key into app.config.
for key in config['BOOTSTRAP']:
    key = key.upper()
    if key in ('SECRET_KEY', 'API_KEY'):
        continue
    app.config[key] = config['BOOTSTRAP'][key]
    print(f"Loaded config: {key} = {app.config[key]}")
|
|
|
|
# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed

# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER
# logging.FileHandler raises FileNotFoundError if the directory is missing,
# so make sure log/ exists before building the handler.
os.makedirs('log', exist_ok=True)
logFile = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
file_handler = logging.FileHandler(logFile, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)

# 2) QUEUE HANDLER — records pushed here are streamed to the browser by the
# /logs Server-Sent-Events endpoint.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)

# Global state shared between the web routes and the scraper worker thread.
scraping_active = False
scraping_thread = None
|
|
|
|
|
|
def create_zip(file_paths, zip_name):
    """Bundle *file_paths* into temp/<zip_name> and return the archive path.

    Each file is stored under its basename only, so the archive carries no
    directory structure.  The temp/ directory is created on demand — the
    original crashed on the first download because it assumed it existed.
    """
    temp_dir = os.path.join(app.root_path, 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    zip_path = os.path.join(temp_dir, zip_name)
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file_path in file_paths:
            zipf.write(file_path, os.path.basename(file_path))
    return zip_path
|
|
|
|
def delete_old_zips():
    """Remove zip archives in temp/ whose mtime is more than one hour old."""
    temp_dir = os.path.join(app.root_path, 'temp')
    # Nothing to clean before the first archive is ever created.
    if not os.path.isdir(temp_dir):
        return
    now = datetime.now()
    for filename in os.listdir(temp_dir):
        if not filename.endswith('.zip'):
            continue
        file_path = os.path.join(temp_dir, filename)
        file_time = datetime.fromtimestamp(os.path.getmtime(file_path))
        if now - file_time > timedelta(hours=1):
            os.remove(file_path)
            # The original logged a literal "(unknown)" placeholder here;
            # log the actual path that was removed.
            logger.info(f"Deleted old zip file: {file_path}")
|
|
|
|
def fetch_faction_data(faction_id):
    """Fetch a faction's data (including its member list) from the Torn API.

    Returns the parsed JSON dict on HTTP 200, or None on any failure.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    try:
        # Without a timeout a stalled connection would hang the scraper
        # thread forever; 30 s is generous for this API.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        logger.warning(f"Request error fetching faction ID {faction_id}: {exc}")
        return None
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
    return None
|
|
|
|
def fetch_user_activity(user_id):
    """Fetch a user's basic+profile data from the Torn API.

    Returns the parsed JSON dict on HTTP 200, or None on any failure.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    try:
        # Same rationale as fetch_faction_data: never block the worker
        # thread indefinitely on a dead connection.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        logger.error(f"Request error fetching user ID {user_id}: {exc}")
        return None
    if response.status_code == 200:
        return response.json()
    logger.error(f"Failed to fetch user activity for user ID {user_id}")
    return None
|
|
|
|
def scrape_data(faction_id, fetch_interval, run_interval):
    """Worker-thread loop: poll the Torn API and append member activity to CSV.

    faction_id     -- faction whose members are polled each cycle
    fetch_interval -- seconds to sleep between successful polls
    run_interval   -- total run time, in days

    Runs until run_interval elapses or the shared scraping_active flag is
    cleared by /stop_scraping.  Writes one CSV per run, stamped with the
    start time.
    """
    global scraping_active
    end_time = datetime.now() + timedelta(days=run_interval)
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"

    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            # One extra API call per member to get their last-action time.
            for user_id, user_info in faction_data['members'].items():
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")

            # Append data to the file
            # NOTE(review): if every per-user fetch failed, the DataFrame has
            # no columns and the df['last_action'] access below raises
            # KeyError — confirm whether that case can occur in practice.
            df = pd.DataFrame(user_activity_data)
            df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

            # First write creates the file with a header; later writes append
            # rows without repeating it.
            if not os.path.isfile(filename):
                df.to_csv(filename, index=False)
            else:
                df.to_csv(filename, mode='a', header=False, index=False)

            # NOTE(review): "(unknown)" is a literal placeholder in this log
            # message — presumably it was meant to interpolate {filename}.
            logger.info(f"Data appended to (unknown)")

            time.sleep(fetch_interval)
        else:
            # Faction fetch failed this cycle.
            # NOTE(review): no sleep on this branch, so repeated failures spin
            # in a tight loop hammering the API; also the elif below can only
            # be reached with scraping_active False, which the while condition
            # already excludes — these diagnostics look misplaced. Confirm
            # intended behavior before relying on them.
            if datetime.now() < end_time:
                logger.warning(f"Scraping stopped at {datetime.now()}")
            elif scraping_active == False:
                logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
            else:
                logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
    logger.info("Scraping completed.")
    # Clear the flag so /scraping_status reports idle after a natural finish.
    scraping_active = False
|
|
|
|
def generate_statistics(df):
    """Count activity records per hour of day.

    df -- DataFrame with a datetime64 'timestamp' column.
    Returns a Series indexed by hour (0-23) with the number of records in
    each hour.  Unlike the original, the caller's DataFrame is left
    untouched (the original injected a temporary 'hour' column into it).
    """
    hours = df['timestamp'].dt.hour.rename('hour')
    return df.groupby(hours).size()
|
|
|
|
# Taken from:
|
|
# https://gist.github.com/amitsaha/5990310?permalink_comment_id=3017951#gistcomment-3017951
|
|
def tail(filename, n):
|
|
stat = os.stat(filename)
|
|
n = int(n)
|
|
if stat.st_size == 0 or n == 0:
|
|
yield ''
|
|
return
|
|
|
|
page_size = int(config['LOGGING']['TAIL_PAGE_SIZE'])
|
|
offsets = []
|
|
count = _n = n if n >= 0 else -n
|
|
|
|
last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
|
|
|
|
with open(filename, 'r') as f:
|
|
while count > 0:
|
|
starting_byte = last_byte_read - page_size
|
|
if last_byte_read == 0:
|
|
offsets.append(0)
|
|
break
|
|
elif starting_byte < 0:
|
|
f.seek(0)
|
|
text = f.read(last_byte_read)
|
|
else:
|
|
f.seek(starting_byte)
|
|
text = f.read(page_size)
|
|
|
|
for i in range(-1, -1*len(text)-1, -1):
|
|
last_byte_read -= 1
|
|
if text[i] == '\n':
|
|
last_nl_byte = last_byte_read
|
|
starting_offset = last_nl_byte + 1
|
|
offsets.append(starting_offset)
|
|
count -= 1
|
|
|
|
offsets = offsets[len(offsets)-_n:]
|
|
offsets.reverse()
|
|
|
|
with open(filename, 'r') as f:
|
|
for i, offset in enumerate(offsets):
|
|
f.seek(offset)
|
|
|
|
if i == len(offsets) - 1:
|
|
yield f.read()
|
|
else:
|
|
bytes_to_read = offsets[i+1] - offset
|
|
yield f.read(bytes_to_read)
|
|
|
|
@app.route('/')
def index():
    """Render the landing page with the scraping-configuration form."""
    return render_template('index.html', form=ScrapingForm())
|
|
|
|
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the submitted form and launch the scraper worker thread.

    Refuses to start a second run while one is already active.  Returns a
    JSON status message in every case.
    """
    global scraping_active, scraping_thread
    form = ScrapingForm()

    # Guard clauses: bad form data and an already-running scrape both end
    # the request early.
    if not form.validate_on_submit():
        return jsonify({"status": "Invalid form data"})
    if scraping_active:
        logger.warning("Can't start scraping process: scraping already in progress")
        return jsonify({"status": "Scraping already in progress"})

    scraping_active = True

    # Hand the form values to the worker; daemon=True so the thread never
    # blocks interpreter shutdown.
    worker_args = (form.faction_id.data, form.fetch_interval.data, form.run_interval.data)
    scraping_thread = threading.Thread(target=scrape_data, args=worker_args, daemon=True)
    scraping_thread.start()

    return jsonify({"status": "Scraping started"})
|
|
|
|
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Request the scraper thread to stop by clearing the shared flag."""
    global scraping_active
    if scraping_active:
        scraping_active = False
        logger.debug("scraping_active set to False")
        return jsonify({"status": "Scraping stopped"})
    return jsonify({"status": "No scraping in progress"})
|
|
|
|
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether the scraper worker is currently running."""
    # Read-only access: no `global` declaration needed.
    logger.debug(f"scraping_status called: scraping_active = {scraping_active}")
    return jsonify({"scraping_active": scraping_active})
|
|
|
|
@app.route('/logs')
def logs():
    """Stream queued log records to the client as Server-Sent Events.

    The generator drains every record currently in the queue before
    sleeping; the original slept 0.1 s per message, throttling bursts to
    ten messages per second.
    """
    def generate():
        while True:
            while not log_queue.empty():
                log = log_queue.get().getMessage()
                yield f"data: {log}\n\n"
            # Idle poll interval between queue checks.
            time.sleep(0.1)
    return Response(generate(), mimetype='text/event-stream')
|
|
|
|
@app.route('/logfile', methods=['GET'])
def logfile():
    """Return one JSON page of the current log file, newest lines first.

    Query params: page (0-based page index), lines_per_page (defaults to
    config LOGGING.VIEW_PAGE_LINES).  404s if the log file is missing.
    """
    page = int(request.args.get('page', 0))  # Page number
    lines_per_page = int(request.args.get('lines_per_page', config['LOGGING']['VIEW_PAGE_LINES']))  # Lines per page
    log_file_path = logFile  # Path to the current log file

    if not os.path.isfile(log_file_path):
        # Use the module logger — the original called the root logger here,
        # bypassing the file/queue handlers every other message goes through.
        logger.error("Log file not found")
        return jsonify({"error": "Log file not found"}), 404

    # tail() yields up to VIEW_MAX_LINES lines oldest-first; reverse so that
    # page 0 shows the newest lines.
    log_lines = list(tail(log_file_path, config['LOGGING']['VIEW_MAX_LINES']))
    log_lines = log_lines[::-1]  # Reverse the list

    start = page * lines_per_page
    end = start + lines_per_page
    paginated_lines = log_lines[start:end] if start < len(log_lines) else []

    return jsonify({
        "log": paginated_lines,
        "total_lines": len(log_lines),
        "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
        "start_line": len(log_lines) - start  # Starting line number for the current page
    })
|
|
@app.route('/results')
def results():
    """Render hourly activity statistics for a faction's most recent capture.

    The original rebuilt the CSV name from the *current* minute, which only
    matched a file created in that same minute; we pick the newest file for
    the faction instead.  parse_dates is required because generate_statistics
    uses the .dt accessor, which fails on the plain strings read_csv returns.
    """
    faction_id = request.args.get('faction_id')
    candidates = glob.glob(f"data/{faction_id}-*.csv")
    if not candidates:
        return "No data found."
    filename = max(candidates, key=os.path.getmtime)
    df = pd.read_csv(filename, parse_dates=['timestamp'])
    stats = generate_statistics(df)
    return render_template('results.html', stats=stats.to_dict())
|
|
|
|
@app.route('/download_results')
def download_results():
    """List the data CSVs and log files, with metadata, for the download page."""
    def describe(path):
        # Per-file metadata consumed by the download_results template.
        return {
            "name": path,
            "name_display": os.path.basename(path),
            "last_modified": os.path.getmtime(path),
            "created": os.path.getctime(path),
            "size": get_size(path),
        }

    files = {
        "data": [describe(p) for p in glob.glob("data/*.csv")],
        "log": [describe(p) for p in glob.glob("log/*.log")],
    }
    return render_template('download_results.html', files=files)
|
|
|
|
@app.route('/download_files', methods=['POST'])
def download_files():
    """Zip the requested data/log files and send the archive to the client.

    Paths arrive from the client as '/data/...' or '/log/...'.  Each one is
    resolved with realpath and must stay inside its directory — the original
    joined the strings blindly, so '..' segments could reach files outside
    the app root.
    """
    delete_old_zips()  # Clean up old zip files

    file_paths = request.json.get('file_paths', [])

    if not file_paths:
        return jsonify({"error": "No files specified"}), 400

    # Resolved roots the client is allowed to read from.
    allowed_roots = tuple(
        os.path.realpath(os.path.join(app.root_path, d)) + os.sep
        for d in ('data', 'log')
    )

    # Validate and correct file paths (both /data/ and /log/ share one rule;
    # the original duplicated this branch per prefix).
    valid_file_paths = []
    for file_path in file_paths:
        if not (file_path.startswith('/data/') or file_path.startswith('/log/')):
            continue
        full_path = os.path.realpath(os.path.join(app.root_path, file_path.lstrip('/')))
        if full_path.startswith(allowed_roots) and os.path.isfile(full_path):
            valid_file_paths.append(full_path)

    if not valid_file_paths:
        return jsonify({"error": "No valid files specified"}), 400

    # Create a unique zip file name
    zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
    create_zip(valid_file_paths, zip_name)

    return send_from_directory(directory='temp', path=zip_name, as_attachment=True)
|
|
|
|
@app.route('/delete_files', methods=['POST'])
def delete_files():
    """Delete the requested files, restricted to the data/ and log/ folders.

    The client controls the path strings, so each one is resolved with
    realpath and refused unless it stays inside data/ or log/ — the original
    joined them blindly and passed the result to os.remove, allowing '..'
    traversal to delete arbitrary files.
    """
    file_paths = request.json.get('file_paths', [])

    if not file_paths:
        return jsonify({"error": "No files specified"}), 400

    # Resolved roots the client is allowed to delete from.
    allowed_roots = tuple(
        os.path.realpath(os.path.join(app.root_path, d)) + os.sep
        for d in ('data', 'log')
    )

    errors = []
    for file_path in file_paths:
        full_path = os.path.realpath(os.path.join(app.root_path, file_path.lstrip('/')))
        # Treat escapes and genuinely missing files the same way, so the
        # response shape matches the original.
        if not full_path.startswith(allowed_roots) or not os.path.isfile(full_path):
            errors.append({"file": full_path, "error": "File not found"})
            continue

        try:
            os.remove(full_path)
        except Exception as e:
            errors.append({"file": full_path, "error": str(e)})

    if errors:
        return jsonify({"errors": errors}), 207  # Multi-Status response
    return jsonify({"success": True}), 200
|
|
|
|
@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a Unix timestamp as 'YYYY-MM-DD HH:MM:SS'."""
    moment = datetime.fromtimestamp(value)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
def get_size(path):
|
|
size = os.path.getsize(path)
|
|
if size < 1024:
|
|
return f"{size} bytes"
|
|
elif size < pow(1024,2):
|
|
return f"{round(size/1024, 2)} KB"
|
|
elif size < pow(1024,3):
|
|
return f"{round(size/(pow(1024,2)), 2)} MB"
|
|
elif size < pow(1024,4):
|
|
return f"{round(size/(pow(1024,3)), 2)} GB"
|
|
|
|
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a CSV capture from the data/ directory."""
    data_dir = 'data'
    return send_from_directory(data_dir, filename)
|
|
|
|
@app.route('/log/<path:filename>')
def download_log_file(filename):
    """Serve a log file from the log/ directory."""
    log_dir = 'log'
    return send_from_directory(log_dir, filename)
|
|
|
|
@app.route('/config/lines_per_page')
def get_lines_per_page():
    """Expose the configured log-view page size to the front end.

    configparser values are strings; convert to int so the JSON payload
    carries a number (the original returned the raw string).
    """
    lines_per_page = int(config['LOGGING']['VIEW_PAGE_LINES'])
    return jsonify({"lines_per_page": lines_per_page})
|
|
|
|
if __name__ == '__main__':
    # Development server only — debug=True enables the werkzeug debugger and
    # reloader and must not be exposed in production.  threaded=True lets the
    # long-lived SSE /logs stream coexist with other requests.
    app.run(debug=True, threaded=True)