diff --git a/app/analysis.py b/app/analysis.py
new file mode 100644
index 0000000..d0660d4
--- /dev/null
+++ b/app/analysis.py
@@ -0,0 +1,102 @@
+import os
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")  # Prevents GUI-related issues in Flask
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+def load_data(file_path: str) -> pd.DataFrame:
+    """Load the scraped data from a CSV file into a Pandas DataFrame.
+
+    The "timestamp" and "last_action" columns are parsed as datetimes;
+    values that cannot be parsed become NaT instead of raising.
+
+    Raises:
+        FileNotFoundError: If file_path is not an existing regular file.
+        KeyError: If either datetime column is missing from the CSV.
+    """
+    # isfile() also rejects directories, which exists() would let through.
+    if not os.path.isfile(file_path):
+        raise FileNotFoundError(f"File {file_path} not found.")
+
+    df = pd.read_csv(file_path)
+    for column in ("timestamp", "last_action"):
+        df[column] = pd.to_datetime(df[column], errors="coerce")
+
+    return df
+
+
+def _activity_hours(df: pd.DataFrame) -> pd.Series:
+    """Return the hour of day (0-23) of every parseable timestamp in df.
+
+    Works on a derived Series, so the caller's DataFrame is never modified.
+    """
+    timestamps = df["timestamp"]
+    if not pd.api.types.is_datetime64_any_dtype(timestamps):
+        timestamps = pd.to_datetime(timestamps, errors="coerce")
+
+    # NaT rows make .dt.hour return floats; drop them to keep integer hours.
+    return timestamps.dt.hour.dropna().astype(int).rename("hour")
+
+
+def generate_statistics(df: pd.DataFrame) -> pd.Series:
+    """Return the number of rows per hour of day, indexed by hour.
+
+    Rows whose timestamp cannot be parsed are ignored, and the input
+    DataFrame is left unmodified.
+    """
+    hours = _activity_hours(df)
+    return hours.groupby(hours).size()
+
+
+def plot_activity_distribution(
+    df: pd.DataFrame, output_path: str = "activity_distribution.png"
+) -> str:
+    """Plot the per-hour activity distribution and save it as an image.
+
+    Args:
+        df: Data with a "timestamp" column; it is not modified.
+        output_path: File name of the image, joined onto app/static/plots.
+
+    Returns:
+        The path of the saved image (app/static/plots joined with output_path).
+
+    Raises:
+        FileNotFoundError: If the image is missing after saving.
+    """
+    # Ensure the directory exists
+    static_dir = os.path.join("app", "static", "plots")
+    output_path = os.path.join(static_dir, output_path)
+    os.makedirs(static_dir, exist_ok=True)
+
+    # Fill in hours without activity so the chart always covers all 24 hours.
+    hours = _activity_hours(df)
+    activity_counts = (
+        hours.groupby(hours).size()
+        .reindex(range(24), fill_value=0)
+        .rename_axis("hour")
+        .reset_index(name="count")
+    )
+
+    fig, ax = plt.subplots(figsize=(10, 5))
+    try:
+        # Assign `hue` explicitly to avoid the Seaborn palette warning
+        sns.barplot(
+            x="hour", y="count", data=activity_counts,
+            hue="hour", palette="Blues", legend=False, ax=ax,
+        )
+        ax.set_xlabel("Hour of the Day")
+        ax.set_ylabel("Activity Count")
+        ax.set_title("User Activity Distribution")
+        fig.savefig(output_path, bbox_inches="tight")
+    finally:
+        # Release the figure even if plotting or saving fails.
+        plt.close(fig)
+
+    # Verify the file exists after saving
+    if not os.path.exists(output_path):
+        raise FileNotFoundError(f"Plot could not be saved to {output_path}")
+
+    return output_path
diff --git a/app/app.py b/app/app.py
index b9afd09..776a397 100644
--- a/app/app.py
+++ b/app/app.py
@@ -6,6 +6,7 @@ from app.views import register_views
 from app.api import register_api
 from app.config import load_config
 from app.filters import register_filters
+from app.analysis import generate_statistics
 
 def init_app():
     config = load_config()
diff --git a/app/templates/data_visualization.html b/app/templates/data_visualization.html
new file mode 100644
index 0000000..d047a27
--- /dev/null
+++ b/app/templates/data_visualization.html
@@ -0,0 +1,68 @@
+{% extends 'base.html' %}
+
+{% block content %}
+
+
+
+
+
+

User Activity Distribution

+
+
+ +
+ + +
+
+
+ + {% if error %} + + {% endif %} + + {% if plot_url %} +
+
+

Selected File: {{ selected_file.split('/')[-1] }}

+ User Activity Distribution +
+
+ {% endif %} + + {% if statistics %} +
+
+

Activity Statistics

+ + + + + + + + + {% for hour, count in statistics.items() %} + + + + + {% endfor %} + +
<th>Hour</th><th>Activity Count</th>
<td>{{ hour }}</td><td>{{ count }}</td>
+
+
+ {% endif %} + +
+
+
+{% endblock content %} diff --git a/app/templates/includes/navigation.html b/app/templates/includes/navigation.html index 66935c4..293c2c0 100644 --- a/app/templates/includes/navigation.html +++ b/app/templates/includes/navigation.html @@ -3,7 +3,7 @@
Torn User Activity Scraper {% from 'bootstrap4/nav.html' import render_nav_item %} - {{ render_nav_item('analyze', 'Analyze') }} + {{ render_nav_item('views.data_visualization', 'Data Visualization') }} {{ render_nav_item('download_results', 'Files') }} {{ render_nav_item('log_viewer', 'Logs') }}
diff --git a/app/views.py b/app/views.py
index e4f5032..1507bf3 100644
--- a/app/views.py
+++ b/app/views.py
@@ -1,12 +1,14 @@
 import os
 import glob
-from flask import render_template
+from flask import render_template, Blueprint, current_app, request
 from app.forms import ScrapingForm
 from app.util import get_size
 from app.config import load_config
 from app.api import scraper as scraper# Import the scraper instance
 from app.logging_config import get_logger
 
+from app.analysis import load_data, generate_statistics, plot_activity_distribution
+
 from app.state import log_file_name
 
 
@@ -15,6 +17,8 @@ print(f"A imported log_file_name: {log_file_name}")
 config = load_config()
 logger = get_logger()
 
+views_bp = Blueprint("views", __name__)
+
 def register_views(app):
     @app.route('/')
     def index():
@@ -79,4 +83,61 @@ def register_views(app):
 
         files = {"data": data_files_info, "log": log_files_info}
 
-        return render_template('download_results.html', files=files)
\ No newline at end of file
+        return render_template('download_results.html', files=files)
+
+    # NOTE(review): this local blueprint shadows the module-level `views_bp`;
+    # only the local one is registered on `app` below.
+    views_bp = Blueprint("views", __name__)
+
+    @views_bp.route("/data-visualization", methods=["GET", "POST"])
+    def data_visualization():
+        """Render the per-hour activity statistics and plot for a data file.
+
+        The file is chosen through the "data_file" form field and defaults to
+        the most recently modified CSV in the configured data directory.
+        """
+        data_dir = current_app.config["DATA"]["DATA_DIR"]
+
+        # Find all available CSV files, newest first
+        data_files = sorted(
+            glob.glob(os.path.join(data_dir, "*.csv")),
+            key=os.path.getmtime,
+            reverse=True
+        )
+
+        if not data_files:
+            return render_template("data_visualization.html", error="No data files found.", data_files=[])
+
+        # Get the selected file from the dropdown (default to the latest file)
+        selected_file = request.form.get("data_file") or data_files[0]
+
+        # The form value is untrusted: only accept files offered in the list,
+        # otherwise any readable path on the server could be requested.
+        if selected_file not in data_files:
+            return render_template("data_visualization.html", error="Invalid file selection.", data_files=data_files)
+
+        try:
+            df = load_data(selected_file)
+            statistics = generate_statistics(df)
+            plot_path = plot_activity_distribution(df)
+        except (OSError, KeyError, ValueError):
+            # Unreadable, malformed or incomplete CSVs should not end in a 500.
+            current_app.logger.exception("Could not analyze data file %s", selected_file)
+            return render_template("data_visualization.html", error="The selected file could not be analyzed.", data_files=data_files)
+
+        # Drop only the leading "app/" and normalise Windows separators;
+        # str.replace() would also rewrite any later "app/" in the path.
+        plot_url = plot_path.replace(os.sep, "/")
+        if plot_url.startswith("app/"):
+            plot_url = plot_url[len("app/"):]
+
+        return render_template(
+            "data_visualization.html",
+            plot_url=plot_url,
+            statistics=statistics.to_dict(),
+            data_files=data_files,
+            selected_file=selected_file
+        )
+
+
+    app.register_blueprint(views_bp)