diff --git a/app/analysis.py b/app/analysis.py
new file mode 100644
index 0000000..d0660d4
--- /dev/null
+++ b/app/analysis.py
@@ -0,0 +1,60 @@
+import os
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg") # Prevents GUI-related issues in Flask
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+def load_data(file_path: str) -> pd.DataFrame:
+ """Loads the scraped data from a CSV file into a Pandas DataFrame."""
+ if not os.path.exists(file_path):
+ raise FileNotFoundError(f"File {file_path} not found.")
+
+ df = pd.read_csv(file_path)
+ df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
+ df["last_action"] = pd.to_datetime(df["last_action"], errors="coerce")
+
+ return df
+
+def generate_statistics(df: pd.DataFrame):
+ """Generates activity statistics grouped by hour."""
+ df["hour"] = df["timestamp"].dt.hour
+ return df.groupby("hour").size()
+
+def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
+ """Plots user activity distribution and saves the figure."""
+
+ # Ensure the directory exists
+ static_dir = os.path.join("app", "static", "plots")
+ output_path = os.path.join(static_dir, output_path)
+ os.makedirs(static_dir, exist_ok=True)
+
+ # Convert timestamp column to datetime (if not already)
+ if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
+ df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
+
+ df["hour"] = df["timestamp"].dt.hour
+ activity_counts = df.groupby("hour").size().reset_index(name="count")
+
+ # Use non-GUI backend for Matplotlib
+ plt.figure(figsize=(10, 5))
+
+ # Fix Seaborn Warning: Assign `hue` explicitly
+ sns.barplot(x="hour", y="count", data=activity_counts, hue="hour", palette="Blues", legend=False)
+
+ plt.xlabel("Hour of the Day")
+ plt.ylabel("Activity Count")
+ plt.title("User Activity Distribution")
+ plt.xticks(range(0, 24))
+
+ # Save the plot file safely
+ plt.savefig(output_path, bbox_inches="tight")
+ plt.close()
+
+ # Verify the file exists after saving
+ if not os.path.exists(output_path):
+ raise FileNotFoundError(f"Plot could not be saved to {output_path}")
+
+ return output_path
\ No newline at end of file
diff --git a/app/app.py b/app/app.py
index b9afd09..776a397 100644
--- a/app/app.py
+++ b/app/app.py
@@ -6,6 +6,7 @@ from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
+from app.analysis import generate_statistics
def init_app():
config = load_config()
diff --git a/app/templates/data_visualization.html b/app/templates/data_visualization.html
new file mode 100644
index 0000000..d047a27
--- /dev/null
+++ b/app/templates/data_visualization.html
@@ -0,0 +1,68 @@
+{% extends 'base.html' %}
+
+{% block content %}
+User Activity Distribution
+ Selected File: {{ selected_file.split('/')[-1] }}
+
+
Activity Statistics
+
+
+
+
+
+
+
+ {% for hour, count in statistics.items() %}
+ Hour
+ Activity Count
+
+
+ {% endfor %}
+
+ {{ hour }}
+ {{ count }}
+