# File-viewer header (not part of the program): 60 lines, 2.0 KiB, Python.
import os
|
|
import pandas as pd
|
|
import matplotlib
|
|
matplotlib.use("Agg") # Prevents GUI-related issues in Flask
|
|
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
|
|
|
|
def load_data(file_path: str) -> pd.DataFrame:
    """Read the scraped CSV at ``file_path`` and parse its date columns.

    The ``timestamp`` and ``last_action`` columns are converted with
    ``pd.to_datetime(..., errors="coerce")``, so values that cannot be
    parsed become ``NaT`` instead of raising.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame with both date columns converted.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
        # Same conversion for each date column, in the original order.
        for column in ("timestamp", "last_action"):
            frame[column] = pd.to_datetime(frame[column], errors="coerce")
        return frame

    raise FileNotFoundError(f"File {file_path} not found.")
|
|
|
|
def generate_statistics(df: pd.DataFrame) -> pd.Series:
    """Count rows per hour of the day, based on the ``timestamp`` column.

    The caller's DataFrame is not modified: the hour values are derived on
    a separate Series instead of being written back as an ``hour`` column.

    Args:
        df: Frame containing a ``timestamp`` column. If the column is not
            already a datetime dtype it is parsed with
            ``pd.to_datetime(..., errors="coerce")``.

    Returns:
        A Series indexed by ``hour`` whose values are the number of rows
        falling in that hour. Hours without rows are absent, and rows whose
        timestamp is ``NaT`` are not counted (``groupby`` drops NaN keys).

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
    """
    timestamps = df["timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        # Parse on a local Series so the input column keeps its dtype.
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    # Group a one-column frame rather than adding "hour" to the caller's df.
    hours = timestamps.dt.hour.rename("hour")
    return hours.to_frame().groupby("hour").size()
|
|
|
|
def _hourly_activity_counts(df: pd.DataFrame) -> pd.DataFrame:
    """Return a 24-row frame (``hour``, ``count``) without modifying ``df``."""
    timestamps = df["timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    # NaT rows have no hour; dropping them keeps the hour values integral
    # (otherwise they become floats and the axis labels read "9.0").
    hours = timestamps.dt.hour.dropna().astype(int).rename("hour")
    counts = hours.to_frame().groupby("hour").size()

    # seaborn places bars at ordinal positions 0..k-1, not at the hour value,
    # so every hour must be present for position and hour to coincide.
    counts = counts.reindex(range(24), fill_value=0).rename_axis("hour")
    return counts.reset_index(name="count")


def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
    """Plot the number of rows per hour of the day and save it as an image.

    The caller's DataFrame is not modified. All 24 hours are drawn; hours
    without activity appear as zero-height bars so that the bars line up
    with the hour ticks.

    Args:
        df: Frame containing a ``timestamp`` column. Non-datetime values are
            parsed with ``pd.to_datetime(..., errors="coerce")``; rows that
            cannot be parsed are ignored.
        output_path: File name (or relative path) of the image, resolved
            against ``app/static/plots``.

    Returns:
        The path the figure was written to.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
        FileNotFoundError: If the image is missing after saving.
    """
    static_dir = os.path.join("app", "static", "plots")
    output_path = os.path.join(static_dir, output_path)
    # Create the directory the file actually lands in, which differs from
    # static_dir when output_path carries its own sub-directory.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    activity_counts = _hourly_activity_counts(df)

    # Explicit Figure/Axes instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # `hue` is assigned explicitly because seaborn warns when `palette`
        # is passed without it.
        sns.barplot(
            x="hour",
            y="count",
            data=activity_counts,
            hue="hour",
            palette="Blues",
            legend=False,
            ax=ax,
        )
        ax.set_xlabel("Hour of the Day")
        ax.set_ylabel("Activity Count")
        ax.set_title("User Activity Distribution")
        ax.set_xticks(range(24))

        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise figures accumulate in a long-running process.
        plt.close(fig)

    # Verify the file exists after saving
    if not os.path.exists(output_path):
        raise FileNotFoundError(f"Plot could not be saved to {output_path}")

    return output_path