adds data analysis
This commit is contained in:
60
app/analysis.py
Normal file
60
app/analysis.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import matplotlib
|
||||
matplotlib.use("Agg") # Prevents GUI-related issues in Flask
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
|
||||
|
||||
def load_data(file_path: str) -> pd.DataFrame:
    """Load the scraped data from a CSV file into a pandas DataFrame.

    The ``timestamp`` and ``last_action`` columns are converted to
    datetimes. Values that cannot be parsed become ``NaT`` instead of
    raising, because the conversion runs with ``errors="coerce"``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with both date columns converted.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file
            (a directory path is rejected as well).
        KeyError: If the CSV has no ``timestamp`` or ``last_action`` column.
    """
    # isfile() rather than exists(): a directory passes exists() and would
    # only fail later inside read_csv with a platform-dependent error.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File {file_path} not found.")

    df = pd.read_csv(file_path)
    for column in ("timestamp", "last_action"):
        df[column] = pd.to_datetime(df[column], errors="coerce")
    return df
|
||||
|
||||
def generate_statistics(df: pd.DataFrame) -> pd.Series:
    """Count rows per hour of day, based on the ``timestamp`` column.

    The input DataFrame is left untouched: the hour values are derived into
    a separate Series instead of being written back as a new column.

    Args:
        df: Data containing a ``timestamp`` column. If the column is not
            already a datetime dtype it is converted with
            ``errors="coerce"``, so unparseable values become ``NaT``.

    Returns:
        A Series of row counts whose index is named ``hour``. Rows with a
        missing timestamp are not counted.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
    """
    timestamps = df["timestamp"]
    # Mirror plot_activity_distribution: accept raw (e.g. string) timestamps
    # instead of failing on the .dt accessor.
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    # Group by a derived key so the caller's frame gains no "hour" column.
    hours = timestamps.dt.hour.rename("hour")
    return df.groupby(hours).size()
|
||||
|
||||
def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
    """Plot the number of rows per hour of day and save it as an image.

    The chart always shows all 24 hours; hours without any rows are drawn
    with a count of zero. The input DataFrame is not modified.

    Args:
        df: Data containing a ``timestamp`` column. If the column is not
            already a datetime dtype it is converted with
            ``errors="coerce"``; rows whose timestamp is missing or
            unparseable are ignored.
        output_path: File name (or path) of the image, joined onto
            ``app/static/plots``.

    Returns:
        The path the figure was written to.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
        FileNotFoundError: If the image file does not exist after saving.
    """
    static_dir = os.path.join("app", "static", "plots")
    output_path = os.path.join(static_dir, output_path)
    # Create the directory the file actually lands in, so an output_path
    # with sub-directories (or an absolute one) does not fail on save.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Work on a derived Series so the caller's frame is left unchanged.
    timestamps = df["timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")
    hours = timestamps.dt.hour.dropna().astype(int)

    # Fill in every hour 0-23 so that bar N is hour N. The bar plot places
    # bars by category order, which would otherwise disagree with the fixed
    # 0-23 ticks set below whenever some hours have no data.
    activity_counts = (
        hours.value_counts()
        .reindex(range(24), fill_value=0)
        .rename_axis("hour")
        .reset_index(name="count")
    )

    fig = plt.figure(figsize=(10, 5))
    try:
        # `hue` is assigned explicitly (with the legend off) to avoid the
        # seaborn warning about passing `palette` without `hue`.
        sns.barplot(x="hour", y="count", data=activity_counts, hue="hour", palette="Blues", legend=False)

        plt.xlabel("Hour of the Day")
        plt.ylabel("Activity Count")
        plt.title("User Activity Distribution")
        plt.xticks(range(0, 24))

        plt.savefig(output_path, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    # Verify the file exists after saving
    if not os.path.exists(output_path):
        raise FileNotFoundError(f"Plot could not be saved to {output_path}")

    return output_path
|
||||
Reference in New Issue
Block a user