feature/analysis-form #10
@@ -22,13 +22,24 @@ class BasePlotAnalysis(BaseAnalysis, ABC):
|
||||
- Transformation
|
||||
- Plot generation
|
||||
- Memory cleanup
|
||||
"""
|
||||
|
||||
Attributes:
|
||||
plot_filename (str): The filename for the output plot.
|
||||
alt_text (str): The alt text for the plot.
|
||||
"""
|
||||
plot_filename = "default_plot.png"
|
||||
alt_text = "Default Alt Text"
|
||||
|
||||
def execute(self, df: pd.DataFrame):
|
||||
"""Executes the full analysis pipeline"""
|
||||
"""
|
||||
Executes the full analysis pipeline.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
str: HTML img tag containing the URL to the generated plot.
|
||||
"""
|
||||
df = prepare_data(df) # Step 1: Prepare data
|
||||
|
||||
paths = mk_plotdir(self.plot_filename)
|
||||
@@ -45,10 +56,23 @@ class BasePlotAnalysis(BaseAnalysis, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Subclasses must define how they transform the data"""
|
||||
"""
|
||||
Subclasses must define how they transform the data.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Subclasses must define how they generate the plot"""
|
||||
"""
|
||||
Subclasses must define how they generate the plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -18,13 +18,24 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC):
|
||||
- Transformation
|
||||
- Plot generation
|
||||
- Memory cleanup
|
||||
"""
|
||||
|
||||
Attributes:
|
||||
plot_filename (str): The filename for the output plot.
|
||||
alt_text (str): The alt text for the plot.
|
||||
"""
|
||||
plot_filename = "default_plot.html"
|
||||
alt_text = "Default Alt Text"
|
||||
|
||||
def execute(self, df: pd.DataFrame):
|
||||
"""Executes the full analysis pipeline"""
|
||||
"""
|
||||
Executes the full analysis pipeline.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
str: HTML iframe containing the URL to the generated plot.
|
||||
"""
|
||||
df = prepare_data(df) # Step 1: Prepare data
|
||||
|
||||
paths = mk_plotdir(self.plot_filename)
|
||||
@@ -41,10 +52,23 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Subclasses must define how they transform the data"""
|
||||
"""
|
||||
Subclasses must define how they transform the data.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Subclasses must define how they generate the plot"""
|
||||
"""
|
||||
Subclasses must define how they generate the plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
|
||||
"""
|
||||
pass
|
||||
@@ -3,6 +3,20 @@ import os
|
||||
import pandas as pd
|
||||
|
||||
def prepare_data(df):
|
||||
"""
|
||||
Prepares the data for analysis by converting timestamps, calculating previous timestamps,
|
||||
determining active status, and extracting the hour from the timestamp.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The processed DataFrame with additional columns for analysis.
|
||||
|
||||
The returned DataFrame will have the following columns:
|
||||
user_id name last_action status timestamp prev_timestamp was_active hour
|
||||
0 12345678 UserName 2025-02-08 17:58:11 Okay 2025-02-08 18:09:41.867984056 NaT False 18
|
||||
"""
|
||||
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
||||
df["last_action"] = pd.to_datetime(df["last_action"])
|
||||
df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
|
||||
@@ -12,6 +26,15 @@ def prepare_data(df):
|
||||
return df
|
||||
|
||||
def mk_plotdir(output_filename):
|
||||
"""
|
||||
Creates the directory for storing plots and generates the output path and URL for the plot.
|
||||
|
||||
Parameters:
|
||||
output_filename (str): The filename for the output plot.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the output path and plot URL.
|
||||
"""
|
||||
plots_dir = os.path.join(current_app.root_path, "static", "plots")
|
||||
os.makedirs(plots_dir, exist_ok=True)
|
||||
|
||||
|
||||
@@ -12,18 +12,40 @@ matplotlib.use('Agg')
|
||||
logger = get_logger()
|
||||
|
||||
class PlotTopActiveUsers(BasePlotAnalysis):
|
||||
"""
|
||||
Class for analyzing the most active users and generating a bar chart.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
name = "Top Active Users"
|
||||
description = "Displays the most active users based on their number of recorded actions."
|
||||
plot_filename = "bar_activity-per-user.png"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data for the bar plot"""
|
||||
"""
|
||||
Transform data for the bar plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame with active counts per user.
|
||||
"""
|
||||
df = df[df['was_active'] == True].groupby('name').size().reset_index(name='active_count')
|
||||
return df
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Generate bar plot"""
|
||||
"""
|
||||
Generate bar plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing active counts per user.
|
||||
"""
|
||||
# create a barplot from active counts sorted by active count
|
||||
plt.figure(figsize=(10, 6))
|
||||
sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
|
||||
|
||||
@@ -9,17 +9,40 @@ import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
class PlotPeakHours(BasePlotAnalysis):
|
||||
"""
|
||||
Class for analyzing peak activity hours and generating a bar chart.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
|
||||
name = "Peak Hours Analysis"
|
||||
description = "Identifies peak activity hours using a bar chart."
|
||||
plot_filename = "peak_hours.png"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data to add was_active column and extract peak hours"""
|
||||
"""
|
||||
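No transformation is needed here: the DataFrame is returned as-is. The was_active and hour columns used for the peak-hours chart are expected to be added beforehand by prepare_data (see data_utils.py).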
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The input DataFrame, returned unchanged.
|
||||
"""
|
||||
return df
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Generate bar chart for peak hours"""
|
||||
"""
|
||||
Generate bar chart for peak hours.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing user activity data.
|
||||
"""
|
||||
peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index()
|
||||
|
||||
plt.figure(figsize=(12, 5))
|
||||
|
||||
@@ -7,13 +7,30 @@ import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
class PlotActivityHeatmap(BasePlotAnalysis):
|
||||
"""
|
||||
Class for analyzing user activity trends over multiple days and generating a heatmap.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
name = "Activity Heatmap"
|
||||
description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
|
||||
plot_filename = "activity_heatmap.png"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data for the heatmap"""
|
||||
"""
|
||||
Transform data for the heatmap.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame with activity counts by hour.
|
||||
"""
|
||||
active_counts = df[df['was_active']].pivot_table(
|
||||
index='name',
|
||||
columns='hour',
|
||||
@@ -25,7 +42,12 @@ class PlotActivityHeatmap(BasePlotAnalysis):
|
||||
return active_counts.sort_values(by='total_active_minutes', ascending=False)
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Generate heatmap plot"""
|
||||
"""
|
||||
Generate heatmap plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
|
||||
"""
|
||||
plt.figure(figsize=(12, 8))
|
||||
sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'})
|
||||
plt.xlabel('Hour of Day')
|
||||
|
||||
@@ -12,13 +12,30 @@ matplotlib.use('Agg')
|
||||
logger = get_logger()
|
||||
|
||||
class PlotLineActivityAllUsers(BasePlotAnalysis):
|
||||
"""
|
||||
Class for analyzing user activity trends over multiple days and generating a line graph.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
name = "Activity Line Graph (All Users)"
|
||||
description = "This analysis shows the activity line graph for all users. Generates a downloadable PNG image."
|
||||
plot_filename = "line_activity-all_users.png"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data for the bar plot"""
|
||||
"""
|
||||
Transform data for the line plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame with activity counts by hour.
|
||||
"""
|
||||
df['hour'] = df['timestamp'].dt.hour
|
||||
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
|
||||
df['total_active_minutes'] = df.sum(axis=1)
|
||||
@@ -30,7 +47,12 @@ class PlotLineActivityAllUsers(BasePlotAnalysis):
|
||||
return df
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
# Plot activity throughout the day for each user with the cumulative sum having a bold line in one plot
|
||||
"""
|
||||
Generate line graph for user activity throughout the day.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
|
||||
"""
|
||||
plt.figure(figsize=(12, 6))
|
||||
|
||||
# Plot each user's activity
|
||||
|
||||
@@ -10,13 +10,30 @@ from app.logging_config import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
|
||||
"""
|
||||
Class for analyzing user activity trends over multiple days and generating an interactive heatmap.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
name = "Activity Heatmap (Interactive)"
|
||||
description = "Displays user activity trends over multiple days using an interactive heatmap."
|
||||
plot_filename = "activity_heatmap.html"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data for the heatmap"""
|
||||
"""
|
||||
Transform data for the heatmap.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame with activity counts by hour.
|
||||
"""
|
||||
df['hour'] = df['timestamp'].dt.hour
|
||||
active_counts = df[df['was_active']].pivot_table(
|
||||
index='name',
|
||||
@@ -34,7 +51,12 @@ class PlotlyActivityHeatmap(BasePlotlyAnalysis):
|
||||
return active_counts
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
"""Generate heatmap plot"""
|
||||
"""
|
||||
Generate heatmap plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
|
||||
"""
|
||||
df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
|
||||
|
||||
# Create a Plotly heatmap
|
||||
|
||||
@@ -9,13 +9,30 @@ from app.logging_config import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
|
||||
"""
|
||||
Class for analyzing user activity trends over multiple days and generating an interactive line graph.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the analysis.
|
||||
description (str): A brief description of the analysis.
|
||||
plot_filename (str): The filename for the output plot.
|
||||
note (str): Additional notes for the analysis.
|
||||
"""
|
||||
name = "Activity Line Graph (All Users, Interactive)"
|
||||
description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
|
||||
plot_filename = "line_activity-all_users.html"
|
||||
note = ""
|
||||
|
||||
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Transform data for the line plot"""
|
||||
"""
|
||||
Transform data for the line plot.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The input DataFrame containing user activity data.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame with activity counts by hour.
|
||||
"""
|
||||
df['hour'] = df['timestamp'].dt.hour
|
||||
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
|
||||
df['total_active_minutes'] = df.sum(axis=1)
|
||||
@@ -27,7 +44,12 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
|
||||
return df
|
||||
|
||||
def plot_data(self, df: pd.DataFrame):
|
||||
# Create a Plotly figure
|
||||
"""
|
||||
Generate interactive line graph for user activity throughout the day.
|
||||
|
||||
Parameters:
|
||||
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
|
||||
"""
|
||||
self.fig = make_subplots()
|
||||
|
||||
# Plot each user's activity
|
||||
@@ -37,7 +59,6 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
|
||||
else:
|
||||
self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index))
|
||||
|
||||
# Update layout
|
||||
self.fig.update_layout(
|
||||
title='User Activity Throughout the Day',
|
||||
xaxis_title='Hour of Day',
|
||||
|
||||
Reference in New Issue
Block a user