feature/analysis-form #10

Merged
mischbeck merged 4 commits from feature/analysis-form into master 2025-02-10 03:11:58 +01:00
9 changed files with 225 additions and 22 deletions
Showing only changes of commit 595237c172 - Show all commits

View File

@@ -22,13 +22,24 @@ class BasePlotAnalysis(BaseAnalysis, ABC):
- Transformation - Transformation
- Plot generation - Plot generation
- Memory cleanup - Memory cleanup
Attributes:
plot_filename (str): The filename for the output plot.
alt_text (str): The alt text for the plot.
""" """
plot_filename = "default_plot.png" plot_filename = "default_plot.png"
alt_text = "Default Alt Text" alt_text = "Default Alt Text"
def execute(self, df: pd.DataFrame): def execute(self, df: pd.DataFrame):
"""Executes the full analysis pipeline""" """
Executes the full analysis pipeline.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
str: HTML img tag containing the URL to the generated plot.
"""
df = prepare_data(df) # Step 1: Prepare data df = prepare_data(df) # Step 1: Prepare data
paths = mk_plotdir(self.plot_filename) paths = mk_plotdir(self.plot_filename)
@@ -45,10 +56,23 @@ class BasePlotAnalysis(BaseAnalysis, ABC):
@abstractmethod @abstractmethod
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Subclasses must define how they transform the data""" """
Subclasses must define how they transform the data.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
pass pass
@abstractmethod @abstractmethod
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Subclasses must define how they generate the plot""" """
Subclasses must define how they generate the plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
"""
pass pass

View File

@@ -18,13 +18,24 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC):
- Transformation - Transformation
- Plot generation - Plot generation
- Memory cleanup - Memory cleanup
Attributes:
plot_filename (str): The filename for the output plot.
alt_text (str): The alt text for the plot.
""" """
plot_filename = "default_plot.html" plot_filename = "default_plot.html"
alt_text = "Default Alt Text" alt_text = "Default Alt Text"
def execute(self, df: pd.DataFrame): def execute(self, df: pd.DataFrame):
"""Executes the full analysis pipeline""" """
Executes the full analysis pipeline.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
str: HTML iframe containing the URL to the generated plot.
"""
df = prepare_data(df) # Step 1: Prepare data df = prepare_data(df) # Step 1: Prepare data
paths = mk_plotdir(self.plot_filename) paths = mk_plotdir(self.plot_filename)
@@ -41,10 +52,23 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC):
@abstractmethod @abstractmethod
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Subclasses must define how they transform the data""" """
Subclasses must define how they transform the data.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
pass pass
@abstractmethod @abstractmethod
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Subclasses must define how they generate the plot""" """
Subclasses must define how they generate the plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
"""
pass pass

View File

@@ -3,6 +3,20 @@ import os
import pandas as pd import pandas as pd
def prepare_data(df): def prepare_data(df):
"""
Prepares the data for analysis by converting timestamps, calculating previous timestamps,
determining active status, and extracting the hour from the timestamp.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The processed DataFrame with additional columns for analysis.
The returned DataFrame will have the following columns:
user_id name last_action status timestamp prev_timestamp was_active hour
0 12345678 UserName 2025-02-08 17:58:11 Okay 2025-02-08 18:09:41.867984056 NaT False 18
"""
df["timestamp"] = pd.to_datetime(df["timestamp"]) df["timestamp"] = pd.to_datetime(df["timestamp"])
df["last_action"] = pd.to_datetime(df["last_action"]) df["last_action"] = pd.to_datetime(df["last_action"])
df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1) df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
@@ -12,6 +26,15 @@ def prepare_data(df):
return df return df
def mk_plotdir(output_filename): def mk_plotdir(output_filename):
"""
Creates the directory for storing plots and generates the output path and URL for the plot.
Parameters:
output_filename (str): The filename for the output plot.
Returns:
dict: A dictionary containing the output path and plot URL.
"""
plots_dir = os.path.join(current_app.root_path, "static", "plots") plots_dir = os.path.join(current_app.root_path, "static", "plots")
os.makedirs(plots_dir, exist_ok=True) os.makedirs(plots_dir, exist_ok=True)
@@ -19,4 +42,4 @@ def mk_plotdir(output_filename):
plot_url = url_for('static', filename=f'plots/{output_filename}', _external=True) plot_url = url_for('static', filename=f'plots/{output_filename}', _external=True)
return {'output_path': output_path, 'plot_url': plot_url} return {'output_path': output_path, 'plot_url': plot_url}

View File

@@ -12,18 +12,40 @@ matplotlib.use('Agg')
logger = get_logger() logger = get_logger()
class PlotTopActiveUsers(BasePlotAnalysis): class PlotTopActiveUsers(BasePlotAnalysis):
"""
Class for analyzing the most active users and generating a bar chart.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Top Active Users" name = "Top Active Users"
description = "Displays the most active users based on their number of recorded actions." description = "Displays the most active users based on their number of recorded actions."
plot_filename = "bar_activity-per-user.png" plot_filename = "bar_activity-per-user.png"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data for the bar plot""" """
Transform data for the bar plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with active counts per user.
"""
df = df[df['was_active'] == True].groupby('name').size().reset_index(name='active_count') df = df[df['was_active'] == True].groupby('name').size().reset_index(name='active_count')
return df return df
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Generate bar plot""" """
Generate bar plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing active counts per user.
"""
# create a barplot from active counts sorted by active count # create a barplot from active counts sorted by active count
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False)) sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))

View File

@@ -9,17 +9,40 @@ import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
class PlotPeakHours(BasePlotAnalysis): class PlotPeakHours(BasePlotAnalysis):
"""
Class for analyzing peak activity hours and generating a bar chart.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Peak Hours Analysis" name = "Peak Hours Analysis"
description = "Identifies peak activity hours using a bar chart." description = "Identifies peak activity hours using a bar chart."
plot_filename = "peak_hours.png" plot_filename = "peak_hours.png"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data to add was_active column and extract peak hours""" """
Return the data unchanged. The was_active and hour columns used for the peak-hours plot are expected to be present already, added during data preparation (prepare_data). See data_utils.py.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The input DataFrame, returned as-is.
"""
return df return df
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Generate bar chart for peak hours""" """
Generate bar chart for peak hours.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing user activity data.
"""
peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index() peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index()
plt.figure(figsize=(12, 5)) plt.figure(figsize=(12, 5))

View File

@@ -7,13 +7,30 @@ import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
class PlotActivityHeatmap(BasePlotAnalysis): class PlotActivityHeatmap(BasePlotAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating a heatmap.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Heatmap" name = "Activity Heatmap"
description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image." description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
plot_filename = "activity_heatmap.png" plot_filename = "activity_heatmap.png"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data for the heatmap""" """
Transform data for the heatmap.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
active_counts = df[df['was_active']].pivot_table( active_counts = df[df['was_active']].pivot_table(
index='name', index='name',
columns='hour', columns='hour',
@@ -25,7 +42,12 @@ class PlotActivityHeatmap(BasePlotAnalysis):
return active_counts.sort_values(by='total_active_minutes', ascending=False) return active_counts.sort_values(by='total_active_minutes', ascending=False)
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Generate heatmap plot""" """
Generate heatmap plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
plt.figure(figsize=(12, 8)) plt.figure(figsize=(12, 8))
sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'}) sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'})
plt.xlabel('Hour of Day') plt.xlabel('Hour of Day')

View File

@@ -12,13 +12,30 @@ matplotlib.use('Agg')
logger = get_logger() logger = get_logger()
class PlotLineActivityAllUsers(BasePlotAnalysis): class PlotLineActivityAllUsers(BasePlotAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating a line graph.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Line Graph (All Users)" name = "Activity Line Graph (All Users)"
description = "This analysis shows the activity line graph for all users. Gneerates a downloadable PNG image." description = "This analysis shows the activity line graph for all users. Gneerates a downloadable PNG image."
plot_filename = "line_activity-all_users.png" plot_filename = "line_activity-all_users.png"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data for the bar plot""" """
Transform data for the line plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour df['hour'] = df['timestamp'].dt.hour
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
df['total_active_minutes'] = df.sum(axis=1) df['total_active_minutes'] = df.sum(axis=1)
@@ -30,7 +47,12 @@ class PlotLineActivityAllUsers(BasePlotAnalysis):
return df return df
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
# Plot activity throughout the day for each user with the cumulative sum having a bold line in one plot """
Generate line graph for user activity throughout the day.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
plt.figure(figsize=(12, 6)) plt.figure(figsize=(12, 6))
# Plot each user's activity # Plot each user's activity

View File

@@ -10,13 +10,30 @@ from app.logging_config import get_logger
logger = get_logger() logger = get_logger()
class PlotlyActivityHeatmap(BasePlotlyAnalysis): class PlotlyActivityHeatmap(BasePlotlyAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating an interactive heatmap.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Heatmap (Interactive)" name = "Activity Heatmap (Interactive)"
description = "Displays user activity trends over multiple days using an interactive heatmap." description = "Displays user activity trends over multiple days using an interactive heatmap."
plot_filename = "activity_heatmap.html" plot_filename = "activity_heatmap.html"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data for the heatmap""" """
Transform data for the heatmap.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour df['hour'] = df['timestamp'].dt.hour
active_counts = df[df['was_active']].pivot_table( active_counts = df[df['was_active']].pivot_table(
index='name', index='name',
@@ -34,7 +51,12 @@ class PlotlyActivityHeatmap(BasePlotlyAnalysis):
return active_counts return active_counts
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
"""Generate heatmap plot""" """
Generate heatmap plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0) df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
# Create a Plotly heatmap # Create a Plotly heatmap

View File

@@ -9,13 +9,30 @@ from app.logging_config import get_logger
logger = get_logger() logger = get_logger()
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating an interactive line graph.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Line Graph (All Users, Interactive)" name = "Activity Line Graph (All Users, Interactive)"
description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data." description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
plot_filename = "line_activity-all_users.html" plot_filename = "line_activity-all_users.html"
note = "" note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""Transform data for the line plot""" """
Transform data for the line plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour df['hour'] = df['timestamp'].dt.hour
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
df['total_active_minutes'] = df.sum(axis=1) df['total_active_minutes'] = df.sum(axis=1)
@@ -27,7 +44,12 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
return df return df
def plot_data(self, df: pd.DataFrame): def plot_data(self, df: pd.DataFrame):
# Create a Plotly figure """
Generate interactive line graph for user activity throughout the day.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
self.fig = make_subplots() self.fig = make_subplots()
# Plot each user's activity # Plot each user's activity
@@ -37,7 +59,6 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
else: else:
self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index)) self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index))
# Update layout
self.fig.update_layout( self.fig.update_layout(
title='User Activity Throughout the Day', title='User Activity Throughout the Day',
xaxis_title='Hour of Day', xaxis_title='Hour of Day',