From 595237c1726debf5667e16394ada87dd50701905 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Mon, 10 Feb 2025 02:28:50 +0100 Subject: [PATCH] adds docstrings --- app/analysis/basePlotAnalysis.py | 32 +++++++++++++++++--- app/analysis/basePlotlyAnalysis.py | 32 +++++++++++++++++--- app/analysis/data_utils.py | 25 ++++++++++++++- app/analysis/plot_bar_activity-user.py | 26 ++++++++++++++-- app/analysis/plot_bar_peak_hours.py | 27 +++++++++++++++-- app/analysis/plot_heat_user-activity-hour.py | 26 ++++++++++++++-- app/analysis/plot_line_activity-user.py | 26 ++++++++++++++-- app/analysis/plotly_heat_user-activity.py | 26 ++++++++++++++-- app/analysis/plotly_line_activity-user.py | 27 +++++++++++++++-- 9 files changed, 225 insertions(+), 22 deletions(-) diff --git a/app/analysis/basePlotAnalysis.py b/app/analysis/basePlotAnalysis.py index 652107d..2b6a15d 100644 --- a/app/analysis/basePlotAnalysis.py +++ b/app/analysis/basePlotAnalysis.py @@ -22,13 +22,24 @@ class BasePlotAnalysis(BaseAnalysis, ABC): - Transformation - Plot generation - Memory cleanup + + Attributes: + plot_filename (str): The filename for the output plot. + alt_text (str): The alt text for the plot. """ - plot_filename = "default_plot.png" alt_text = "Default Alt Text" def execute(self, df: pd.DataFrame): - """Executes the full analysis pipeline""" + """ + Executes the full analysis pipeline. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + str: HTML img tag containing the URL to the generated plot. + """ df = prepare_data(df) # Step 1: Prepare data paths = mk_plotdir(self.plot_filename) @@ -45,10 +56,23 @@ class BasePlotAnalysis(BaseAnalysis, ABC): @abstractmethod def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Subclasses must define how they transform the data""" + """ + Subclasses must define how they transform the data. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. 
+ + Returns: + pd.DataFrame: The transformed DataFrame. + """ pass @abstractmethod def plot_data(self, df: pd.DataFrame): - """Subclasses must define how they generate the plot""" + """ + Subclasses must define how they generate the plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing data to be plotted. + """ pass diff --git a/app/analysis/basePlotlyAnalysis.py b/app/analysis/basePlotlyAnalysis.py index f9540b4..291aff3 100644 --- a/app/analysis/basePlotlyAnalysis.py +++ b/app/analysis/basePlotlyAnalysis.py @@ -18,13 +18,24 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC): - Transformation - Plot generation - Memory cleanup + + Attributes: + plot_filename (str): The filename for the output plot. + alt_text (str): The alt text for the plot. """ - plot_filename = "default_plot.html" alt_text = "Default Alt Text" def execute(self, df: pd.DataFrame): - """Executes the full analysis pipeline""" + """ + Executes the full analysis pipeline. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + str: HTML iframe containing the URL to the generated plot. + """ df = prepare_data(df) # Step 1: Prepare data paths = mk_plotdir(self.plot_filename) @@ -41,10 +52,23 @@ class BasePlotlyAnalysis(BaseAnalysis, ABC): @abstractmethod def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Subclasses must define how they transform the data""" + """ + Subclasses must define how they transform the data. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ pass @abstractmethod def plot_data(self, df: pd.DataFrame): - """Subclasses must define how they generate the plot""" + """ + Subclasses must define how they generate the plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing data to be plotted. 
+ """ pass \ No newline at end of file diff --git a/app/analysis/data_utils.py b/app/analysis/data_utils.py index 0f73850..c48dfc1 100644 --- a/app/analysis/data_utils.py +++ b/app/analysis/data_utils.py @@ -3,6 +3,20 @@ import os import pandas as pd def prepare_data(df): + """ + Prepares the data for analysis by converting timestamps, calculating previous timestamps, + determining active status, and extracting the hour from the timestamp. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The processed DataFrame with additional columns for analysis. + + The returned DataFrame will have the following columns: + user_id name last_action status timestamp prev_timestamp was_active hour + 0 12345678 UserName 2025-02-08 17:58:11 Okay 2025-02-08 18:09:41.867984056 NaT False 18 + """ df["timestamp"] = pd.to_datetime(df["timestamp"]) df["last_action"] = pd.to_datetime(df["last_action"]) df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1) @@ -12,6 +26,15 @@ def prepare_data(df): return df def mk_plotdir(output_filename): + """ + Creates the directory for storing plots and generates the output path and URL for the plot. + + Parameters: + output_filename (str): The filename for the output plot. + + Returns: + dict: A dictionary containing the output path and plot URL. 
+ """ plots_dir = os.path.join(current_app.root_path, "static", "plots") os.makedirs(plots_dir, exist_ok=True) @@ -19,4 +42,4 @@ def mk_plotdir(output_filename): plot_url = url_for('static', filename=f'plots/{output_filename}', _external=True) - return {'output_path': output_path, 'plot_url': plot_url} + return {'output_path': output_path, 'plot_url': plot_url} \ No newline at end of file diff --git a/app/analysis/plot_bar_activity-user.py b/app/analysis/plot_bar_activity-user.py index 1a8fa67..6f8eaf1 100644 --- a/app/analysis/plot_bar_activity-user.py +++ b/app/analysis/plot_bar_activity-user.py @@ -12,18 +12,40 @@ matplotlib.use('Agg') logger = get_logger() class PlotTopActiveUsers(BasePlotAnalysis): + """ + Class for analyzing the most active users and generating a bar chart. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ name = "Top Active Users" description = "Displays the most active users based on their number of recorded actions." plot_filename = "bar_activity-per-user.png" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data for the bar plot""" + """ + Transform data for the bar plot. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with active counts per user. + """ df = df[df['was_active'] == True].groupby('name').size().reset_index(name='active_count') return df def plot_data(self, df: pd.DataFrame): - """Generate bar plot""" + """ + Generate bar plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing active counts per user. 
+ """ # create a barplot from active counts sorted by active count plt.figure(figsize=(10, 6)) sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False)) diff --git a/app/analysis/plot_bar_peak_hours.py b/app/analysis/plot_bar_peak_hours.py index f4ca59e..95dff34 100644 --- a/app/analysis/plot_bar_peak_hours.py +++ b/app/analysis/plot_bar_peak_hours.py @@ -9,17 +9,40 @@ import matplotlib matplotlib.use('Agg') class PlotPeakHours(BasePlotAnalysis): + """ + Class for analyzing peak activity hours and generating a bar chart. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ + name = "Peak Hours Analysis" description = "Identifies peak activity hours using a bar chart." plot_filename = "peak_hours.png" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data to add was_active column and extract peak hours""" + """ + Return the data unchanged; the was_active and hour columns used for plotting are expected to be added beforehand by prepare_data (see data_utils.py). + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The input DataFrame, returned as-is. + """ return df def plot_data(self, df: pd.DataFrame): - """Generate bar chart for peak hours""" + """ + Generate bar chart for peak hours. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing user activity data. 
+ """ peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index() plt.figure(figsize=(12, 5)) diff --git a/app/analysis/plot_heat_user-activity-hour.py b/app/analysis/plot_heat_user-activity-hour.py index 7bd077f..05de909 100644 --- a/app/analysis/plot_heat_user-activity-hour.py +++ b/app/analysis/plot_heat_user-activity-hour.py @@ -7,13 +7,30 @@ import matplotlib matplotlib.use('Agg') class PlotActivityHeatmap(BasePlotAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating a heatmap. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ name = "Activity Heatmap" description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image." plot_filename = "activity_heatmap.png" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data for the heatmap""" + """ + Transform data for the heatmap. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ active_counts = df[df['was_active']].pivot_table( index='name', columns='hour', @@ -25,7 +42,12 @@ class PlotActivityHeatmap(BasePlotAnalysis): return active_counts.sort_values(by='total_active_minutes', ascending=False) def plot_data(self, df: pd.DataFrame): - """Generate heatmap plot""" + """ + Generate heatmap plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. 
+ """ plt.figure(figsize=(12, 8)) sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'}) plt.xlabel('Hour of Day') diff --git a/app/analysis/plot_line_activity-user.py b/app/analysis/plot_line_activity-user.py index f0cf25e..d5cf072 100644 --- a/app/analysis/plot_line_activity-user.py +++ b/app/analysis/plot_line_activity-user.py @@ -12,13 +12,30 @@ matplotlib.use('Agg') logger = get_logger() class PlotLineActivityAllUsers(BasePlotAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating a line graph. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ name = "Activity Line Graph (All Users)" description = "This analysis shows the activity line graph for all users. Gneerates a downloadable PNG image." plot_filename = "line_activity-all_users.png" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data for the bar plot""" + """ + Transform data for the line plot. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ df['hour'] = df['timestamp'].dt.hour df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) df['total_active_minutes'] = df.sum(axis=1) @@ -30,7 +47,12 @@ class PlotLineActivityAllUsers(BasePlotAnalysis): return df def plot_data(self, df: pd.DataFrame): - # Plot activity throughout the day for each user with the cumulative sum having a bold line in one plot + """ + Generate line graph for user activity throughout the day. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. 
+ """ plt.figure(figsize=(12, 6)) # Plot each user's activity diff --git a/app/analysis/plotly_heat_user-activity.py b/app/analysis/plotly_heat_user-activity.py index 46a24c1..a73bc3c 100644 --- a/app/analysis/plotly_heat_user-activity.py +++ b/app/analysis/plotly_heat_user-activity.py @@ -10,13 +10,30 @@ from app.logging_config import get_logger logger = get_logger() class PlotlyActivityHeatmap(BasePlotlyAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating an interactive heatmap. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ name = "Activity Heatmap (Interactive)" description = "Displays user activity trends over multiple days using an interactive heatmap." plot_filename = "activity_heatmap.html" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data for the heatmap""" + """ + Transform data for the heatmap. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ df['hour'] = df['timestamp'].dt.hour active_counts = df[df['was_active']].pivot_table( index='name', @@ -34,7 +51,12 @@ class PlotlyActivityHeatmap(BasePlotlyAnalysis): return active_counts def plot_data(self, df: pd.DataFrame): - """Generate heatmap plot""" + """ + Generate heatmap plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. 
+ """ df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0) # Create a Plotly heatmap diff --git a/app/analysis/plotly_line_activity-user.py b/app/analysis/plotly_line_activity-user.py index 3ba1959..cd56b80 100644 --- a/app/analysis/plotly_line_activity-user.py +++ b/app/analysis/plotly_line_activity-user.py @@ -9,13 +9,30 @@ from app.logging_config import get_logger logger = get_logger() class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating an interactive line graph. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ name = "Activity Line Graph (All Users, Interactive)" description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data." plot_filename = "line_activity-all_users.html" note = "" def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform data for the line plot""" + """ + Transform data for the line plot. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ df['hour'] = df['timestamp'].dt.hour df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) df['total_active_minutes'] = df.sum(axis=1) @@ -27,7 +44,12 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): return df def plot_data(self, df: pd.DataFrame): - # Create a Plotly figure + """ + Generate interactive line graph for user activity throughout the day. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. 
+ """ self.fig = make_subplots() # Plot each user's activity @@ -37,7 +59,6 @@ class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): else: self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index)) - # Update layout self.fig.update_layout( title='User Activity Throughout the Day', xaxis_title='Hour of Day',