From ea55c7ad6dfac65fad4deef3cad66915e138a161 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Mon, 10 Feb 2025 03:05:30 +0100 Subject: [PATCH] adds analysis plugin guide in readme --- README.md | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/README.md b/README.md index 3f1d41f..2c17066 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,129 @@ flask run 2. Open your web browser and navigate to `http://127.0.0.1:5000/`. +## Adding an Analysis Module + +This guide explains how to add a new analysis module using the provided base classes: `BasePlotlyAnalysis`, `BasePlotAnalysis`, and `BaseAnalysis`. These base classes ensure a structured workflow for data preparation, transformation, and visualization. + +### 1. Choosing the Right Base Class +Before implementing an analysis module, decide on the appropriate base class: +- **`BasePlotlyAnalysis`**: Use this for interactive plots with **Plotly** that generate **HTML** outputs. +- **`BasePlotAnalysis`**: Use this for static plots with **Matplotlib/Seaborn** that generate **PNG** image files. +- **`BaseAnalysis`**: Use this for any other type of analysis with **text** or **HTML** output for maximum flexibility. + +### 2. Naming Convention +Follow a structured naming convention for consistency: +- **File name:** `plotly_<analysis_name>.py` for Plotly analyses, `plot_<analysis_name>.py` for Matplotlib-based analyses. +- **Class name:** Use PascalCase with a `Plotly` or `Plot` prefix followed by a descriptive name: + - Example for Plotly: `PlotlyActivityHeatmap` + - Example for Matplotlib: `PlotUserSessionDuration` + +### 3. 
Data Structure +The following DataFrame structure is passed to analysis classes: + +| user_id | name | last_action | status | timestamp | prev_timestamp | was_active | hour | +|----------|-----------|----------------------|--------|-----------------------------|----------------|------------|------| +| XXXXXXX | UserA | 2025-02-08 17:58:11 | Okay | 2025-02-08 18:09:41.867984056 | NaT | False | 18 | +| XXXXXXX | UserB | 2025-02-08 17:00:10 | Okay | 2025-02-08 18:09:42.427846909 | NaT | False | 18 | +| XXXXXXX | UserC | 2025-02-08 16:31:52 | Okay | 2025-02-08 18:09:42.823201895 | NaT | False | 18 | +| XXXXXXX | UserD | 2025-02-06 23:57:24 | Okay | 2025-02-08 18:09:43.179914951 | NaT | False | 18 | +| XXXXXXX | UserE | 2025-02-06 06:33:40 | Okay | 2025-02-08 18:09:43.434650898 | NaT | False | 18 | + +Note that the first N rows, where N is the number of members, always have an empty `prev_timestamp` (`NaT`), because a member's first record has no earlier timestamp to refer to. + +### 4. Implementing an Analysis Module +Each analysis module should define two key methods: +- `transform_data(self, df: pd.DataFrame) -> pd.DataFrame`: Processes the input data for plotting. +- `plot_data(self, df: pd.DataFrame)`: Generates and saves the plot. + +#### Example: Adding a Plotly Heatmap +Below is an example of how to create a new analysis module using `BasePlotlyAnalysis`. + +```python +import pandas as pd +import plotly.graph_objects as go +from .basePlotlyAnalysis import BasePlotlyAnalysis + +class PlotlyActivityHeatmap(BasePlotlyAnalysis): + """ + Displays user activity trends over multiple days using an interactive heatmap. + """ + name = "Activity Heatmap (Interactive)" + description = "Displays user activity trends over multiple days." 
+ plot_filename = "activity_heatmap.html" + + def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: + df['hour'] = df['timestamp'].dt.hour + active_counts = df[df['was_active']].pivot_table( + index='name', + columns='hour', + values='was_active', + aggfunc='sum', + fill_value=0 + ).reset_index() + return active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count') + + def plot_data(self, df: pd.DataFrame): + df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0) + self.fig = go.Figure(data=go.Heatmap( + z=df.values, x=df.columns, y=df.index, colorscale='Viridis', + colorbar=dict(title='Activity Count') + )) + self.fig.update_layout(title='User Activity Heatmap', xaxis_title='Hour', yaxis_title='User') +``` + +#### Example: Adding a Static Matplotlib Plot +Below is an example of a Matplotlib-based analysis module using `BasePlotAnalysis`. + +```python +import pandas as pd +import matplotlib.pyplot as plt +from .basePlotAnalysis import BasePlotAnalysis + +class PlotUserSessionDuration(BasePlotAnalysis): + """ + Displays a histogram of user session durations. + """ + name = "User Session Duration Histogram" + description = "Histogram of session durations." + plot_filename = "session_duration.png" + + def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: + df['session_duration'] = (df['last_action'] - df['timestamp']).dt.total_seconds() + return df + + def plot_data(self, df: pd.DataFrame): + plt.figure(figsize=(10, 6)) + plt.hist(df['session_duration'].dropna(), bins=30, edgecolor='black') + plt.xlabel('Session Duration (seconds)') + plt.ylabel('Frequency') + plt.title('User Session Duration Histogram') +``` + +### 5. Registering the Module +Once you have created your analysis module, it will be automatically discovered by `load_analysis_modules()`, provided it is placed in the correct directory. + +### 6. 
Running the Analysis +To execute the analysis, pass a Pandas DataFrame to its `execute` method: +```python +from app.analysis.plotly_activity_heatmap import PlotlyActivityHeatmap +analysis = PlotlyActivityHeatmap() +result_html = analysis.execute(df) +print(result_html) # Returns the HTML for embedding the plot +``` + +### Summary +- Choose the appropriate base class (`BasePlotlyAnalysis`, `BasePlotAnalysis`, or `BaseAnalysis`). +- Follow the naming convention (`plotly_<analysis_name>.py` for Plotly, `plot_<analysis_name>.py` for Matplotlib). +- Implement `transform_data()` and `plot_data()` methods. +- The module will be auto-registered if placed in the correct directory. +- Execute the analysis by calling `.execute(df)`. + +This structure ensures that new analyses can be easily integrated and maintained. + + + + ## License All assets and code are under the [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) LICENSE and in the public domain unless specified otherwise. \ No newline at end of file