Compare commits
4 Commits
develop
...
feature/an
| Author | SHA1 | Date | |
|---|---|---|---|
| ea55c7ad6d | |||
| 12e7cffca1 | |||
| 595237c172 | |||
| e57869374b |
123
README.md
123
README.md
@@ -93,6 +93,129 @@ flask run
|
|||||||
|
|
||||||
2. Open your web browser and navigate to `http://127.0.0.1:5000/`.
|
2. Open your web browser and navigate to `http://127.0.0.1:5000/`.
|
||||||
|
|
||||||
|
## Adding an Analysis Module
|
||||||
|
|
||||||
|
This guide explains how to add a new analysis module using the provided base classes: `BasePlotlyAnalysis` and `BasePlotAnalysis`. These base classes ensure a structured workflow for data preparation, transformation, and visualization.
|
||||||
|
|
||||||
|
### 1. Choosing the Right Base Class
|
||||||
|
Before implementing an analysis module, decide on the appropriate base class:
|
||||||
|
- **`BasePlotlyAnalysis`**: Use this for interactive plots with **Plotly** that generate **HTML** outputs.
|
||||||
|
- **`BasePlotAnalysis`**: Use this for static plots with **Matplotlib/Seaborn** that generate **PNG** image files.
|
||||||
|
- **`BaseAnalysis`**: Use this for any other type of analysis that produces **text** or **HTML** output; it offers maximum flexibility because you implement `execute()` yourself.
|
||||||
|
|
||||||
|
### 2. Naming Convention
|
||||||
|
Follow a structured naming convention for consistency:
|
||||||
|
- **File name:** `plotly_<analysis_name>.py` for Plotly analyses, `plot_<analysis_name>.py` for Matplotlib-based analyses.
|
||||||
|
- **Class name:** Use PascalCase and a descriptive suffix:
|
||||||
|
- Example for Plotly: `PlotlyActivityHeatmap`
|
||||||
|
- Example for Matplotlib: `PlotUserSessionDuration`
|
||||||
|
|
||||||
|
### 3. Data Structure
|
||||||
|
The following DataFrame structure is passed to analysis classes:
|
||||||
|
|
||||||
|
| user_id | name | last_action | status | timestamp | prev_timestamp | was_active | hour |
|
||||||
|
|----------|-----------|----------------------|--------|-----------------------------|----------------|------------|------|
|
||||||
|
| XXXXXXX | UserA | 2025-02-08 17:58:11 | Okay | 2025-02-08 18:09:41.867984056 | NaT | False | 18 |
|
||||||
|
| XXXXXXX | UserB | 2025-02-08 17:00:10 | Okay | 2025-02-08 18:09:42.427846909 | NaT | False | 18 |
|
||||||
|
| XXXXXXX | UserC | 2025-02-08 16:31:52 | Okay | 2025-02-08 18:09:42.823201895 | NaT | False | 18 |
|
||||||
|
| XXXXXXX | UserD | 2025-02-06 23:57:24 | Okay | 2025-02-08 18:09:43.179914951 | NaT | False | 18 |
|
||||||
|
| XXXXXXX | UserE | 2025-02-06 06:33:40 | Okay | 2025-02-08 18:09:43.434650898 | NaT | False | 18 |
|
||||||
|
|
||||||
|
Note that `prev_timestamp` is computed per user, so the first row recorded for each user (i.e. the first N rows, where N is the number of members) always contains `NaT`: no earlier timestamp exists for that user yet.
|
||||||
|
|
||||||
|
### 4. Implementing an Analysis Module
|
||||||
|
Each analysis module should define two key methods:
|
||||||
|
- `transform_data(self, df: pd.DataFrame) -> pd.DataFrame`: Processes the input data for plotting.
|
||||||
|
- `plot_data(self, df: pd.DataFrame)`: Generates and saves the plot.
|
||||||
|
|
||||||
|
#### Example: Adding a Plotly Heatmap
|
||||||
|
Below is an example of how to create a new analysis module using `BasePlotlyAnalysis`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from .basePlotlyAnalysis import BasePlotlyAnalysis
|
||||||
|
|
||||||
|
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
|
||||||
|
"""
|
||||||
|
Displays user activity trends over multiple days using an interactive heatmap.
|
||||||
|
"""
|
||||||
|
name = "Activity Heatmap (Interactive)"
|
||||||
|
description = "Displays user activity trends over multiple days."
|
||||||
|
plot_filename = "activity_heatmap.html"
|
||||||
|
|
||||||
|
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
df['hour'] = df['timestamp'].dt.hour
|
||||||
|
active_counts = df[df['was_active']].pivot_table(
|
||||||
|
index='name',
|
||||||
|
columns='hour',
|
||||||
|
values='was_active',
|
||||||
|
aggfunc='sum',
|
||||||
|
fill_value=0
|
||||||
|
).reset_index()
|
||||||
|
return active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count')
|
||||||
|
|
||||||
|
def plot_data(self, df: pd.DataFrame):
|
||||||
|
df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
|
||||||
|
self.fig = go.Figure(data=go.Heatmap(
|
||||||
|
z=df.values, x=df.columns, y=df.index, colorscale='Viridis',
|
||||||
|
colorbar=dict(title='Activity Count')
|
||||||
|
))
|
||||||
|
self.fig.update_layout(title='User Activity Heatmap', xaxis_title='Hour', yaxis_title='User')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Example: Adding a Static Matplotlib Plot
|
||||||
|
Below is an example of a Matplotlib-based analysis module using `BasePlotAnalysis`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from .basePlotAnalysis import BasePlotAnalysis
|
||||||
|
|
||||||
|
class PlotUserSessionDuration(BasePlotAnalysis):
|
||||||
|
"""
|
||||||
|
Displays a histogram of user session durations.
|
||||||
|
"""
|
||||||
|
name = "User Session Duration Histogram"
|
||||||
|
description = "Histogram of session durations."
|
||||||
|
plot_filename = "session_duration.png"
|
||||||
|
|
||||||
|
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
df['session_duration'] = (df['last_action'] - df['timestamp']).dt.total_seconds()
|
||||||
|
return df
|
||||||
|
|
||||||
|
def plot_data(self, df: pd.DataFrame):
|
||||||
|
plt.figure(figsize=(10, 6))
|
||||||
|
plt.hist(df['session_duration'].dropna(), bins=30, edgecolor='black')
|
||||||
|
plt.xlabel('Session Duration (seconds)')
|
||||||
|
plt.ylabel('Frequency')
|
||||||
|
plt.title('User Session Duration Histogram')
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Registering the Module
|
||||||
|
Once you have created your analysis module, it will be automatically discovered by `load_analysis_modules()`, provided it is placed in the `app/analysis/` package directory and defines a concrete (non-abstract) subclass of `BaseAnalysis`.
|
||||||
|
|
||||||
|
### 6. Running the Analysis
|
||||||
|
To execute the analysis, pass a Pandas DataFrame to its `execute` method:
|
||||||
|
```python
|
||||||
|
from app.analysis.plotly_activity_heatmap import PlotlyActivityHeatmap
|
||||||
|
analysis = PlotlyActivityHeatmap()
|
||||||
|
result_html = analysis.execute(df)
|
||||||
|
print(result_html) # Returns the HTML for embedding the plot
|
||||||
|
```
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
- Choose the appropriate base class (`BasePlotlyAnalysis`, `BasePlotAnalysis`, or `BaseAnalysis`).
|
||||||
|
- Follow the naming convention (`plotly_<name>.py` for Plotly, `plot_<name>.py` for Matplotlib).
|
||||||
|
- Implement `transform_data()` and `plot_data()` methods.
|
||||||
|
- The module will be auto-registered if placed in the correct directory.
|
||||||
|
- Execute the analysis by calling `.execute(df)`.
|
||||||
|
|
||||||
|
This structure ensures that new analyses can be easily integrated and maintained.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
All assets and code are under the [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) LICENSE and in the public domain unless specified otherwise.
|
All assets and code are under the [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) LICENSE and in the public domain unless specified otherwise.
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import matplotlib
|
|
||||||
matplotlib.use("Agg") # Prevents GUI-related issues in Flask
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
|
|
||||||
|
|
||||||
def load_data(file_path: str) -> pd.DataFrame:
    """
    Read the scraped CSV file and return it as a DataFrame.

    The ``timestamp`` and ``last_action`` columns are converted to datetimes;
    values that cannot be parsed become ``NaT`` instead of raising.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
        for column in ("timestamp", "last_action"):
            frame[column] = pd.to_datetime(frame[column], errors="coerce")
        return frame

    raise FileNotFoundError(f"File {file_path} not found.")
|
|
||||||
|
|
||||||
def generate_statistics(df: pd.DataFrame):
    """
    Count the number of rows recorded in each hour of the day.

    An ``hour`` column derived from ``timestamp`` is written onto ``df``
    itself before grouping.

    Returns:
        pd.Series: Row counts indexed by ``hour``.
    """
    hour_of_day = df["timestamp"].dt.hour
    df["hour"] = hour_of_day
    rows_per_hour = df.groupby("hour").size()
    return rows_per_hour
|
|
||||||
|
|
||||||
def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
    """
    Draw a bar chart of row counts per hour and save it under ``app/static/plots``.

    Parameters:
        df (pd.DataFrame): Data with a ``timestamp`` column; it is parsed to
            datetimes in place if needed, and an ``hour`` column is added.
        output_path (str): File name of the image inside the plots directory.

    Returns:
        str: Path of the saved image.

    Raises:
        FileNotFoundError: If the image is missing after saving.
    """
    # Resolve the destination and make sure its directory exists
    plots_dir = os.path.join("app", "static", "plots")
    os.makedirs(plots_dir, exist_ok=True)
    output_path = os.path.join(plots_dir, output_path)

    # Parse timestamps only when the column is not already datetime-typed
    timestamps_parsed = pd.api.types.is_datetime64_any_dtype(df["timestamp"])
    if not timestamps_parsed:
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    df["hour"] = df["timestamp"].dt.hour
    per_hour = df.groupby("hour").size().reset_index(name="count")

    plt.figure(figsize=(10, 5))

    # `hue` is assigned explicitly (with the legend off) to avoid a Seaborn warning
    sns.barplot(data=per_hour, x="hour", y="count", hue="hour", palette="Blues", legend=False)

    plt.title("User Activity Distribution")
    plt.xlabel("Hour of the Day")
    plt.ylabel("Activity Count")
    plt.xticks(range(0, 24))

    plt.savefig(output_path, bbox_inches="tight")
    plt.close()

    # Confirm the image really landed on disk before handing back its path
    if os.path.exists(output_path):
        return output_path

    raise FileNotFoundError(f"Plot could not be saved to {output_path}")
|
|
||||||
34
app/analysis/__init__.py
Normal file
34
app/analysis/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import os
|
||||||
|
import pkgutil
|
||||||
|
import importlib
|
||||||
|
import inspect
|
||||||
|
from abc import ABC
|
||||||
|
|
||||||
|
from .base import BaseAnalysis
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def load_analysis_modules():
    """
    Discover and instantiate every concrete analysis class in this package.

    Every module found directly inside the package is imported. Each class
    that is defined in that module, subclasses ``BaseAnalysis`` and is not
    abstract is instantiated with no arguments.

    Returns:
        list: One instance per concrete analysis class, in the order the
        modules are discovered.
    """
    analysis_modules = []

    for _, module_name, _ in pkgutil.iter_modules(__path__):
        # Build the dotted path from the package's own name so discovery keeps
        # working if the package is renamed or relocated.
        module = importlib.import_module(f"{__name__}.{module_name}")

        for _, obj in inspect.getmembers(module, inspect.isclass):
            # getmembers also returns classes the module merely imported;
            # keep only those defined here so no analysis is registered twice.
            if obj.__module__ != module.__name__:
                continue
            # Exclude abstract classes (like BasePlotAnalysis)
            if issubclass(obj, BaseAnalysis) and obj is not BaseAnalysis and not inspect.isabstract(obj):
                analysis_modules.append(obj())  # Instantiate only concrete classes

    return analysis_modules
|
||||||
|
|
||||||
|
def load_data(file_path: str) -> pd.DataFrame:
    """
    Load the scraped data from a CSV file into a Pandas DataFrame.

    ``timestamp`` and ``last_action`` are parsed as datetimes, with
    unparseable values coerced to ``NaT``.

    Parameters:
        file_path (str): Location of the CSV file.

    Returns:
        pd.DataFrame: The loaded data.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if os.path.exists(file_path):
        loaded = pd.read_csv(file_path)
        for datetime_column in ("timestamp", "last_action"):
            loaded[datetime_column] = pd.to_datetime(loaded[datetime_column], errors="coerce")
        return loaded

    raise FileNotFoundError(f"File {file_path} not found.")
|
||||||
11
app/analysis/base.py
Normal file
11
app/analysis/base.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class BaseAnalysis(ABC):
    """
    Abstract root of all analysis modules.

    Subclasses override ``name`` and ``description`` and must implement
    ``execute``.
    """

    # Human-readable title of the analysis
    name = "Base Analysis"
    # Short explanation of what the analysis shows
    description = "This is a base analysis module."

    @abstractmethod
    def execute(self, df: pd.DataFrame):
        """Run the analysis on the given DataFrame."""
|
||||||
78
app/analysis/basePlotAnalysis.py
Normal file
78
app/analysis/basePlotAnalysis.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from flask import url_for
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from .base import BaseAnalysis
|
||||||
|
from app.analysis.data_utils import prepare_data, mk_plotdir
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
|
||||||
|
# -------------------------------------------
|
||||||
|
# Base Class for All Plot Analyses
|
||||||
|
# -------------------------------------------
|
||||||
|
class BasePlotAnalysis(BaseAnalysis, ABC):
    """
    Base class for all plot-based analyses.
    It enforces a structure for:
    - Data preparation
    - Transformation
    - Plot generation
    - Figure cleanup

    Attributes:
        plot_filename (str): The filename for the output plot.
        alt_text (str): Fallback alt text, used when a subclass defines no ``note``.
    """
    plot_filename = "default_plot.png"
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """
        Executes the full analysis pipeline.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            str: HTML img tag containing the URL to the generated plot.
        """
        # Local import keeps this block self-contained without touching file-level imports.
        from html import escape

        df = prepare_data(df)  # Step 1: Prepare data

        paths = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = paths['output_path'], paths['plot_url']

        df = self.transform_data(df)  # Step 2: Transform data (implemented by subclass)

        try:
            self.plot_data(df)  # Step 3: Create the plot
            plt.savefig(self.output_path, bbox_inches="tight")
        finally:
            # Step 4: Always release the current figure, even when plotting or
            # saving fails, so figures do not pile up across requests.
            plt.close()

        # Subclasses conventionally define `note`, but the base class does not;
        # fall back to `alt_text` instead of raising AttributeError.
        alt = escape(str(getattr(self, "note", self.alt_text)), quote=True)
        return f'<img src="{self.plot_url}" alt="{alt}">'

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Subclasses must define how they transform the data.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: The transformed DataFrame.
        """
        pass

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """
        Subclasses must define how they generate the plot.

        The plot must be drawn on the current Matplotlib figure, because
        ``execute`` saves it with ``plt.savefig``.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
        """
        pass
|
||||||
74
app/analysis/basePlotlyAnalysis.py
Normal file
74
app/analysis/basePlotlyAnalysis.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from flask import url_for
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from .base import BaseAnalysis
|
||||||
|
from app.analysis.data_utils import prepare_data, mk_plotdir
|
||||||
|
|
||||||
|
# -------------------------------------------
|
||||||
|
# Base Class for All Plotly Plot Analyses
|
||||||
|
# -------------------------------------------
|
||||||
|
class BasePlotlyAnalysis(BaseAnalysis, ABC):
    """
    Common pipeline for analyses rendered as interactive Plotly figures.

    ``execute`` prepares the data, asks the subclass to transform and plot
    it, writes the resulting figure to an HTML file and returns an iframe
    pointing at that file.

    Attributes:
        plot_filename (str): The filename for the output plot.
        alt_text (str): The alt text for the plot.
    """
    plot_filename = "default_plot.html"
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """
        Run preparation, transformation, plotting and export in sequence.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            str: HTML iframe containing the URL to the generated plot.
        """
        # Step 1: shared preparation
        prepared = prepare_data(df)

        locations = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = (
            locations['output_path'],
            locations['plot_url'],
        )

        # Step 2 and 3: subclass-specific transformation and figure creation
        transformed = self.transform_data(prepared)
        self.plot_data(transformed)

        # The subclass is expected to have stored its figure on `self.fig`
        self.fig.write_html(self.output_path)

        # Step 4: drop the local references to the intermediate frames
        del prepared, transformed
        return f'<iframe src="{self.plot_url}" width="100%" height="600"></iframe>'

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Turn the prepared data into the shape needed for plotting.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: The transformed DataFrame.
        """

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """
        Build the Plotly figure and store it on ``self.fig``.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
        """
|
||||||
45
app/analysis/data_utils.py
Normal file
45
app/analysis/data_utils.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from flask import current_app, url_for
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def prepare_data(df):
    """
    Prepares the data for analysis by converting timestamps, calculating previous timestamps,
    determining active status, and extracting the hour from the timestamp.

    The input frame is left untouched; all columns are added to a copy, so the
    same raw frame can be handed to several analyses in turn.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing user activity data.

    Returns:
        pd.DataFrame: The processed DataFrame with additional columns for analysis.

    The returned DataFrame will have the following columns:
    user_id  name  last_action  status  timestamp  prev_timestamp  was_active  hour
    0  12345678  UserName  2025-02-08 17:58:11  Okay  2025-02-08 18:09:41.867984056  NaT  False  18
    """
    # Work on a copy so the caller's (shared) frame is never mutated.
    df = df.copy()

    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df["last_action"] = pd.to_datetime(df["last_action"])

    # Previous snapshot time of the same user; NaT for each user's first row.
    df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)

    # A user counts as active when the last action is at most 60 seconds
    # older than the snapshot timestamp.
    df["was_active"] = (df["timestamp"] - df["last_action"]) <= pd.Timedelta(seconds=60)
    df["was_active"] = df["was_active"].fillna(False)

    df['hour'] = df['timestamp'].dt.hour
    return df
|
||||||
|
|
||||||
|
def mk_plotdir(output_filename):
    """
    Ensure the plots directory exists and work out where a plot is stored and served.

    The directory is ``static/plots`` below the current Flask application's
    root path.

    Parameters:
        output_filename (str): The filename for the output plot.

    Returns:
        dict: ``output_path`` (filesystem location of the plot) and
        ``plot_url`` (absolute URL of the plot under the ``static`` endpoint).
    """
    static_plots = os.path.join(current_app.root_path, "static", "plots")
    os.makedirs(static_plots, exist_ok=True)

    result = {}
    result['output_path'] = os.path.join(static_plots, output_filename)
    result['plot_url'] = url_for('static', filename=f'plots/{output_filename}', _external=True)
    return result
|
||||||
55
app/analysis/plot_bar_activity-user.py
Normal file
55
app/analysis/plot_bar_activity-user.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from .basePlotAnalysis import BasePlotAnalysis
|
||||||
|
from flask import current_app, url_for
|
||||||
|
|
||||||
|
from app.logging_config import get_logger
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
class PlotTopActiveUsers(BasePlotAnalysis):
    """
    Class for analyzing the most active users and generating a bar chart.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """
    name = "Top Active Users"
    description = "Displays the most active users based on their number of recorded actions."
    plot_filename = "bar_activity-per-user.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transform data for the bar plot.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: One row per user with columns ``name`` and ``active_count``.
        """
        # `was_active` is used directly as the row mask, as in the other analyses.
        active_rows = df[df['was_active']]
        return active_rows.groupby('name').size().reset_index(name='active_count')

    def plot_data(self, df: pd.DataFrame):
        """
        Generate a horizontal bar plot of active counts per user.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing active counts per user.
        """
        # create a barplot from active counts sorted by active count
        plt.figure(figsize=(10, 6))
        sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
        plt.xticks(rotation=90)
        plt.title('Minutes Active')
        # Bars are horizontal: counts run along x and users along y, so the
        # axis labels must follow that orientation.
        plt.xlabel('Active Count')
        plt.ylabel('Player')
|
||||||
54
app/analysis/plot_bar_peak_hours.py
Normal file
54
app/analysis/plot_bar_peak_hours.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from .basePlotAnalysis import BasePlotAnalysis
|
||||||
|
from flask import current_app, url_for
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
|
||||||
|
class PlotPeakHours(BasePlotAnalysis):
    """
    Class for analyzing peak activity hours and generating a bar chart.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """

    name = "Peak Hours Analysis"
    description = "Identifies peak activity hours using a bar chart."
    plot_filename = "peak_hours.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return the data unchanged; the columns this analysis needs
        (``was_active`` and ``hour``) are read directly in ``plot_data``.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: The same DataFrame that was passed in.
        """
        return df

    def plot_data(self, df: pd.DataFrame):
        """
        Generate bar chart for peak hours.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing user activity data.
        """
        # The bar plot places bars at categorical positions 0..k-1. Giving every
        # hour 0-23 a bar (zero where nothing was recorded) makes those
        # positions coincide with the hour ticks set below.
        peak_hours = (
            df[df["was_active"]]["hour"]
            .value_counts()
            .reindex(range(24), fill_value=0)
        )

        plt.figure(figsize=(12, 5))
        # `hue` only drives the colouring; the legend it would add is suppressed.
        sns.barplot(
            x=peak_hours.index,
            y=peak_hours.values,
            hue=peak_hours.values,
            palette="coolwarm",
            legend=False,
        )

        plt.xlabel("Hour of the Day")
        plt.ylabel("Activity Count")
        plt.title("Peak Hours of User Activity")
        plt.xticks(range(0, 24))
|
||||||
55
app/analysis/plot_heat_user-activity-hour.py
Normal file
55
app/analysis/plot_heat_user-activity-hour.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from .basePlotAnalysis import BasePlotAnalysis
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
|
||||||
|
class PlotActivityHeatmap(BasePlotAnalysis):
    """
    Static heatmap of how often each user was active in each hour of the day.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """
    name = "Activity Heatmap"
    description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
    plot_filename = "activity_heatmap.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build a user-by-hour table of active counts, busiest users first.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: Rows are user names, columns are hours plus a
            ``total_active_minutes`` column holding each row's sum.
        """
        only_active = df[df['was_active']]
        per_user_hour = only_active.pivot_table(
            values='was_active',
            index='name',
            columns='hour',
            aggfunc='sum',
            fill_value=0,
        )
        row_totals = per_user_hour.sum(axis=1)
        per_user_hour['total_active_minutes'] = row_totals
        return per_user_hour.sort_values(by='total_active_minutes', ascending=False)

    def plot_data(self, df: pd.DataFrame):
        """
        Draw the heatmap from the hourly columns, leaving out the totals column.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
        """
        plt.figure(figsize=(12, 8))

        hourly_columns = df.columns != 'total_active_minutes'
        colorbar_options = {'label': 'Count of was_active == True'}
        sns.heatmap(df.loc[:, hourly_columns], cmap='viridis', cbar_kws=colorbar_options)

        plt.title('User Activity Heatmap')
        plt.xlabel('Hour of Day')
        plt.ylabel('User ID')
|
||||||
71
app/analysis/plot_line_activity-user.py
Normal file
71
app/analysis/plot_line_activity-user.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from .basePlotAnalysis import BasePlotAnalysis
|
||||||
|
from flask import current_app, url_for
|
||||||
|
|
||||||
|
from app.logging_config import get_logger
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
class PlotLineActivityAllUsers(BasePlotAnalysis):
    """
    Class for analyzing user activity trends over multiple days and generating a line graph.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """
    name = "Activity Line Graph (All Users)"
    description = "This analysis shows the activity line graph for all users. Generates a downloadable PNG image."
    plot_filename = "line_activity-all_users.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transform data for the line plot.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: Active counts with one row per user (busiest first),
            one column per hour, and a final ``Cumulative Sum`` row holding the
            per-hour total over all users.
        """
        # Derive the hour on a new frame rather than writing into the input.
        df = df.assign(hour=df['timestamp'].dt.hour)
        df = df[df['was_active']].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)

        # Order users by their total, then drop the helper column again.
        df['total_active_minutes'] = df.sum(axis=1)
        df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)

        # Per-hour total over all users. This equals the last row of
        # df.cumsum(), without building the whole cumulative frame.
        df.loc['Cumulative Sum'] = df.sum()

        return df

    def plot_data(self, df: pd.DataFrame):
        """
        Generate line graph for user activity throughout the day.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
        """
        plt.figure(figsize=(12, 6))

        # Plot each user's activity
        for index, row in df.iterrows():
            if index == 'Cumulative Sum':
                plt.plot(row.index, row.values, label=index, linewidth=3, color='black')  # Bold line for cumulative sum
            else:
                plt.plot(row.index, row.values, label=index)

        # Add labels and title
        plt.xlabel('Hour of Day')
        plt.ylabel('Activity Count')
        plt.title('User Activity Throughout the Day')
        # Legend sits outside the axes, to the right of the plot
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))

        plt.grid(True)
|
||||||
86
app/analysis/plotly_heat_user-activity.py
Normal file
86
app/analysis/plotly_heat_user-activity.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
from .basePlotlyAnalysis import BasePlotlyAnalysis
|
||||||
|
from flask import current_app, url_for
|
||||||
|
|
||||||
|
from app.logging_config import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
    """
    Class for analyzing user activity trends over multiple days and generating an interactive heatmap.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """
    name = "Activity Heatmap (Interactive)"
    description = "Displays user activity trends over multiple days using an interactive heatmap."
    plot_filename = "activity_heatmap.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transform data for the heatmap.

        Parameters:
            df (pd.DataFrame): The input DataFrame containing user activity data.

        Returns:
            pd.DataFrame: Long-format frame with columns ``name``, ``hour`` and
            ``activity_count``, holding one row per user for every hour 0-23.
        """
        # Derive the hour on a new frame rather than writing into the input.
        df = df.assign(hour=df['timestamp'].dt.hour)
        active_counts = df[df['was_active']].pivot_table(
            index='name',
            columns='hour',
            values='was_active',
            aggfunc='sum',
            fill_value=0
        )

        # Ensure all hours are represented. This is done on the wide table so
        # missing hours become zero-filled columns for the existing users; a
        # right-merge on the long table would instead add rows with no user name.
        active_counts = active_counts.reindex(columns=range(24), fill_value=0)

        active_counts = active_counts.reset_index().melt(
            id_vars='name', var_name='hour', value_name='activity_count'
        )
        active_counts['hour'] = active_counts['hour'].astype(int)  # Ensure hour is treated as numeric
        return active_counts

    def plot_data(self, df: pd.DataFrame):
        """
        Generate heatmap plot and store it on ``self.fig``.

        Parameters:
            df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
        """
        df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)

        # Create a Plotly heatmap
        self.fig = go.Figure(data=go.Heatmap(
            z=df.values,
            x=df.columns,
            y=df.index,
            colorscale='Viridis',
            colorbar=dict(title='Count of was_active == True')
        ))

        # Update layout
        self.fig.update_layout(
            title='User Activity Heatmap',
            xaxis_title='Hour of Day',
            yaxis_title='User ID',
            # Cells are centred on the integer hours, so extend the range by
            # half a cell on each side to show hours 0 and 23 in full.
            xaxis=dict(tickmode='linear', dtick=1, range=[-0.5, 23.5]),
            template='plotly_white'
        )

        self.fig.update_traces(
            hovertemplate="<br>".join([
                "Hour: %{x}",
                "Name: %{y}",
                "Activity: %{z}",
            ])
        )
|
||||||
69
app/analysis/plotly_line_activity-user.py
Normal file
69
app/analysis/plotly_line_activity-user.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from plotly.subplots import make_subplots
|
||||||
|
from .basePlotlyAnalysis import BasePlotlyAnalysis
|
||||||
|
from flask import current_app, url_for
|
||||||
|
|
||||||
|
from app.logging_config import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
    """
    Interactive line graph of hourly activity, one line per user plus a total.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
        plot_filename (str): The filename for the output plot.
        note (str): Additional notes for the analysis.
    """
    name = "Activity Line Graph (All Users, Interactive)"
    description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
    plot_filename = "line_activity-all_users.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build a user-by-hour table of active sample counts.

        Parameters:
            df (pd.DataFrame): Input data; the columns ``timestamp`` (datetime),
                ``was_active`` and ``name`` are read. The frame is not modified.

        Returns:
            pd.DataFrame: Indexed by user name, one column per hour 0-23,
            sorted by each user's total activity (most active first), with a
            final ``'Cumulative Sum'`` row holding the per-hour total over all
            users.
        """
        hours = pd.RangeIndex(24, name='hour')

        # ``assign`` returns a new frame, so the caller's frame is left untouched.
        active = df.assign(hour=df['timestamp'].dt.hour)
        active = active[active['was_active'].eq(True)]

        if active.empty:
            # Nothing to pivot; keep the 24-hour layout so the plot still works.
            counts = pd.DataFrame(0, index=pd.Index([], name='name'), columns=hours)
        else:
            counts = active.pivot_table(
                index='name',
                columns='hour',
                values='was_active',
                aggfunc='sum',
                fill_value=0,
            )
            # Hours in which nobody was active are real zeros; without them the
            # line would be interpolated straight across the gap.
            counts = counts.reindex(columns=hours, fill_value=0)

        # Most active users first.
        order = counts.sum(axis=1).sort_values(ascending=False).index
        counts = counts.loc[order]

        # Per-hour total over all users (computed before the row is appended).
        counts.loc['Cumulative Sum'] = counts.sum()

        return counts

    def plot_data(self, df: pd.DataFrame):
        """
        Build the line graph and store it in ``self.fig``.

        Parameters:
            df (pd.DataFrame): User-by-hour table as returned by
                ``transform_data``; the row labelled ``'Cumulative Sum'`` is
                drawn as a bold black line.
        """
        self.fig = make_subplots()

        # Plot each user's activity
        for label, counts in df.iterrows():
            trace_options = dict(x=counts.index, y=counts.values, mode='lines', name=label)
            if label == 'Cumulative Sum':
                # Bold line for cumulative sum
                trace_options['line'] = dict(width=3, color='black')
            self.fig.add_trace(go.Scatter(**trace_options))

        self.fig.update_layout(
            title='User Activity Throughout the Day',
            xaxis_title='Hour of Day',
            yaxis_title='Activity Count',
            legend_title='User',
            legend=dict(x=1, y=1),
            template='plotly_white'
        )
|
||||||
31
app/analysis/table_statistics.py
Normal file
31
app/analysis/table_statistics.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from .base import BaseAnalysis
|
||||||
|
from flask import render_template_string
|
||||||
|
|
||||||
|
class GenerateStatistics(BaseAnalysis):
    """
    Placeholder analysis that renders the number of rows per hour as an HTML table.

    Attributes:
        name (str): The name of the analysis.
        description (str): A brief description of the analysis.
    """
    name = "Test Statistics (Placeholder)"
    description = "Generates activity statistics grouped by hour."

    def execute(self, df: pd.DataFrame):
        """
        Count the rows of ``df`` per hour of day and wrap the table in a Bootstrap card.

        Parameters:
            df (pd.DataFrame): Input data; only the datetime column
                ``timestamp`` is read. The frame is not modified.

        Returns:
            str: Rendered HTML containing the ``hour`` / ``count`` table.
        """
        # Group by a derived series instead of writing an 'hour' column into
        # the caller's frame.
        hour_of_day = df["timestamp"].dt.hour.rename("hour")
        statistics = df.groupby(hour_of_day).size().reset_index(name="count")

        # Convert statistics DataFrame to HTML
        table_html = statistics.to_html(classes="table table-bordered table-striped")

        # Wrap it in Bootstrap styling. `safe` is needed because table_html is
        # markup produced by pandas above, not user-supplied text.
        return render_template_string(
            """
            <div class="card mt-3">
                <div class="card-header">
                    <h4>Activity Statistics</h4>
                </div>
                <div class="card-body">
                    {{ table_html | safe }}
                </div>
            </div>
            """,
            table_html=table_html
        )
|
||||||
@@ -6,7 +6,7 @@ import glob
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from app.models import Scraper, generate_statistics
|
from app.models import Scraper
|
||||||
from app.util import create_zip, delete_old_zips, tail, get_size
|
from app.util import create_zip, delete_old_zips, tail, get_size
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.logging_config import get_logger
|
from app.logging_config import get_logger
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ from app.views import register_views
|
|||||||
from app.api import register_api
|
from app.api import register_api
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.filters import register_filters
|
from app.filters import register_filters
|
||||||
from app.analysis import generate_statistics
|
|
||||||
|
|
||||||
def init_app():
|
def init_app():
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
|||||||
@@ -135,8 +135,4 @@ class Scraper:
|
|||||||
|
|
||||||
def stop_scraping(self):
|
def stop_scraping(self):
|
||||||
self.scraping_active = False
|
self.scraping_active = False
|
||||||
logger.debug("Scraping stopped by user")
|
logger.debug("Scraping stopped by user")
|
||||||
|
|
||||||
def generate_statistics(df):
|
|
||||||
df['hour'] = df['timestamp'].dt.hour # No need to convert timestamp again
|
|
||||||
return df.groupby('hour').size() # Activity by hour
|
|
||||||
7
app/static/common.js
Normal file
7
app/static/common.js
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
/**
 * Copy the state of a "check all" checkbox onto the checkboxes of a table.
 *
 * Disabled checkboxes are left alone: the templates render a checkbox as
 * `disabled` to keep that row from being selected, and "check all" must
 * not override that.
 *
 * @param {string} tableId - id of the table whose checkboxes are toggled.
 * @param {string} checkAllCheckboxId - id of the master checkbox to mirror.
 */
function checkAllCheckboxes(tableId, checkAllCheckboxId) {
    const table = document.getElementById(tableId);
    const checkAllCheckbox = document.getElementById(checkAllCheckboxId);

    // Nothing to do when the page does not contain the table or the master box.
    if (!table || !checkAllCheckbox) {
        return;
    }

    const checkboxes = table.querySelectorAll('input[type="checkbox"]:not(:disabled)');
    checkboxes.forEach(checkbox => {
        checkbox.checked = checkAllCheckbox.checked;
    });
}
|
||||||
@@ -93,12 +93,4 @@ function sortTable(columnIndex, tableId) {
|
|||||||
|
|
||||||
// Reinsert sorted rows
|
// Reinsert sorted rows
|
||||||
rows.forEach(row => tbody.appendChild(row));
|
rows.forEach(row => tbody.appendChild(row));
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkAllCheckboxes(tableId, checkAllCheckboxId) {
|
|
||||||
const table = document.getElementById(tableId);
|
|
||||||
const checkboxes = table.querySelectorAll('input[name="fileCheckbox"]');
|
|
||||||
const checkAllCheckbox = document.getElementById(checkAllCheckboxId);
|
|
||||||
|
|
||||||
checkboxes.forEach(checkbox => checkbox.checked = checkAllCheckbox.checked);
|
|
||||||
}
|
|
||||||
@@ -1,16 +1,100 @@
|
|||||||
{% extends 'base.html' %}
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<section class="container-fluid d-flex justify-content-center">
|
|
||||||
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
|
<section class="container-fluid d-flex justify-content-center">
|
||||||
<div class="container-sm">
|
<div class="container-md my-5 mb-3 mx-2 shadow-lg p-4">
|
||||||
<div class="row">
|
<div class="container-sm">
|
||||||
<div class="col">
|
<div class="row">
|
||||||
<h2>Analyze</h2>
|
<div class="col">
|
||||||
</div>
|
<h2>User Activity Distribution</h2>
|
||||||
<div class="col">
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
<div class="row">
|
||||||
{% endblock content %}
|
<div class="col">
|
||||||
|
<form method="POST" action="{{ url_for('views.analyze') }}">
|
||||||
|
<!-- Dropdown for selecting data file -->
|
||||||
|
<label for="data_file" class="form-label">Choose Data File:</label>
|
||||||
|
<select name="data_file" id="data_file" class="form-select">
|
||||||
|
{% if data_files %}
|
||||||
|
{% for file in data_files %}
|
||||||
|
{{ file }}
|
||||||
|
{{ selected_file }}
|
||||||
|
<option value="{{ file }}" {% if file == selected_file %}selected{% endif %}>{{ file.split('/')[-1] }}</option>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<option disabled>No CSV files found</option>
|
||||||
|
{% endif %}
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<!-- Analysis Selection Table -->
|
||||||
|
<label for="analyses" class="form-label">Select Analyses:</label>
|
||||||
|
<table id="analysesTable" class="table table-bordered table-striped">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th width="2%"><input type="checkbox" id="checkAllAnalyses" class="form-check-input" onclick="checkAllCheckboxes('analysesTable', 'checkAllAnalyses')"></th>
|
||||||
|
<th>Analysis Name</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% if analyses %}
|
||||||
|
{% for analysis in analyses %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<input class="form-check-input" type="checkbox" name="analyses" value="{{ analysis.name }}"
|
||||||
|
{% if analysis.name in selected_analyses %}checked{% endif %}>
|
||||||
|
</td>
|
||||||
|
<td>{{ analysis.name }}</td>
|
||||||
|
<td>{{ analysis.description }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="3" class="text-center">No analyses available</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<button type="submit" class="btn btn-primary mt-3">Run Analyses</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% include 'includes/error.html' %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
{% if plot_url %}
|
||||||
|
<section class="container-fluid d-flex justify-content-center">
|
||||||
|
<div class="container-md my-1 mx-2 shadow-lg p-4">
|
||||||
|
<div class="container-sm">
|
||||||
|
<div class="row mt-4">
|
||||||
|
<div class="col">
|
||||||
|
<h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
|
||||||
|
<img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if results %}
|
||||||
|
{% for analysis_name, result in results.items() %}
|
||||||
|
<section class="container-fluid d-flex justify-content-center">
|
||||||
|
<div class="container-md my-2 mx-2 shadow p-4 pt-0">
|
||||||
|
<div class="container-sm">
|
||||||
|
<div class="results mt-4">
|
||||||
|
<h3>{{ analysis_name }}</h3>
|
||||||
|
<div class="analysis-output">
|
||||||
|
{{ result | safe }} <!-- This allows HTML output -->
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
|
|||||||
@@ -1,68 +0,0 @@
|
|||||||
{% extends 'base.html' %}
|
|
||||||
|
|
||||||
{% block content %}
|
|
||||||
<section class="container-fluid d-flex justify-content-center">
|
|
||||||
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
|
|
||||||
<div class="container-sm">
|
|
||||||
<div class="row">
|
|
||||||
<div class="col">
|
|
||||||
<h2>User Activity Distribution</h2>
|
|
||||||
</div>
|
|
||||||
<div class="col text-end">
|
|
||||||
<!-- Dropdown for selecting data file -->
|
|
||||||
<form method="POST" action="{{ url_for('views.data_visualization') }}">
|
|
||||||
<label for="data_file" class="form-label">Choose Data File:</label>
|
|
||||||
<select name="data_file" id="data_file" class="form-select" onchange="this.form.submit()">
|
|
||||||
{% for file in data_files %}
|
|
||||||
<option value="{{ file }}" {% if file == selected_file %}selected{% endif %}>
|
|
||||||
{{ file.split('/')[-1] }}
|
|
||||||
</option>
|
|
||||||
{% endfor %}
|
|
||||||
</select>
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{% if error %}
|
|
||||||
<div class="alert alert-danger mt-3" role="alert">
|
|
||||||
{{ error }}
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if plot_url %}
|
|
||||||
<div class="row mt-4">
|
|
||||||
<div class="col">
|
|
||||||
<h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
|
|
||||||
<img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if statistics %}
|
|
||||||
<div class="row mt-4">
|
|
||||||
<div class="col">
|
|
||||||
<h2>Activity Statistics</h2>
|
|
||||||
<table class="table table-bordered table-hover">
|
|
||||||
<thead class="table-dark">
|
|
||||||
<tr>
|
|
||||||
<th>Hour</th>
|
|
||||||
<th>Activity Count</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{% for hour, count in statistics.items() %}
|
|
||||||
<tr>
|
|
||||||
<td>{{ hour }}</td>
|
|
||||||
<td>{{ count }}</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
{% endblock content %}
|
|
||||||
@@ -18,7 +18,7 @@
|
|||||||
<table id="dataFilesTable" class="table table-striped table-bordered table-hover">
|
<table id="dataFilesTable" class="table table-striped table-bordered table-hover">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th width="2%"><input type="checkbox" id="checkAllData" onclick="checkAllCheckboxes('dataFilesTable', 'checkAllData')"></th>
|
<th width="2%"><input type="checkbox" class="form-check-input" id="checkAllData" onclick="checkAllCheckboxes('dataFilesTable', 'checkAllData')"></th>
|
||||||
<th onclick="sortTable(1, 'dataFilesTable')">File Name</th>
|
<th onclick="sortTable(1, 'dataFilesTable')">File Name</th>
|
||||||
<th onclick="sortTable(2, 'dataFilesTable')">Last Modified</th>
|
<th onclick="sortTable(2, 'dataFilesTable')">Last Modified</th>
|
||||||
<th onclick="sortTable(3, 'dataFilesTable')">Created</th>
|
<th onclick="sortTable(3, 'dataFilesTable')">Created</th>
|
||||||
@@ -30,7 +30,7 @@
|
|||||||
<tbody>
|
<tbody>
|
||||||
{% for file in files.data %}
|
{% for file in files.data %}
|
||||||
<tr>
|
<tr>
|
||||||
<td><input type="checkbox" name="fileCheckbox" value="{{ url_for('download_data_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
|
<td><input type="checkbox" name="fileCheckbox" class="form-check-input" value="{{ url_for('download_data_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
|
||||||
<td><a href="{{ url_for('download_data_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
|
<td><a href="{{ url_for('download_data_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
|
||||||
<td>{{ file.last_modified | datetimeformat }}</td>
|
<td>{{ file.last_modified | datetimeformat }}</td>
|
||||||
<td>{{ file.created | datetimeformat }}</td>
|
<td>{{ file.created | datetimeformat }}</td>
|
||||||
@@ -79,7 +79,7 @@
|
|||||||
<tbody>
|
<tbody>
|
||||||
{% for file in files.log %}
|
{% for file in files.log %}
|
||||||
<tr>
|
<tr>
|
||||||
<td><input type="checkbox" name="fileCheckbox" value="{{ url_for('download_log_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
|
<td><input type="checkbox" name="fileCheckbox" class="form-check-input" value="{{ url_for('download_log_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
|
||||||
<td><a href="{{ url_for('download_log_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
|
<td><a href="{{ url_for('download_log_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
|
||||||
<td>{{ file.last_modified | datetimeformat }}</td>
|
<td>{{ file.last_modified | datetimeformat }}</td>
|
||||||
<td>{{ file.created | datetimeformat }}</td>
|
<td>{{ file.created | datetimeformat }}</td>
|
||||||
@@ -98,8 +98,5 @@
|
|||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
{% block scripts %}
|
|
||||||
{{ bootstrap.load_js() }}
|
|
||||||
<script src="{{url_for('.static', filename='download_results.js')}}"></script>
|
<script src="{{url_for('.static', filename='download_results.js')}}"></script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
{% endblock content %}
|
|
||||||
6
app/templates/includes/error.html
Normal file
6
app/templates/includes/error.html
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{% if error %}
|
||||||
|
<div class="alert alert-danger alert-dismissible fade show mt-3" role="alert">
|
||||||
|
<strong>Error:</strong> {{ error }}
|
||||||
|
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
<div class="container-fluid">
|
<div class="container-fluid">
|
||||||
<a class="navbar-brand" href="/">Torn User Activity Scraper</a>
|
<a class="navbar-brand" href="/">Torn User Activity Scraper</a>
|
||||||
{% from 'bootstrap4/nav.html' import render_nav_item %}
|
{% from 'bootstrap4/nav.html' import render_nav_item %}
|
||||||
{{ render_nav_item('views.data_visualization', 'Data Visualization') }}
|
{{ render_nav_item('views.analyze', 'Data Visualization') }}
|
||||||
{{ render_nav_item('download_results', 'Files') }}
|
{{ render_nav_item('download_results', 'Files') }}
|
||||||
{{ render_nav_item('log_viewer', 'Logs') }}
|
{{ render_nav_item('log_viewer', 'Logs') }}
|
||||||
<div class="d-flex" id="color-mode-toggle">
|
<div class="d-flex" id="color-mode-toggle">
|
||||||
|
|||||||
@@ -1,2 +1,3 @@
|
|||||||
{{ bootstrap.load_js() }}
|
{{ bootstrap.load_js() }}
|
||||||
<script src="{{url_for('static', filename='color_mode.js')}}"></script>
|
<script src="{{url_for('static', filename='color_mode.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='common.js') }}"></script>
|
||||||
|
|||||||
61
app/views.py
61
app/views.py
@@ -5,9 +5,10 @@ from flask import render_template, Blueprint, current_app, request
|
|||||||
from app.forms import ScrapingForm
|
from app.forms import ScrapingForm
|
||||||
from app.util import get_size
|
from app.util import get_size
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.api import scraper as scraper# Import the scraper instance
|
from app.api import scraper as scraper
|
||||||
from app.logging_config import get_logger
|
from app.logging_config import get_logger
|
||||||
from app.analysis import load_data, generate_statistics, plot_activity_distribution
|
|
||||||
|
from app.analysis import load_data, load_analysis_modules
|
||||||
|
|
||||||
|
|
||||||
from app.state import log_file_name
|
from app.state import log_file_name
|
||||||
@@ -29,10 +30,6 @@ def register_views(app):
|
|||||||
def results():
|
def results():
|
||||||
return render_template('results.html')
|
return render_template('results.html')
|
||||||
|
|
||||||
@app.route('/analyze')
|
|
||||||
def analyze():
|
|
||||||
return render_template('analyze.html')
|
|
||||||
|
|
||||||
@app.route('/log_viewer')
|
@app.route('/log_viewer')
|
||||||
def log_viewer():
|
def log_viewer():
|
||||||
return render_template('log_viewer.html')
|
return render_template('log_viewer.html')
|
||||||
@@ -87,42 +84,44 @@ def register_views(app):
|
|||||||
|
|
||||||
views_bp = Blueprint("views", __name__)
|
views_bp = Blueprint("views", __name__)
|
||||||
|
|
||||||
@views_bp.route("/analyze", methods=["GET", "POST"])
def analyze():
    """
    Show the analysis form and, on POST, run the selected analyses.

    GET renders the form with the available CSV files (newest first) and
    the available analysis modules. POST reads ``data_file`` and the
    ``analyses`` list from the form, runs every selected analysis on the
    loaded file and passes the outputs to the template as ``results``
    (keyed by analysis name). Problems with the selection are reported
    through the ``error`` template variable.
    """
    analysis_modules = load_analysis_modules()  # Load available analyses
    data_dir = current_app.config.get("DATA", {}).get("DATA_DIR")

    # Find all available CSV files
    data_files = sorted(
        glob.glob(os.path.join(data_dir, "*.csv")),
        key=os.path.getmtime,
        reverse=True,
    ) if data_dir else []

    context = {
        "data_files": data_files,
        "analyses": analysis_modules,
        "selected_file": None,
        "selected_analyses": [],
    }

    if request.method != "POST":
        return render_template("analyze.html", **context)

    selected_analyses = request.form.getlist("analyses")
    selected_file = request.form.get("data_file")

    # Put the submitted selection into the context so the form can re-select
    # the chosen file and re-check the chosen analyses.
    context["selected_analyses"] = selected_analyses
    context["selected_file"] = selected_file

    if not selected_file:
        context["error"] = "No file selected."
        return render_template("analyze.html", **context)

    # The path comes from the client: only accept one of the files that were
    # offered in the dropdown, never an arbitrary path.
    if selected_file not in data_files:
        context["selected_file"] = None
        context["error"] = "Invalid file selection."
        return render_template("analyze.html", **context)

    df = load_data(selected_file)

    # Each analysis gets its own copy so that one analysis adding or changing
    # columns cannot affect the next one. Some may return HTML.
    context["results"] = {
        analysis.name: analysis.execute(df.copy())
        for analysis in analysis_modules
        if analysis.name in selected_analyses
    }

    return render_template("analyze.html", **context)
|
||||||
app.register_blueprint(views_bp)
|
app.register_blueprint(views_bp)
|
||||||
|
|||||||
428
tests/analyses.ipynb
Normal file
428
tests/analyses.ipynb
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user