fixes #4 - adds modular analyses system using plugins
This commit is contained in:
34
app/analysis/__init__.py
Normal file
34
app/analysis/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import os
|
||||
import pkgutil
|
||||
import importlib
|
||||
import inspect
|
||||
from abc import ABC
|
||||
|
||||
from .base import BaseAnalysis
|
||||
|
||||
import pandas as pd
|
||||
|
||||
def load_analysis_modules():
    """Discover and instantiate every concrete analysis plugin in this package.

    Each module found on this package's search path is imported, and every
    class visible in it that subclasses ``BaseAnalysis`` and is not abstract
    is instantiated with no arguments.

    Returns:
        list: One instance per concrete analysis class, in discovery order.
    """
    analysis_modules = []
    # A concrete class can be visible in several modules (e.g. when one
    # plugin imports another); remember what was already instantiated so it
    # is only registered once.
    seen_classes = set()

    for _, module_name, _ in pkgutil.iter_modules(__path__):
        # Build the import name from this package's own name so the plugin
        # classes derive from the very same BaseAnalysis object imported
        # above, whatever name the package was imported under.
        module = importlib.import_module(f"{__name__}.{module_name}")

        for _, obj in inspect.getmembers(module, inspect.isclass):
            if obj in seen_classes or obj is BaseAnalysis:
                continue
            # Exclude abstract classes (like BasePlotAnalysis)
            if not issubclass(obj, BaseAnalysis) or inspect.isabstract(obj):
                continue
            seen_classes.add(obj)
            analysis_modules.append(obj())  # Instantiate only concrete classes

    return analysis_modules
|
||||
|
||||
def load_data(file_path: str) -> pd.DataFrame:
    """Read the scraped CSV at *file_path* into a DataFrame.

    The ``timestamp`` and ``last_action`` columns are converted to datetimes;
    values that cannot be parsed become ``NaT``.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
        for column in ("timestamp", "last_action"):
            frame[column] = pd.to_datetime(frame[column], errors="coerce")
        return frame

    raise FileNotFoundError(f"File {file_path} not found.")
|
||||
11
app/analysis/base.py
Normal file
11
app/analysis/base.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import pandas as pd
|
||||
|
||||
class BaseAnalysis(ABC):
    """Abstract interface shared by every analysis module.

    Subclasses override ``name`` and ``description`` and must implement
    ``execute``.
    """

    # Human-readable label and summary; subclasses override both.
    name = "Base Analysis"
    description = "This is a base analysis module."

    @abstractmethod
    def execute(self, df: pd.DataFrame):
        """Run the analysis on *df*; concrete subclasses provide the logic."""
        return None
|
||||
54
app/analysis/basePlotAnalysis.py
Normal file
54
app/analysis/basePlotAnalysis.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from flask import url_for
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .base import BaseAnalysis
|
||||
from app.analysis.data_utils import prepare_data, mk_plotdir
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
# -------------------------------------------
|
||||
# Base Class for All Plot Analyses
|
||||
# -------------------------------------------
|
||||
class BasePlotAnalysis(BaseAnalysis, ABC):
    """
    Base class for all plot-based analyses.
    It enforces a structure for:
    - Data preparation
    - Transformation
    - Plot generation
    - Figure cleanup
    """

    # File name of the rendered image inside the plots directory.
    plot_filename = "default_plot.png"
    # Fallback alt text, used when a subclass does not define ``note``.
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """Run the full pipeline and return an ``<img>`` tag for the plot.

        The frame is passed through ``prepare_data``, then through the
        subclass's ``transform_data`` and ``plot_data``; the current
        matplotlib figure is saved to ``self.output_path``.

        Returns:
            str: HTML ``<img>`` element whose ``src`` is ``self.plot_url``.
        """
        df = prepare_data(df)  # Step 1: Prepare data

        paths = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = paths['output_path'], paths['plot_url']

        try:
            df = self.transform_data(df)  # Step 2: Transform data (implemented by subclass)
            self.plot_data(df)  # Step 3: Create the plot
            plt.savefig(self.output_path, bbox_inches="tight")
        finally:
            # Step 4: Always release the figure, even when plotting or saving
            # fails, so figures do not pile up in pyplot's global state.
            plt.close()

        # Subclasses define ``note``; this base only declares ``alt_text``.
        # Fall back to it instead of raising AttributeError when a subclass
        # omits ``note``.
        alt = getattr(self, "note", self.alt_text)
        return f'<img src="{self.plot_url}" alt="{alt}">'

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Subclasses must define how they transform the data"""
        pass

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """Subclasses must define how they generate the plot"""
        pass
|
||||
50
app/analysis/basePlotlyAnalysis.py
Normal file
50
app/analysis/basePlotlyAnalysis.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
from flask import url_for
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .base import BaseAnalysis
|
||||
from app.analysis.data_utils import prepare_data, mk_plotdir
|
||||
|
||||
# -------------------------------------------
|
||||
# Base Class for All Plotly Plot Analyses
|
||||
# -------------------------------------------
|
||||
class BasePlotlyAnalysis(BaseAnalysis, ABC):
    """
    Shared pipeline for analyses rendered with Plotly.

    ``execute`` drives the steps in order: prepare the data, let the subclass
    transform it, let the subclass build ``self.fig``, then write the figure
    to an HTML file that is embedded through an iframe.
    """

    plot_filename = "default_plot.html"
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """Run the pipeline and return an ``<iframe>`` pointing at the plot."""
        prepared = prepare_data(df)

        locations = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = locations['output_path'], locations['plot_url']

        transformed = self.transform_data(prepared)
        # plot_data is expected to store the figure on self.fig.
        self.plot_data(transformed)

        # Persist the interactive figure as a standalone HTML document.
        self.fig.write_html(self.output_path)

        return f'<iframe src="{self.plot_url}" width="100%" height="600"></iframe>'

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Reshape the prepared frame for plotting (subclass responsibility)."""
        pass

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """Build the Plotly figure from the transformed frame (subclass responsibility)."""
        pass
|
||||
22
app/analysis/data_utils.py
Normal file
22
app/analysis/data_utils.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from flask import current_app, url_for
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
def prepare_data(df):
    """Return a copy of *df* enriched with the columns the analyses rely on.

    The input frame is left untouched, so one loaded DataFrame can be handed
    to several analyses without them seeing each other's derived columns.

    Columns set on the returned frame:
        timestamp, last_action: parsed with ``pd.to_datetime``.
        prev_timestamp: the preceding row's ``timestamp`` for the same
            ``user_id`` (in row order).
        was_active: True where ``timestamp - last_action`` is at most 60
            seconds; rows where that difference is missing are False.
        hour: hour component of ``timestamp``.

    Args:
        df: DataFrame with ``user_id``, ``timestamp`` and ``last_action``
            columns.

    Returns:
        pandas.DataFrame: The enriched copy.
    """
    # Work on a copy: the caller's frame is shared across analysis plugins.
    df = df.copy()

    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df["last_action"] = pd.to_datetime(df["last_action"])
    df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
    df["was_active"] = (df["timestamp"] - df["last_action"]) <= pd.Timedelta(seconds=60)
    df["was_active"] = df["was_active"].fillna(False)
    df['hour'] = df['timestamp'].dt.hour
    return df
|
||||
|
||||
def mk_plotdir(output_filename):
    """Ensure the static plots directory exists and locate *output_filename* in it.

    Returns:
        dict: ``output_path`` (filesystem path under the app's
        ``static/plots`` directory) and ``plot_url`` (external URL of the
        same file served from the ``static`` endpoint).
    """
    target_dir = os.path.join(current_app.root_path, "static", "plots")
    os.makedirs(target_dir, exist_ok=True)

    return {
        'output_path': os.path.join(target_dir, output_filename),
        'plot_url': url_for('static', filename=f'plots/{output_filename}', _external=True),
    }
|
||||
33
app/analysis/plot_bar_activity-user.py
Normal file
33
app/analysis/plot_bar_activity-user.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from .basePlotAnalysis import BasePlotAnalysis
|
||||
from flask import current_app, url_for
|
||||
|
||||
from app.logging_config import get_logger
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
class PlotTopActiveUsers(BasePlotAnalysis):
    """Horizontal bar chart of how many active records each user has."""

    name = "Top Active Users"
    description = "Displays the most active users based on their number of recorded actions."
    plot_filename = "bar_activity-per-user.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Count the rows flagged ``was_active`` per ``name``.

        Returns:
            pd.DataFrame: Columns ``name`` and ``active_count``.
        """
        active = df[df['was_active'].eq(True)]
        return active.groupby('name').size().reset_index(name='active_count')

    def plot_data(self, df: pd.DataFrame):
        """Draw the bars, most active user first."""
        plt.figure(figsize=(10, 6))
        sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
        plt.xticks(rotation=90)
        plt.title('Minutes Active')
        # Counts run along the x axis and users along the y axis, so the
        # labels must follow that orientation.
        plt.xlabel('Active Count')
        plt.ylabel('Player')
|
||||
31
app/analysis/plot_bar_peak_hours.py
Normal file
31
app/analysis/plot_bar_peak_hours.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from .basePlotAnalysis import BasePlotAnalysis
|
||||
from flask import current_app, url_for
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
class PlotPeakHours(BasePlotAnalysis):
    """Bar chart of the number of active records per hour of the day."""

    name = "Peak Hours Analysis"
    description = "Identifies peak activity hours using a bar chart."
    plot_filename = "peak_hours.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return *df* unchanged; the per-hour counting happens in ``plot_data``."""
        return df

    def plot_data(self, df: pd.DataFrame):
        """Generate bar chart for peak hours"""
        # Count active rows per hour and make sure all 24 hours are present
        # (zero where nothing happened). The bars are categorical, so without
        # this the 24 tick positions set below would not line up with the
        # bars whenever some hour has no activity.
        peak_hours = (
            df[df["was_active"]]["hour"]
            .value_counts()
            .reindex(range(24), fill_value=0)
        )

        plt.figure(figsize=(12, 5))
        sns.barplot(x=peak_hours.index, y=peak_hours.values, hue=peak_hours.values, palette="coolwarm")

        plt.xlabel("Hour of the Day")
        plt.ylabel("Activity Count")
        plt.title("Peak Hours of User Activity")
        plt.xticks(range(0, 24))
|
||||
33
app/analysis/plot_heat_user-activity-hour.py
Normal file
33
app/analysis/plot_heat_user-activity-hour.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from .basePlotAnalysis import BasePlotAnalysis
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
class PlotActivityHeatmap(BasePlotAnalysis):
    """Static heatmap of active-record counts per user name and hour."""

    name = "Activity Heatmap"
    description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
    plot_filename = "activity_heatmap.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pivot active rows into a name-by-hour count table.

        A ``total_active_minutes`` column holding each row's sum is appended
        and the table is ordered by it, largest first.
        """
        active_rows = df[df['was_active']]
        counts = active_rows.pivot_table(
            index='name',
            columns='hour',
            values='was_active',
            aggfunc='sum',
            fill_value=0,
        )
        counts['total_active_minutes'] = counts.sum(axis=1)
        ordered = counts.sort_values(by='total_active_minutes', ascending=False)
        return ordered

    def plot_data(self, df: pd.DataFrame):
        """Render the heatmap from every column except the helper total."""
        plt.figure(figsize=(12, 8))

        hour_columns = df.columns != 'total_active_minutes'
        sns.heatmap(
            df.loc[:, hour_columns],
            cmap='viridis',
            cbar_kws={'label': 'Count of was_active == True'},
        )

        plt.xlabel('Hour of Day')
        plt.ylabel('User ID')
        plt.title('User Activity Heatmap')
|
||||
49
app/analysis/plot_line_activity-user.py
Normal file
49
app/analysis/plot_line_activity-user.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from .basePlotAnalysis import BasePlotAnalysis
|
||||
from flask import current_app, url_for
|
||||
|
||||
from app.logging_config import get_logger
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
class PlotLineActivityAllUsers(BasePlotAnalysis):
    """Line chart of per-hour activity for every user plus an overall total."""

    name = "Activity Line Graph (All Users)"
    description = "This analysis shows the activity line graph for all users. Generates a downloadable PNG image."
    plot_filename = "line_activity-all_users.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform data for the line plot.

        Builds a name-by-hour table of active-record counts, ordered by each
        user's total (largest first), and appends a ``'Cumulative Sum'`` row
        holding the per-hour total over all users.
        """
        # Derive the hour on the filtered copy rather than writing a column
        # into the caller's frame.
        active = df[df['was_active'].eq(True)].assign(hour=lambda d: d['timestamp'].dt.hour)
        df = active.pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
        df['total_active_minutes'] = df.sum(axis=1)
        df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)

        # The last row of a cumulative sum is just the column sum; compute it
        # directly, and skip the totals row when there is nothing to total.
        if not df.empty:
            df.loc['Cumulative Sum'] = df.sum()

        return df

    def plot_data(self, df: pd.DataFrame):
        """Plot one line per row of *df*; the totals row is drawn bold and black."""
        plt.figure(figsize=(12, 6))

        # Plot each user's activity
        for index, row in df.iterrows():
            if index == 'Cumulative Sum':
                plt.plot(row.index, row.values, label=index, linewidth=3, color='black')  # Bold line for cumulative sum
            else:
                plt.plot(row.index, row.values, label=index)

        # Add labels and title
        plt.xlabel('Hour of Day')
        plt.ylabel('Activity Count')
        plt.title('User Activity Throughout the Day')
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))

        plt.grid(True)
|
||||
64
app/analysis/plotly_heat_user-activity.py
Normal file
64
app/analysis/plotly_heat_user-activity.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from .basePlotlyAnalysis import BasePlotlyAnalysis
|
||||
from flask import current_app, url_for
|
||||
|
||||
from app.logging_config import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
    """Interactive heatmap of active-record counts per user name and hour."""

    name = "Activity Heatmap (Interactive)"
    description = "Displays user activity trends over multiple days using an interactive heatmap."
    plot_filename = "activity_heatmap.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform data for the heatmap.

        Returns:
            pd.DataFrame: Long-format frame with columns ``name``, ``hour``
            (int) and ``activity_count``, holding one row per user and hour
            for all 24 hours.
        """
        # Derive the hour on the filtered copy rather than writing a column
        # into the caller's frame.
        active = df[df['was_active']].assign(hour=lambda d: d['timestamp'].dt.hour)
        active_counts = active.pivot_table(
            index='name',
            columns='hour',
            values='was_active',
            aggfunc='sum',
            fill_value=0
        )

        # Ensure all hours are represented. Reindexing the hour columns adds
        # zero-filled columns for quiet hours; merging against a list of
        # hours instead would create rows without a user, which then show up
        # in the plot as a bogus user named 0.
        active_counts = active_counts.reindex(columns=range(24), fill_value=0).reset_index()

        active_counts = active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count')
        active_counts['hour'] = active_counts['hour'].astype(int)  # Ensure hour is treated as numeric
        return active_counts

    def plot_data(self, df: pd.DataFrame):
        """Generate heatmap plot and store it on ``self.fig``."""
        df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)

        # Create a Plotly heatmap
        self.fig = go.Figure(data=go.Heatmap(
            z=df.values,
            x=df.columns,
            y=df.index,
            colorscale='Viridis',
            colorbar=dict(title='Count of was_active == True')
        ))

        # Update layout
        self.fig.update_layout(
            title='User Activity Heatmap',
            xaxis_title='Hour of Day',
            yaxis_title='User ID',
            xaxis=dict(tickmode='linear', dtick=1, range=[0, 23]),  # Ensure x-axis covers all hours
            template='plotly_white'
        )

        self.fig.update_traces(
            hovertemplate="<br>".join([
                "Hour: %{x}",
                "Name: %{y}",
                "Activity: %{z}",
            ])
        )
|
||||
48
app/analysis/plotly_line_activity-user.py
Normal file
48
app/analysis/plotly_line_activity-user.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
from plotly.subplots import make_subplots
|
||||
from .basePlotlyAnalysis import BasePlotlyAnalysis
|
||||
from flask import current_app, url_for
|
||||
|
||||
from app.logging_config import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
    """Interactive line chart of per-hour activity for every user plus a total."""

    name = "Activity Line Graph (All Users, Interactive)"
    description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
    plot_filename = "line_activity-all_users.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform data for the line plot.

        Builds a name-by-hour table of active-record counts, ordered by each
        user's total (largest first), and appends a ``'Cumulative Sum'`` row
        holding the per-hour total over all users.
        """
        # Derive the hour on the filtered copy rather than writing a column
        # into the caller's frame.
        active = df[df['was_active'].eq(True)].assign(hour=lambda d: d['timestamp'].dt.hour)
        df = active.pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
        df['total_active_minutes'] = df.sum(axis=1)
        df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)

        # The last row of a cumulative sum is just the column sum; compute it
        # directly, and skip the totals row when there is nothing to total.
        if not df.empty:
            df.loc['Cumulative Sum'] = df.sum()

        return df

    def plot_data(self, df: pd.DataFrame):
        """Add one trace per row of *df* to ``self.fig``; the totals row is bold and black."""
        # Create a Plotly figure
        self.fig = make_subplots()

        # Plot each user's activity
        for index, row in df.iterrows():
            if index == 'Cumulative Sum':
                self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index, line=dict(width=3, color='black')))  # Bold line for cumulative sum
            else:
                self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index))

        # Update layout
        self.fig.update_layout(
            title='User Activity Throughout the Day',
            xaxis_title='Hour of Day',
            yaxis_title='Activity Count',
            legend_title='User',
            legend=dict(x=1, y=1),
            template='plotly_white'
        )
|
||||
31
app/analysis/table_statistics.py
Normal file
31
app/analysis/table_statistics.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
from .base import BaseAnalysis
|
||||
from flask import render_template_string
|
||||
|
||||
class GenerateStatistics(BaseAnalysis):
    """Renders a Bootstrap card containing a table of row counts per hour."""

    name = "Test Statistics (Placeholder)"
    description = "Generates activity statistics grouped by hour."

    def execute(self, df: pd.DataFrame):
        """Count rows per hour of ``timestamp`` and return them as an HTML card.

        Note that an ``hour`` column is written onto *df* as part of the work.
        """
        df["hour"] = df["timestamp"].dt.hour
        rows_per_hour = df.groupby("hour").size()

        # Render the counts as an HTML table carrying Bootstrap table classes.
        table_html = rows_per_hour.reset_index(name="count").to_html(
            classes="table table-bordered table-striped"
        )

        # Card wrapper; the table markup is inserted unescaped via ``safe``.
        card_template = """
        <div class="card mt-3">
            <div class="card-header">
                <h4>Activity Statistics</h4>
            </div>
            <div class="card-body">
                {{ table_html | safe }}
            </div>
        </div>
        """

        return render_template_string(card_template, table_html=table_html)
|
||||
Reference in New Issue
Block a user