feature/analysis-form #10

Merged
mischbeck merged 4 commits from feature/analysis-form into master 2025-02-10 03:11:58 +01:00
22 changed files with 1018 additions and 179 deletions
Showing only changes of commit e57869374b - Show all commits

View File

@@ -1,60 +0,0 @@
import os
import pandas as pd
import matplotlib
matplotlib.use("Agg") # Prevents GUI-related issues in Flask
import matplotlib.pyplot as plt
import seaborn as sns
def load_data(file_path: str) -> pd.DataFrame:
    """Read the scraped CSV at ``file_path`` into a DataFrame.

    The ``timestamp`` and ``last_action`` columns are parsed as datetimes;
    values that cannot be parsed become ``NaT`` (``errors="coerce"``).

    Raises:
        FileNotFoundError: if ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
        for column in ("timestamp", "last_action"):
            frame[column] = pd.to_datetime(frame[column], errors="coerce")
        return frame
    raise FileNotFoundError(f"File {file_path} not found.")
def generate_statistics(df: pd.DataFrame):
    """Count rows per hour of day.

    Adds an ``hour`` column (taken from ``timestamp``) to ``df`` itself and
    returns a Series of row counts indexed by that hour.
    """
    hour_of_day = df["timestamp"].dt.hour
    df["hour"] = hour_of_day
    grouped = df.groupby("hour")
    return grouped.size()
def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
    """Plot the number of rows per hour of day as a bar chart and save it.

    Args:
        df: Frame with a ``timestamp`` column. If that column is not already
            a datetime dtype it is converted in place (unparseable values
            become ``NaT``). An ``hour`` column is added to ``df``.
        output_path: File name of the image. It is always placed inside
            ``app/static/plots`` (relative to the working directory).

    Returns:
        The path of the written image file.

    Raises:
        FileNotFoundError: if the image is missing after saving.
    """
    # Ensure the target directory exists before anything is drawn.
    static_dir = os.path.join("app", "static", "plots")
    output_path = os.path.join(static_dir, output_path)
    os.makedirs(static_dir, exist_ok=True)

    if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    df["hour"] = df["timestamp"].dt.hour

    # The bar plot places bars at categorical positions 0..k-1, while the
    # ticks below are set to 0..23. Including every hour (zero-filled) makes
    # position and hour coincide, so the tick labels are correct even when
    # some hours have no rows.
    hourly = df.groupby("hour").size()
    hourly.index = hourly.index.astype(int)  # hours are floats when NaT rows exist
    activity_counts = (
        hourly.reindex(range(24), fill_value=0)
        .rename_axis("hour")
        .reset_index(name="count")
    )

    plt.figure(figsize=(10, 5))
    try:
        # `hue` is assigned explicitly to avoid the seaborn palette warning.
        sns.barplot(x="hour", y="count", data=activity_counts, hue="hour", palette="Blues", legend=False)
        plt.xlabel("Hour of the Day")
        plt.ylabel("Activity Count")
        plt.title("User Activity Distribution")
        plt.xticks(range(0, 24))
        plt.savefig(output_path, bbox_inches="tight")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()

    # Verify the file exists after saving.
    if not os.path.exists(output_path):
        raise FileNotFoundError(f"Plot could not be saved to {output_path}")
    return output_path

34
app/analysis/__init__.py Normal file
View File

@@ -0,0 +1,34 @@
import os
import pkgutil
import importlib
import inspect
from abc import ABC
from .base import BaseAnalysis
import pandas as pd
def load_analysis_modules():
    """Discover and instantiate every concrete analysis in this package.

    Each module found in the package directory is imported as
    ``app.analysis.<module>`` and scanned for classes that subclass
    ``BaseAnalysis`` and are not abstract.

    Returns:
        A list with one instance per concrete analysis class.
    """
    analysis_modules = []
    # inspect.getmembers also reports classes a module merely imported, so a
    # concrete analysis imported by a sibling module would otherwise be
    # instantiated (and listed) more than once.
    seen_classes = set()
    package_path = __path__[0]
    for _, module_name, _ in pkgutil.iter_modules([package_path]):
        module = importlib.import_module(f"app.analysis.{module_name}")
        for _, obj in inspect.getmembers(module, inspect.isclass):
            if obj in seen_classes:
                continue
            # Skip unrelated classes, the root base, and abstract bases
            # (like BasePlotAnalysis).
            if not issubclass(obj, BaseAnalysis) or obj is BaseAnalysis or inspect.isabstract(obj):
                continue
            seen_classes.add(obj)
            analysis_modules.append(obj())  # Instantiate only concrete classes
    return analysis_modules
def load_data(file_path: str) -> pd.DataFrame:
    """Load the scraped CSV file at ``file_path`` as a DataFrame.

    ``timestamp`` and ``last_action`` are converted to datetimes, with
    unparseable entries coerced to ``NaT``.

    Raises:
        FileNotFoundError: if ``file_path`` does not exist.
    """
    file_missing = not os.path.exists(file_path)
    if file_missing:
        raise FileNotFoundError(f"File {file_path} not found.")

    frame = pd.read_csv(file_path)
    datetime_columns = ("timestamp", "last_action")
    for column in datetime_columns:
        frame[column] = pd.to_datetime(frame[column], errors="coerce")
    return frame

11
app/analysis/base.py Normal file
View File

@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod
import pandas as pd
class BaseAnalysis(ABC):
    """Abstract interface shared by all analysis modules."""

    # Title and summary of the analysis; concrete analyses override both.
    name = "Base Analysis"
    description = "This is a base analysis module."

    @abstractmethod
    def execute(self, df: pd.DataFrame):
        """Run the analysis on ``df``; every concrete subclass implements this."""

View File

@@ -0,0 +1,54 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from flask import url_for
from abc import ABC, abstractmethod
from .base import BaseAnalysis
from app.analysis.data_utils import prepare_data, mk_plotdir
import matplotlib
matplotlib.use('Agg')
# -------------------------------------------
# Base Class for All Plot Analyses
# -------------------------------------------
class BasePlotAnalysis(BaseAnalysis, ABC):
    """
    Base class for all Matplotlib plot-based analyses.

    ``execute`` runs a fixed pipeline:
    - Data preparation (``prepare_data``)
    - Transformation (``transform_data``, implemented by the subclass)
    - Plot generation (``plot_data``, implemented by the subclass)
    - Saving the current figure and closing it
    """
    # File name of the saved image inside the plots directory.
    plot_filename = "default_plot.png"
    # Alt text used for the <img> tag when a subclass defines no ``note``.
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """Run the full pipeline and return an ``<img>`` tag for the saved plot.

        Sets ``self.output_path`` and ``self.plot_url`` from ``mk_plotdir``.
        """
        from html import escape  # local import: only needed for the alt text

        df = prepare_data(df)  # Step 1: Prepare data
        paths = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = paths['output_path'], paths['plot_url']
        df = self.transform_data(df)  # Step 2: Transform data (implemented by subclass)
        try:
            self.plot_data(df)  # Step 3: Create the plot
            plt.savefig(self.output_path, bbox_inches="tight")
        finally:
            # Step 4: release the figure even if plotting or saving failed.
            plt.close()
        # Subclasses provide ``note``; fall back to ``alt_text`` so a subclass
        # without it no longer raises AttributeError.
        alt = getattr(self, "note", self.alt_text)
        return f'<img src="{self.plot_url}" alt="{escape(str(alt), quote=True)}">'

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Subclasses must define how they transform the data"""

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """Subclasses must define how they generate the plot"""

View File

@@ -0,0 +1,50 @@
import os
import pandas as pd
import plotly.graph_objects as go
from flask import url_for
from abc import ABC, abstractmethod
from .base import BaseAnalysis
from app.analysis.data_utils import prepare_data, mk_plotdir
# -------------------------------------------
# Base Class for All Plotly Plot Analyses
# -------------------------------------------
class BasePlotlyAnalysis(BaseAnalysis, ABC):
    """
    Base class for all Plotly plot-based analyses.

    ``execute`` runs a fixed pipeline:
    - Data preparation (``prepare_data``)
    - Transformation (``transform_data``, implemented by the subclass)
    - Plot generation (``plot_data``, which must set ``self.fig``)
    - Writing the figure to an HTML file
    """
    plot_filename = "default_plot.html"
    alt_text = "Default Alt Text"

    def execute(self, df: pd.DataFrame):
        """Run the pipeline and return an ``<iframe>`` tag pointing at the saved HTML plot."""
        prepared = prepare_data(df)
        plot_paths = mk_plotdir(self.plot_filename)
        self.output_path, self.plot_url = plot_paths['output_path'], plot_paths['plot_url']

        transformed = self.transform_data(prepared)
        self.plot_data(transformed)

        # ``plot_data`` stores the figure on ``self.fig``; persist it as HTML.
        self.fig.write_html(self.output_path)

        iframe = f'<iframe src="{self.plot_url}" width="100%" height="600"></iframe>'
        return iframe

    @abstractmethod
    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Subclasses must define how they transform the data"""

    @abstractmethod
    def plot_data(self, df: pd.DataFrame):
        """Subclasses must define how they generate the plot"""

View File

@@ -0,0 +1,22 @@
from flask import current_app, url_for
import os
import pandas as pd
def prepare_data(df):
    """Add the derived columns the analyses rely on and return ``df``.

    The frame is modified in place:
    - ``timestamp`` and ``last_action`` are converted to datetimes,
    - ``prev_timestamp`` holds the previous ``timestamp`` of the same ``user_id``,
    - ``was_active`` is True where ``timestamp - last_action`` is at most 60 seconds,
    - ``hour`` is the hour of ``timestamp``.
    """
    for column in ("timestamp", "last_action"):
        df[column] = pd.to_datetime(df[column])

    per_user_timestamps = df.groupby("user_id")["timestamp"]
    df["prev_timestamp"] = per_user_timestamps.shift(1)

    since_last_action = df["timestamp"] - df["last_action"]
    df["was_active"] = since_last_action <= pd.Timedelta(seconds=60)
    df["was_active"] = df["was_active"].fillna(False)

    df['hour'] = df['timestamp'].dt.hour
    return df
def mk_plotdir(output_filename):
    """Create ``<app root>/static/plots`` if needed and locate a plot file in it.

    Returns a dict with ``output_path`` (filesystem path of the file) and
    ``plot_url`` (external URL of the same file under the ``static`` endpoint).
    """
    target_dir = os.path.join(current_app.root_path, "static", "plots")
    os.makedirs(target_dir, exist_ok=True)
    return {
        'output_path': os.path.join(target_dir, output_filename),
        'plot_url': url_for('static', filename=f'plots/{output_filename}', _external=True),
    }

View File

@@ -0,0 +1,33 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
from app.logging_config import get_logger
import matplotlib
matplotlib.use('Agg')
logger = get_logger()
class PlotTopActiveUsers(BasePlotAnalysis):
    """Horizontal bar chart of the number of active rows per user name."""
    name = "Top Active Users"
    description = "Displays the most active users based on their number of recorded actions."
    plot_filename = "bar_activity-per-user.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Count the rows flagged ``was_active`` per ``name``.

        Returns a frame with the columns ``name`` and ``active_count``.
        """
        active_rows = df[df['was_active']]
        return active_rows.groupby('name').size().reset_index(name='active_count')

    def plot_data(self, df: pd.DataFrame):
        """Draw the bar plot, sorted by descending ``active_count``."""
        plt.figure(figsize=(10, 6))
        sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
        plt.xticks(rotation=90)
        plt.title('Minutes Active')
        # Counts are on the x-axis and names on the y-axis, so label them
        # accordingly (the labels used to be swapped).
        plt.xlabel('Active Count')
        plt.ylabel('Player')

View File

@@ -0,0 +1,31 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
import matplotlib
matplotlib.use('Agg')
class PlotPeakHours(BasePlotAnalysis):
    """Bar chart of the number of active rows per hour of day."""
    name = "Peak Hours Analysis"
    description = "Identifies peak activity hours using a bar chart."
    plot_filename = "peak_hours.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return ``df`` unchanged; the aggregation happens in ``plot_data``."""
        return df

    def plot_data(self, df: pd.DataFrame):
        """Generate bar chart for peak hours"""
        active_hours = df.loc[df["was_active"], "hour"].dropna().astype(int)
        # The bar plot places bars at categorical positions 0..k-1, while the
        # ticks below are set to 0..23. Zero-filling every hour makes position
        # and hour coincide, so hours without activity no longer shift the
        # labels.
        peak_hours = active_hours.value_counts().reindex(range(24), fill_value=0)
        plt.figure(figsize=(12, 5))
        sns.barplot(x=peak_hours.index, y=peak_hours.values, hue=peak_hours.values, palette="coolwarm")
        plt.xlabel("Hour of the Day")
        plt.ylabel("Activity Count")
        plt.title("Peak Hours of User Activity")
        plt.xticks(range(0, 24))

View File

@@ -0,0 +1,33 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
import matplotlib
matplotlib.use('Agg')
class PlotActivityHeatmap(BasePlotAnalysis):
    """Static heatmap of active-row counts per user name and hour of day."""
    name = "Activity Heatmap"
    description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
    plot_filename = "activity_heatmap.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build a name-by-hour table of active-row counts.

        A ``total_active_minutes`` column (row sum) is appended and the rows
        are sorted by it in descending order.
        """
        only_active = df[df['was_active']]
        per_user_hour = only_active.pivot_table(
            values='was_active',
            index='name',
            columns='hour',
            aggfunc='sum',
            fill_value=0,
        )
        per_user_hour['total_active_minutes'] = per_user_hour.sum(axis=1)
        ranked = per_user_hour.sort_values(by='total_active_minutes', ascending=False)
        return ranked

    def plot_data(self, df: pd.DataFrame):
        """Draw the heatmap from every column except the helper total."""
        plt.figure(figsize=(12, 8))
        hour_columns = df.columns != 'total_active_minutes'
        sns.heatmap(
            df.loc[:, hour_columns],
            cmap='viridis',
            cbar_kws={'label': 'Count of was_active == True'},
        )
        plt.title('User Activity Heatmap')
        plt.xlabel('Hour of Day')
        plt.ylabel('User ID')

View File

@@ -0,0 +1,49 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
from app.logging_config import get_logger
import matplotlib
matplotlib.use('Agg')
logger = get_logger()
class PlotLineActivityAllUsers(BasePlotAnalysis):
    """Line graph of active-row counts per hour, one line per user name."""
    name = "Activity Line Graph (All Users)"
    description = "This analysis shows the activity line graph for all users. Generates a downloadable PNG image."
    plot_filename = "line_activity-all_users.png"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build a name-by-hour table of active-row counts for the line plot.

        Rows are sorted by their total in descending order, and a final
        ``'Cumulative Sum'`` row holds the per-hour totals over all users.
        """
        active = df[df['was_active']]
        # Derive the hour on the filtered copy instead of writing into the
        # caller's frame.
        active = active.assign(hour=active['timestamp'].dt.hour)
        counts = active.pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
        counts['total_active_minutes'] = counts.sum(axis=1)
        counts = counts.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)
        # The last row of a cumulative sum is just the column total; summing
        # directly also works when the table has no rows.
        counts.loc['Cumulative Sum'] = counts.sum()
        return counts

    def plot_data(self, df: pd.DataFrame):
        """Plot one line per row, with the 'Cumulative Sum' row drawn bold and black."""
        plt.figure(figsize=(12, 6))
        for index, row in df.iterrows():
            if index == 'Cumulative Sum':
                plt.plot(row.index, row.values, label=index, linewidth=3, color='black')  # Bold line for cumulative sum
            else:
                plt.plot(row.index, row.values, label=index)
        plt.xlabel('Hour of Day')
        plt.ylabel('Activity Count')
        plt.title('User Activity Throughout the Day')
        # Place the legend outside the axes, to the right of the plot.
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
        plt.grid(True)

View File

@@ -0,0 +1,64 @@
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for
from app.logging_config import get_logger
logger = get_logger()
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
    """Interactive heatmap of active-row counts per user name and hour of day."""
    name = "Activity Heatmap (Interactive)"
    description = "Displays user activity trends over multiple days using an interactive heatmap."
    plot_filename = "activity_heatmap.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build a long table of active-row counts for every name and hour 0-23.

        Returns a frame with the columns ``name``, ``hour`` and
        ``activity_count``; hours without activity have a count of 0.
        """
        active = df[df['was_active']].dropna(subset=['timestamp'])
        active = active.assign(hour=active['timestamp'].dt.hour.astype(int))
        counts = active.pivot_table(
            index='name',
            columns='hour',
            values='was_active',
            aggfunc='sum',
            fill_value=0
        )
        # Ensure all hours are represented by adding zero-filled columns.
        # (Merging the long table against a list of hours left the padded
        # rows without a name, which then showed up as a bogus "0" user.)
        counts = counts.reindex(columns=range(24), fill_value=0).rename_axis(columns='hour')
        active_counts = counts.reset_index().melt(
            id_vars='name', var_name='hour', value_name='activity_count'
        )
        active_counts['hour'] = active_counts['hour'].astype(int)  # Ensure hour is treated as numeric
        return active_counts

    def plot_data(self, df: pd.DataFrame):
        """Create the Plotly heatmap and store it on ``self.fig``."""
        df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
        self.fig = go.Figure(data=go.Heatmap(
            z=df.values,
            x=df.columns,
            y=df.index,
            colorscale='Viridis',
            colorbar=dict(title='Count of was_active == True')
        ))
        self.fig.update_layout(
            title='User Activity Heatmap',
            xaxis_title='Hour of Day',
            yaxis_title='User ID',
            # Cells are centred on their hour, so extend the range by half a
            # cell on each side to show hours 0 and 23 in full.
            xaxis=dict(tickmode='linear', dtick=1, range=[-0.5, 23.5]),
            template='plotly_white'
        )
        self.fig.update_traces(
            hovertemplate="<br>".join([
                "Hour: %{x}",
                "Name: %{y}",
                "Activity: %{z}",
            ])
        )

View File

@@ -0,0 +1,48 @@
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for
from app.logging_config import get_logger
logger = get_logger()
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
    """Interactive line graph of active-row counts per hour, one trace per user name."""
    name = "Activity Line Graph (All Users, Interactive)"
    description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
    plot_filename = "line_activity-all_users.html"
    note = ""

    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build a name-by-hour table of active-row counts for the line plot.

        Rows are sorted by their total in descending order, and a final
        ``'Cumulative Sum'`` row holds the per-hour totals over all users.
        """
        active = df[df['was_active']]
        # Derive the hour on the filtered copy instead of writing into the
        # caller's frame.
        active = active.assign(hour=active['timestamp'].dt.hour)
        counts = active.pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
        counts['total_active_minutes'] = counts.sum(axis=1)
        counts = counts.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)
        # The last row of a cumulative sum is just the column total; summing
        # directly also works when the table has no rows.
        counts.loc['Cumulative Sum'] = counts.sum()
        return counts

    def plot_data(self, df: pd.DataFrame):
        """Create the Plotly figure (one line per row) and store it on ``self.fig``."""
        self.fig = make_subplots()
        for index, row in df.iterrows():
            if index == 'Cumulative Sum':
                # Bold black line for the per-hour totals.
                self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index, line=dict(width=3, color='black')))
            else:
                self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index))
        self.fig.update_layout(
            title='User Activity Throughout the Day',
            xaxis_title='Hour of Day',
            yaxis_title='Activity Count',
            legend_title='User',
            legend=dict(x=1, y=1),
            template='plotly_white'
        )

View File

@@ -0,0 +1,31 @@
import pandas as pd
from .base import BaseAnalysis
from flask import render_template_string
class GenerateStatistics(BaseAnalysis):
    """Analysis that renders the number of rows per hour as an HTML table."""
    name = "Test Statistics (Placeholder)"
    description = "Generates activity statistics grouped by hour."

    def execute(self, df: pd.DataFrame):
        """Return a Bootstrap card containing the per-hour row counts of ``df``.

        An ``hour`` column (taken from ``timestamp``) is added to ``df`` itself.
        """
        df["hour"] = df["timestamp"].dt.hour
        hourly_counts = df.groupby("hour").size().reset_index(name="count")

        # Render the counts as an HTML table with Bootstrap table classes.
        rendered_table = hourly_counts.to_html(classes="table table-bordered table-striped")

        # The table is already HTML, hence the ``safe`` filter in the template.
        card_template = """
        <div class="card mt-3">
        <div class="card-header">
        <h4>Activity Statistics</h4>
        </div>
        <div class="card-body">
        {{ table_html | safe }}
        </div>
        </div>
        """
        return render_template_string(card_template, table_html=rendered_table)

View File

@@ -6,7 +6,7 @@ import glob
from datetime import datetime from datetime import datetime
import pandas as pd import pandas as pd
from app.models import Scraper, generate_statistics from app.models import Scraper
from app.util import create_zip, delete_old_zips, tail, get_size from app.util import create_zip, delete_old_zips, tail, get_size
from app.config import load_config from app.config import load_config
from app.logging_config import get_logger from app.logging_config import get_logger

View File

@@ -6,7 +6,6 @@ from app.views import register_views
from app.api import register_api from app.api import register_api
from app.config import load_config from app.config import load_config
from app.filters import register_filters from app.filters import register_filters
from app.analysis import generate_statistics
def init_app(): def init_app():
config = load_config() config = load_config()

View File

@@ -135,8 +135,4 @@ class Scraper:
def stop_scraping(self): def stop_scraping(self):
self.scraping_active = False self.scraping_active = False
logger.debug("Scraping stopped by user") logger.debug("Scraping stopped by user")
def generate_statistics(df):
    """Count rows per hour of day.

    Adds an ``hour`` column to ``df`` itself; ``timestamp`` is used as-is
    (no datetime conversion happens here).
    """
    hour_of_day = df['timestamp'].dt.hour
    df['hour'] = hour_of_day
    activity_by_hour = df.groupby('hour').size()
    return activity_by_hour

View File

@@ -1,16 +1,100 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 "> <section class="container-fluid d-flex justify-content-center">
<div class="container-sm"> <div class="container-md my-5 mb-3 mx-2 shadow-lg p-4">
<div class="row"> <div class="container-sm">
<div class="col"> <div class="row">
<h2>Analyze</h2> <div class="col">
</div> <h2>User Activity Distribution</h2>
<div class="col">
</div>
</div>
</div> </div>
</div> </div>
</section> <div class="row">
{% endblock content %} <div class="col">
<form method="POST" action="{{ url_for('views.analyze') }}">
<!-- Dropdown for selecting data file -->
<label for="data_file" class="form-label">Choose Data File:</label>
<select name="data_file" id="data_file" class="form-select">
{% if data_files %}
{% for file in data_files %}
{{ file }}
{{ selected_file }}
<option value="{{ file }}" {% if file == selected_file %}selected{% endif %}>{{ file.split('/')[-1] }}</option>
{% endfor %}
{% else %}
<option disabled>No CSV files found</option>
{% endif %}
</select>
<!-- Analysis Selection Table -->
<label for="analyses" class="form-label">Select Analyses:</label>
<table class="table table-bordered table-striped">
<thead>
<tr>
<th>Select</th>
<th>Analysis Name</th>
<th>Description</th>
</tr>
</thead>
<tbody>
{% if analyses %}
{% for analysis in analyses %}
<tr>
<td>
<input class="form-check-input" type="checkbox" name="analyses" value="{{ analysis.name }}"
{% if analysis.name in selected_analyses %}checked{% endif %}>
</td>
<td>{{ analysis.name }}</td>
<td>{{ analysis.description }}</td>
</tr>
{% endfor %}
{% else %}
<tr>
<td colspan="3" class="text-center">No analyses available</td>
</tr>
{% endif %}
</tbody>
</table>
<button type="submit" class="btn btn-primary mt-3">Run Analyses</button>
</form>
</div>
</div>
{% include 'includes/error.html' %}
</div>
</div>
</section>
{% if plot_url %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-1 mx-2 shadow-lg p-4">
<div class="container-sm">
<div class="row mt-4">
<div class="col">
<h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
<img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
</div>
</div>
</div>
</div>
</section>
{% endif %}
{% if results %}
{% for analysis_name, result in results.items() %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-2 mx-2 shadow p-4 pt-0">
<div class="container-sm">
<div class="results mt-4">
<h3>{{ analysis_name }}</h3>
<div class="analysis-output">
{{ result | safe }} <!-- This allows HTML output -->
</div>
</div>
</div>
</div>
</section>
{% endfor %}
{% endif %}
{% endblock %}

View File

@@ -1,68 +0,0 @@
{% extends 'base.html' %}
{% block content %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
<div class="container-sm">
<div class="row">
<div class="col">
<h2>User Activity Distribution</h2>
</div>
<div class="col text-end">
<!-- Dropdown for selecting data file -->
<form method="POST" action="{{ url_for('views.data_visualization') }}">
<label for="data_file" class="form-label">Choose Data File:</label>
<select name="data_file" id="data_file" class="form-select" onchange="this.form.submit()">
{% for file in data_files %}
<option value="{{ file }}" {% if file == selected_file %}selected{% endif %}>
{{ file.split('/')[-1] }}
</option>
{% endfor %}
</select>
</form>
</div>
</div>
{% if error %}
<div class="alert alert-danger mt-3" role="alert">
{{ error }}
</div>
{% endif %}
{% if plot_url %}
<div class="row mt-4">
<div class="col">
<h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
<img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
</div>
</div>
{% endif %}
{% if statistics %}
<div class="row mt-4">
<div class="col">
<h2>Activity Statistics</h2>
<table class="table table-bordered table-hover">
<thead class="table-dark">
<tr>
<th>Hour</th>
<th>Activity Count</th>
</tr>
</thead>
<tbody>
{% for hour, count in statistics.items() %}
<tr>
<td>{{ hour }}</td>
<td>{{ count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
</div>
</div>
</section>
{% endblock content %}

View File

@@ -0,0 +1,6 @@
{% if error %}
<div class="alert alert-danger alert-dismissible fade show mt-3" role="alert">
<strong>Error:</strong> {{ error }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endif %}

View File

@@ -3,7 +3,7 @@
<div class="container-fluid"> <div class="container-fluid">
<a class="navbar-brand" href="/">Torn User Activity Scraper</a> <a class="navbar-brand" href="/">Torn User Activity Scraper</a>
{% from 'bootstrap4/nav.html' import render_nav_item %} {% from 'bootstrap4/nav.html' import render_nav_item %}
{{ render_nav_item('views.data_visualization', 'Data Visualization') }} {{ render_nav_item('views.analyze', 'Data Visualization') }}
{{ render_nav_item('download_results', 'Files') }} {{ render_nav_item('download_results', 'Files') }}
{{ render_nav_item('log_viewer', 'Logs') }} {{ render_nav_item('log_viewer', 'Logs') }}
<div class="d-flex" id="color-mode-toggle"> <div class="d-flex" id="color-mode-toggle">

View File

@@ -5,9 +5,10 @@ from flask import render_template, Blueprint, current_app, request
from app.forms import ScrapingForm from app.forms import ScrapingForm
from app.util import get_size from app.util import get_size
from app.config import load_config from app.config import load_config
from app.api import scraper as scraper# Import the scraper instance from app.api import scraper as scraper
from app.logging_config import get_logger from app.logging_config import get_logger
from app.analysis import load_data, generate_statistics, plot_activity_distribution
from app.analysis import load_data, load_analysis_modules
from app.state import log_file_name from app.state import log_file_name
@@ -29,10 +30,6 @@ def register_views(app):
def results(): def results():
return render_template('results.html') return render_template('results.html')
@app.route('/analyze')
def analyze():
return render_template('analyze.html')
@app.route('/log_viewer') @app.route('/log_viewer')
def log_viewer(): def log_viewer():
return render_template('log_viewer.html') return render_template('log_viewer.html')
@@ -87,42 +84,44 @@ def register_views(app):
views_bp = Blueprint("views", __name__) views_bp = Blueprint("views", __name__)
@views_bp.route("/data-visualization", methods=["GET", "POST"]) @views_bp.route("/analyze", methods=["GET", "POST"])
def data_visualization(): def analyze():
"""Route to display activity statistics with a visualization.""" analysis_modules = load_analysis_modules() # Load available analyses
data_dir = current_app.config["DATA"]["DATA_DIR"] data_dir = current_app.config.get("DATA", {}).get("DATA_DIR")
selected_file = None
selected_analyses = []
# Find all available CSV files # Find all available CSV files
data_files = sorted( data_files = sorted(
glob.glob(os.path.join(data_dir, "*.csv")), glob.glob(os.path.join(data_dir, "*.csv")),
key=os.path.getmtime, key=os.path.getmtime,
reverse=True reverse=True
) ) if data_dir else []
if not data_files: context = {
return render_template("data_visualization.html", error="No data files found.", data_files=[]) "data_files": data_files,
"analyses": analysis_modules,
"selected_file": selected_file,
"selected_analyses": selected_analyses
}
# Get the selected file from the dropdown (default to the latest file) if request.method == "POST":
selected_file = request.form.get("data_file", data_files[0] if data_files else None) selected_analyses = request.form.getlist("analyses")
selected_file = request.form.get("data_file")
if not selected_file:
context["error"] = "No file selected."
return render_template("analyze.html", **context)
if selected_file and os.path.exists(selected_file):
df = load_data(selected_file) df = load_data(selected_file)
statistics = generate_statistics(df) results = {}
# ✅ Generate the plot and get the correct URL path
# remove app/ from the base URL
plot_url = plot_activity_distribution(df).replace("app/", "")
else:
return render_template("data_visualization.html", error="Invalid file selection.", data_files=data_files)
return render_template( for analysis in analysis_modules:
"data_visualization.html", if analysis.name in selected_analyses:
plot_url=plot_url, results[analysis.name] = analysis.execute(df) # Some may return HTML
statistics=statistics.to_dict(),
data_files=data_files,
selected_file=selected_file
)
context["results"] = results
return render_template("analyze.html", **context)
app.register_blueprint(views_bp) app.register_blueprint(views_bp)

423
tests/analyses.ipynb Normal file

File diff suppressed because one or more lines are too long