adds data analysis
This commit is contained in:
60
app/analysis.py
Normal file
60
app/analysis.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use("Agg") # Prevents GUI-related issues in Flask
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(file_path: str) -> pd.DataFrame:
    """Read the scraped CSV at ``file_path`` and parse its date columns.

    The ``timestamp`` and ``last_action`` columns are converted with
    ``pd.to_datetime(..., errors="coerce")``, so values that cannot be
    parsed become ``NaT`` instead of raising.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with both date columns converted.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If either date column is missing from the file.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} not found.")

    frame = pd.read_csv(file_path)

    # Both columns get the same lenient conversion.
    for date_column in ("timestamp", "last_action"):
        frame[date_column] = pd.to_datetime(frame[date_column], errors="coerce")

    return frame
|
||||||
|
|
||||||
|
def generate_statistics(df: pd.DataFrame):
    """Count the rows of ``df`` per hour of day of their ``timestamp``.

    The input DataFrame is left untouched: the hour is computed on a
    temporary Series instead of being written back as an ``hour`` column.

    Args:
        df: DataFrame with a ``timestamp`` column. If the column is not
            already a datetime dtype it is converted with
            ``pd.to_datetime(..., errors="coerce")``.

    Returns:
        A Series indexed by integer hour (index name ``"hour"``) whose
        values are the number of rows in that hour. Rows whose timestamp
        is missing or unparseable are not counted.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
    """
    timestamps = df["timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    hours = timestamps.dt.hour.rename("hour")

    # groupby drops NaN keys by default, so rows with NaT are excluded.
    counts = df.groupby(hours).size()

    # With NaT present ``dt.hour`` is float (e.g. 13.0); normalise so the
    # keys are always plain integer hours.
    counts.index = counts.index.astype("int64")
    return counts
|
||||||
|
|
||||||
|
def plot_activity_distribution(df: pd.DataFrame, output_path="activity_distribution.png"):
    """Plot the number of rows per hour of day and save it as an image.

    The figure is written below ``app/static/plots`` (created if needed,
    relative to the current working directory). The input DataFrame is
    not modified.

    Args:
        df: DataFrame with a ``timestamp`` column. If the column is not
            already a datetime dtype it is converted with
            ``pd.to_datetime(..., errors="coerce")``.
        output_path: File name of the image inside ``app/static/plots``.

    Returns:
        The path of the saved image, i.e.
        ``os.path.join("app", "static", "plots", output_path)``.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
        FileNotFoundError: If the image does not exist after saving.
    """
    static_dir = os.path.join("app", "static", "plots")
    output_path = os.path.join(static_dir, output_path)
    os.makedirs(static_dir, exist_ok=True)

    # Work on a local Series so the caller's DataFrame is not mutated.
    timestamps = df["timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    # Drop NaT first so the hours stay integers rather than floats.
    hours = timestamps.dropna().dt.hour

    # Reindex to all 24 hours so every hour has a bar (zero where there is
    # no activity) and the axis always covers the full day.
    activity_counts = (
        hours.value_counts()
        .reindex(range(24), fill_value=0)
        .rename_axis("hour")
        .reset_index(name="count")
    )

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # ``hue`` is assigned explicitly because seaborn warns when a
        # palette is given without it.
        sns.barplot(
            x="hour",
            y="count",
            data=activity_counts,
            hue="hour",
            palette="Blues",
            legend=False,
            ax=ax,
        )
        ax.set_xlabel("Hour of the Day")
        ax.set_ylabel("Activity Count")
        ax.set_title("User Activity Distribution")
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    if not os.path.exists(output_path):
        raise FileNotFoundError(f"Plot could not be saved to {output_path}")

    return output_path
|
||||||
@@ -6,6 +6,7 @@ from app.views import register_views
|
|||||||
from app.api import register_api
|
from app.api import register_api
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.filters import register_filters
|
from app.filters import register_filters
|
||||||
|
from app.analysis import generate_statistics
|
||||||
|
|
||||||
def init_app():
|
def init_app():
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
|||||||
68
app/templates/data_visualization.html
Normal file
68
app/templates/data_visualization.html
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
{% extends 'base.html' %}

{#
  Data visualization page.
  Context used below: data_files, selected_file, error, plot_url, statistics.
#}
{% block content %}
<section class="container-fluid d-flex justify-content-center">
  <div class="container-md my-5 mx-2 shadow-lg p-4 ">
    <div class="container-sm">

      {# Header row: page title on the left, file picker on the right. #}
      <div class="row">
        <div class="col">
          <h2>User Activity Distribution</h2>
        </div>
        <div class="col text-end">
          <!-- Dropdown for selecting data file -->
          <form method="POST" action="{{ url_for('views.data_visualization') }}">
            <label for="data_file" class="form-label">Choose Data File:</label>
            {# Changing the selection submits the form immediately. #}
            <select name="data_file" id="data_file" class="form-select" onchange="this.form.submit()">
              {% for path in data_files %}
              <option value="{{ path }}" {% if path == selected_file %}selected{% endif %}>
                {{ path.split('/')[-1] }}
              </option>
              {% endfor %}
            </select>
          </form>
        </div>
      </div>

      {# Error banner, shown only when an error message is passed in. #}
      {% if error %}
      <div class="alert alert-danger mt-3" role="alert">
        {{ error }}
      </div>
      {% endif %}

      {# Generated plot for the selected file. #}
      {% if plot_url %}
      <div class="row mt-4">
        <div class="col">
          <h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
          <img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
        </div>
      </div>
      {% endif %}

      {# One table row per entry of the statistics mapping. #}
      {% if statistics %}
      <div class="row mt-4">
        <div class="col">
          <h2>Activity Statistics</h2>
          <table class="table table-bordered table-hover">
            <thead class="table-dark">
              <tr>
                <th>Hour</th>
                <th>Activity Count</th>
              </tr>
            </thead>
            <tbody>
              {% for hr, total in statistics.items() %}
              <tr>
                <td>{{ hr }}</td>
                <td>{{ total }}</td>
              </tr>
              {% endfor %}
            </tbody>
          </table>
        </div>
      </div>
      {% endif %}

    </div>
  </div>
</section>
{% endblock content %}
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
<div class="container-fluid">
|
<div class="container-fluid">
|
||||||
<a class="navbar-brand" href="/">Torn User Activity Scraper</a>
|
<a class="navbar-brand" href="/">Torn User Activity Scraper</a>
|
||||||
{% from 'bootstrap4/nav.html' import render_nav_item %}
|
{% from 'bootstrap4/nav.html' import render_nav_item %}
|
||||||
{{ render_nav_item('analyze', 'Analyze') }}
|
{{ render_nav_item('views.data_visualization', 'Data Visualization') }}
|
||||||
{{ render_nav_item('download_results', 'Files') }}
|
{{ render_nav_item('download_results', 'Files') }}
|
||||||
{{ render_nav_item('log_viewer', 'Logs') }}
|
{{ render_nav_item('log_viewer', 'Logs') }}
|
||||||
<div class="d-flex" id="color-mode-toggle">
|
<div class="d-flex" id="color-mode-toggle">
|
||||||
|
|||||||
50
app/views.py
50
app/views.py
@@ -1,12 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
import glob
|
import glob
|
||||||
from flask import render_template
|
from flask import render_template, Blueprint, current_app, request
|
||||||
|
|
||||||
from app.forms import ScrapingForm
|
from app.forms import ScrapingForm
|
||||||
from app.util import get_size
|
from app.util import get_size
|
||||||
from app.config import load_config
|
from app.config import load_config
|
||||||
from app.api import scraper as scraper# Import the scraper instance
|
from app.api import scraper as scraper# Import the scraper instance
|
||||||
from app.logging_config import get_logger
|
from app.logging_config import get_logger
|
||||||
|
from app.analysis import load_data, generate_statistics, plot_activity_distribution
|
||||||
|
|
||||||
|
|
||||||
from app.state import log_file_name
|
from app.state import log_file_name
|
||||||
|
|
||||||
@@ -15,6 +17,8 @@ print(f"A imported log_file_name: {log_file_name}")
|
|||||||
config = load_config()
|
config = load_config()
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
views_bp = Blueprint("views", __name__)
|
||||||
|
|
||||||
def register_views(app):
|
def register_views(app):
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
def index():
|
def index():
|
||||||
@@ -79,4 +83,46 @@ def register_views(app):
|
|||||||
|
|
||||||
files = {"data": data_files_info, "log": log_files_info}
|
files = {"data": data_files_info, "log": log_files_info}
|
||||||
|
|
||||||
return render_template('download_results.html', files=files)
|
return render_template('download_results.html', files=files)
|
||||||
|
|
||||||
|
views_bp = Blueprint("views", __name__)
|
||||||
|
|
||||||
|
@views_bp.route("/data-visualization", methods=["GET", "POST"])
def data_visualization():
    """Render hourly activity statistics and a plot for one data file.

    Lists the ``*.csv`` files in the configured data directory (newest
    first). The file to analyse comes from the ``data_file`` form field
    and defaults to the newest file. The template receives ``data_files``
    plus either ``error`` or ``plot_url``, ``statistics`` and
    ``selected_file``.
    """
    data_dir = current_app.config["DATA"]["DATA_DIR"]

    # Newest file first so the default selection is the latest one.
    data_files = sorted(
        glob.glob(os.path.join(data_dir, "*.csv")),
        key=os.path.getmtime,
        reverse=True,
    )

    if not data_files:
        return render_template("data_visualization.html", error="No data files found.", data_files=[])

    # data_files is known to be non-empty here, so index 0 is safe.
    selected_file = request.form.get("data_file", data_files[0])

    # The form value is untrusted input: accept only paths that were listed
    # above, otherwise any readable file on the server could be loaded.
    if selected_file not in data_files:
        return render_template("data_visualization.html", error="Invalid file selection.", data_files=data_files)

    try:
        df = load_data(selected_file)
        statistics = generate_statistics(df)

        # The plot is saved below "app/"; strip that prefix to get the
        # path the browser requests.
        plot_url = plot_activity_distribution(df).replace("app/", "")
    except (OSError, KeyError, ValueError):
        # Missing file, missing columns or an unparseable CSV: show the
        # error banner instead of failing the whole request.
        current_app.logger.exception("Could not analyze data file %s", selected_file)
        return render_template(
            "data_visualization.html",
            error=f"Could not analyze {os.path.basename(selected_file)}.",
            data_files=data_files,
            selected_file=selected_file,
        )

    return render_template(
        "data_visualization.html",
        plot_url=plot_url,
        statistics=statistics.to_dict(),
        data_files=data_files,
        selected_file=selected_file,
    )
|
||||||
|
|
||||||
|
|
||||||
|
app.register_blueprint(views_bp)
|
||||||
|
|||||||
Reference in New Issue
Block a user