init
This commit is contained in:
198
.gitignore
vendored
Normal file
198
.gitignore
vendored
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/flask
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=flask
|
||||||
|
|
||||||
|
### Flask ###
|
||||||
|
instance/*
|
||||||
|
!instance/.gitignore
|
||||||
|
.webassets-cache
|
||||||
|
.env
|
||||||
|
|
||||||
|
### Flask.Python Stack ###
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/flask
|
||||||
|
|
||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode
|
||||||
|
|
||||||
|
### VisualStudioCode ###
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
||||||
|
!.vscode/*.code-snippets
|
||||||
|
|
||||||
|
# Local History for Visual Studio Code
|
||||||
|
.history/
|
||||||
|
|
||||||
|
# Built Visual Studio Code Extensions
|
||||||
|
*.vsix
|
||||||
|
|
||||||
|
### VisualStudioCode Patch ###
|
||||||
|
# Ignore all local history of files
|
||||||
|
.history
|
||||||
|
.ionide
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
|
||||||
|
|
||||||
|
# Exclude data files
|
||||||
|
*.csv
|
||||||
|
config.ini
|
||||||
61
README.md
Normal file
61
README.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Torn User Activity Scraper
|
||||||
|
|
||||||
|
This project is a web application that scrapes user activity data from the Torn API and displays the results. It includes features for starting and stopping the scraping process, viewing logs, and downloading results.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Start and stop scraping user activity data
|
||||||
|
- View real-time logs
|
||||||
|
- Download data and log files
|
||||||
|
- View scraping results and statistics
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Python 3.8+
|
||||||
|
- Flask
|
||||||
|
- Flask-Bootstrap
|
||||||
|
- Flask-WTF
|
||||||
|
- Pandas
|
||||||
|
- Requests
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Clone the repository:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git clone https://github.com/yourusername/torn-user-activity-scraper.git
|
||||||
|
cd torn-user-activity-scraper
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create a virtual environment and activate it:
|
||||||
|
```sh
|
||||||
|
python3 -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Install the required packages:
|
||||||
|
```sh
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Set up your configuration file:
|
||||||
|
Create a `config.ini` file in the root directory of the project by copying `example_config.ini` and filling in the following content:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[DEFAULT]
|
||||||
|
SECRET_KEY = your_secret_key
|
||||||
|
API_KEY = your_api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Run the Flask application:
|
||||||
|
```sh
|
||||||
|
flask run
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Open your web browser and navigate to `http://127.0.0.1:5000/`.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under the MIT License.
|
||||||
259
app.py
Normal file
259
app.py
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
from flask import Flask, request, render_template, Response, jsonify, url_for
|
||||||
|
from flask_bootstrap import Bootstrap5
|
||||||
|
from forms import ScrapingForm
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import threading
|
||||||
|
import logging
|
||||||
|
from logging.handlers import QueueHandler
|
||||||
|
from queue import Queue
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
from datetime import datetime
|
||||||
|
from flask import send_from_directory
|
||||||
|
import configparser
|
||||||
|
|
||||||
|
# Flask application object; templates/ and static/ are resolved relative to this module.
app = Flask(__name__)

# Load configuration
# config.ini sits next to this file (see example_config.ini for the expected keys).
# NOTE(review): ConfigParser.read() silently succeeds when the file is missing,
# so the SECRET_KEY lookup below would raise KeyError — confirm config.ini always ships.
config = configparser.ConfigParser()
config.read('config.ini')

# Session/CSRF signing key used by Flask and Flask-WTF.
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
# Torn API key appended to every outgoing API request (see fetch_* helpers).
API_KEY = config['DEFAULT']['API_KEY']

# Bootstrap 5 integration consumed by the Jinja templates ({{ bootstrap.load_css() }}).
bootstrap = Bootstrap5(app)

# Initialize the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Adjust as needed
|
||||||
|
|
||||||
|
# Make any logger.info() call go to both the log file and the queue.
# 1) FILE HANDLER
# BUG FIX: on a fresh checkout the log/ directory does not exist and
# logging.FileHandler raises FileNotFoundError — create it up front.
os.makedirs("log", exist_ok=True)
logFile = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
file_handler = logging.FileHandler(logFile, mode='w')
file_handler.setLevel(logging.DEBUG)  # or INFO, WARNING, etc.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)

# 2) QUEUE HANDLER
# Records pushed here are drained by the /logs server-sent-events endpoint.
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
|
||||||
|
|
||||||
|
# Global state
# Shared between the Flask request handlers and the background scraper thread.
# scraping_active doubles as the worker's stop signal (cleared by /stop_scraping);
# scraping_thread holds the currently running worker, if any.
# NOTE(review): these are read/written from two threads with no lock — fine for
# a single CPython process, but confirm if this moves to multiple workers.
scraping_active = False
scraping_thread = None
|
||||||
|
|
||||||
|
def fetch_faction_data(faction_id):
    """Fetch a faction's public data (including its member list) from the Torn API.

    Args:
        faction_id: Torn faction ID to query.

    Returns:
        Parsed JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/faction/{faction_id}?selections=&key={API_KEY}"
    try:
        # A timeout keeps the background scraper thread from hanging forever
        # if the Torn API stops responding; a raised network error previously
        # killed the thread silently.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        logger.warning(f"Request error fetching faction data for faction ID {faction_id}: {e}")
        return None
    if response.status_code == 200:
        logger.info(f"Fetched data for faction ID {faction_id}")
        return response.json()
    logger.warning(f"Failed to fetch faction data for faction ID {faction_id}")
    return None
|
||||||
|
|
||||||
|
def fetch_user_activity(user_id):
    """Fetch a single user's basic+profile data from the Torn API.

    Args:
        user_id: Torn user ID to query.

    Returns:
        Parsed JSON dict on HTTP 200, otherwise None.
    """
    url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={API_KEY}"
    try:
        # Timeout mirrors fetch_faction_data so a dead API cannot hang the
        # scraping thread indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        logger.error(f"Request error fetching user activity for user ID {user_id}: {e}")
        return None
    if response.status_code == 200:
        return response.json()
    logger.error(f"Failed to fetch user activity for user ID {user_id}")
    return None
|
||||||
|
|
||||||
|
def scrape_data(faction_id, fetch_interval, run_interval):
    """Poll the Torn API for member activity until the run window ends.

    Runs in a background daemon thread (started by /start_scraping). Every
    `fetch_interval` seconds it fetches the faction member list, looks up each
    member's activity, and appends the rows to a per-run CSV under data/.
    Stops when `run_interval` days have elapsed or when the module-level
    `scraping_active` flag is cleared by /stop_scraping.

    Args:
        faction_id: Torn faction ID to scrape.
        fetch_interval: Seconds to sleep between polling rounds.
        run_interval: Total run duration in days.
    """
    global scraping_active
    end_time = datetime.now() + timedelta(days=run_interval)
    # One CSV per run, named after the faction and the start timestamp.
    filename = f"data/{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"

    while datetime.now() < end_time and scraping_active:
        logger.info(f"Fetching data at {datetime.now()}")
        faction_data = fetch_faction_data(faction_id)
        if faction_data and 'members' in faction_data:
            user_activity_data = []
            for user_id, user_info in faction_data['members'].items():
                user_activity = fetch_user_activity(user_id)
                if user_activity:
                    user_activity_data.append({
                        'user_id': user_id,
                        'name': user_activity.get('name', ''),
                        'last_action': user_activity.get('last_action', {}).get('timestamp', 0),
                        'status': user_activity.get('status', {}).get('state', ''),
                        'timestamp': datetime.now().timestamp()
                    })
                    logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")

            # Append data to the file.
            # BUG FIX: an empty round previously crashed with KeyError, because
            # pd.DataFrame([]) has no 'last_action'/'timestamp' columns.
            if user_activity_data:
                df = pd.DataFrame(user_activity_data)
                df['last_action'] = pd.to_datetime(df['last_action'], unit='s')
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

                if not os.path.isfile(filename):
                    df.to_csv(filename, index=False)
                else:
                    df.to_csv(filename, mode='a', header=False, index=False)

                # BUG FIX: the message previously read "Data appended to (unknown)"
                # instead of interpolating the target file name.
                logger.info(f"Data appended to {filename}")

            time.sleep(fetch_interval)
        else:
            # NOTE(review): this branch fires when the faction fetch fails; it
            # loops again immediately without sleeping — confirm whether a
            # retry delay/break was intended.
            if datetime.now() < end_time:
                logger.warning(f"Scraping stopped at {datetime.now()}")
            elif not scraping_active:
                logger.warning(f"Scraping stopped at {datetime.now()} due to user request")
            else:
                logger.error(f"Scraping stopped due to timeout at {datetime.now()}")
    logger.info("Scraping completed.")
    scraping_active = False
|
||||||
|
|
||||||
|
def generate_statistics(df):
    """Count activity rows per hour of day.

    Adds an 'hour' column to *df* (derived in place from its 'timestamp'
    datetime column) and returns a Series mapping hour-of-day -> row count.
    """
    hours = df['timestamp'].dt.hour
    df['hour'] = hours
    return df.groupby('hour').size()
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Render the landing page with the scraping configuration form."""
    return render_template('index.html', form=ScrapingForm())
|
||||||
|
|
||||||
|
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
    """Validate the submitted ScrapingForm and launch the scraper thread.

    Returns a JSON status message; rejects the request if a scrape is already
    running or the form fails validation (CSRF/required fields).
    """
    global scraping_active, scraping_thread
    form = ScrapingForm()
    if form.validate_on_submit():
        # Only one run at a time; the flag doubles as the worker's stop
        # signal (see scrape_data and /stop_scraping).
        if scraping_active:
            logger.warning("Can't start scraping process: scraping already in progress")
            return jsonify({"status": "Scraping already in progress"})

        # Set the flag *before* starting the thread so the worker's loop
        # condition is already true on its first iteration.
        scraping_active = True

        faction_id = form.faction_id.data
        fetch_interval = form.fetch_interval.data
        run_interval = form.run_interval.data

        # Start scraping in a separate thread
        # daemon=True so a stuck scrape cannot keep the process alive on exit.
        scraping_thread = threading.Thread(target=scrape_data, args=(faction_id, fetch_interval, run_interval))
        scraping_thread.daemon = True
        scraping_thread.start()

        return jsonify({"status": "Scraping started"})
    return jsonify({"status": "Invalid form data"})
|
||||||
|
|
||||||
|
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
    """Signal the background scraper to stop by clearing the shared flag."""
    global scraping_active
    if scraping_active:
        scraping_active = False
        logger.debug("scraping_active set to False")
        return jsonify({"status": "Scraping stopped"})
    return jsonify({"status": "No scraping in progress"})
|
||||||
|
|
||||||
|
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
    """Report whether the background scraper is currently running."""
    is_active = scraping_active
    logger.debug(f"scraping_status called: scraping_active = {is_active}")
    return jsonify({"scraping_active": is_active})
|
||||||
|
|
||||||
|
@app.route('/logs')
def logs():
    """Stream log records to the browser as server-sent events (SSE).

    Drains the module-level log_queue (fed by the QueueHandler attached to
    the logger) and emits each record as an SSE ``data:`` frame.
    """
    def generate():
        # Poll the queue forever; the 0.1 s sleep keeps this from
        # busy-spinning while the queue is empty. The generator runs for
        # the lifetime of the client's connection.
        while True:
            if not log_queue.empty():
                log = log_queue.get().getMessage()
                yield f"data: {log}\n\n"
            time.sleep(0.1)
    return Response(generate(), mimetype='text/event-stream')
|
||||||
|
|
||||||
|
@app.route('/logfile', methods=['GET'])
def logfile():
    """Return the tail of the current run's log file as JSON.

    Query args:
        lines: number of trailing lines to return (default 100).

    Returns HTTP 400 for a non-numeric ``lines`` value and 404 when the log
    file does not exist.
    """
    try:
        lines = int(request.args.get('lines', 100))  # Number of lines to read
    except ValueError:
        # BUG FIX: a non-numeric value previously escaped as an unhandled
        # ValueError (HTTP 500); report it to the client instead.
        return jsonify({"error": "Invalid 'lines' parameter"}), 400
    log_file_path = logFile  # Path to the current log file

    if not os.path.isfile(log_file_path):
        return jsonify({"error": "Log file not found"}), 404

    with open(log_file_path, 'r') as file:
        log_lines = file.readlines()

    return jsonify({"log": log_lines[-lines:]})
|
||||||
|
|
||||||
|
@app.route('/results')
def results():
    """Render hourly activity statistics for a faction's most recent run.

    BUG FIX: the CSV name was previously rebuilt from the *current* minute
    (datetime.now()), so it only matched a file if the request arrived in the
    exact minute the scrape started; pick the newest CSV recorded for the
    faction instead. Also parse the timestamp column as datetimes — without
    parse_dates, read_csv yields strings and generate_statistics' ``.dt.hour``
    would fail.
    """
    faction_id = request.args.get('faction_id')
    candidates = glob.glob(f"data/{faction_id}-*.csv")
    if not candidates:
        return "No data found."
    # Newest run wins.
    filename = max(candidates, key=os.path.getmtime)
    df = pd.read_csv(filename, parse_dates=['timestamp'])
    stats = generate_statistics(df)
    return render_template('results.html', stats=stats.to_dict())
|
||||||
|
|
||||||
|
@app.route('/download_results')
def download_results():
    """List downloadable data CSVs and log files with basic file metadata."""
    def describe(path):
        # Per-file metadata consumed by the download_results template.
        return {
            "name": path,
            "last_modified": os.path.getmtime(path),
            "created": os.path.getctime(path),
            "size": get_size(path),
        }

    files = {
        "data": [describe(p) for p in glob.glob("data/*.csv")],
        "log": [describe(p) for p in glob.glob("log/*.log")],
    }
    return render_template('download_results.html', files=files)
|
||||||
|
|
||||||
|
@app.route('/delete_file', methods=['POST'])
def delete_file():
    """Delete a data or log file selected in the download view.

    SECURITY FIX: ``file_path`` comes straight from the client, and the old
    code deleted whatever path was posted (path traversal — any file the
    server user could remove). Deletion is now restricted to the app's own
    data/ and log/ directories.
    """
    file_path = request.form.get('file_path')

    if not file_path or not os.path.isfile(file_path):
        return jsonify({"error": "File not found"}), 404

    allowed_dirs = (os.path.abspath("data"), os.path.abspath("log"))
    resolved = os.path.abspath(file_path)
    if not any(resolved.startswith(d + os.sep) for d in allowed_dirs):
        # Same response as a missing file so the endpoint leaks nothing
        # about paths outside the allowed directories.
        return jsonify({"error": "File not found"}), 404

    try:
        os.remove(resolved)
        return jsonify({"success": True}), 200
    except Exception as e:
        # Deliberately broad: surface any OS-level failure to the client.
        return jsonify({"error": str(e)}), 500
|
||||||
|
|
||||||
|
@app.template_filter('datetimeformat')
def datetimeformat(value):
    """Jinja filter: render a UNIX timestamp as 'YYYY-MM-DD HH:MM:SS' (local time)."""
    dt = datetime.fromtimestamp(value)
    return dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
def get_size(path):
    """Return a human-readable size string for the file at *path*.

    BUG FIX: files of 1 TB or more previously fell off the end of the
    elif chain and returned None; they now get a TB label.

    Args:
        path: Filesystem path passed to os.path.getsize.

    Returns:
        A string such as "500 bytes", "2.0 KB", "1.5 MB", "3.2 GB", "1.1 TB".
    """
    size = os.path.getsize(path)
    if size < 1024:
        return f"{size} bytes"
    elif size < pow(1024, 2):
        return f"{round(size/1024, 2)} KB"
    elif size < pow(1024, 3):
        return f"{round(size/(pow(1024, 2)), 2)} MB"
    elif size < pow(1024, 4):
        return f"{round(size/(pow(1024, 3)), 2)} GB"
    else:
        return f"{round(size/(pow(1024, 4)), 2)} TB"
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/data/<path:filename>')
def download_data_file(filename):
    """Serve a scraped CSV from the data/ directory.

    send_from_directory rejects paths that escape the data/ directory.
    """
    return send_from_directory('data', filename)
|
||||||
|
|
||||||
|
@app.route('/logs/<path:filename>')
def download_log_file(filename):
    """Serve a log file for download.

    BUG FIX: log files are written to the 'log' directory (logFile is built
    as "log/..." and download_results globs "log/*.log"), but this handler
    previously served from a non-existent 'logs' directory, so every log
    download returned 404.
    """
    return send_from_directory('log', filename)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # threaded=True lets the dev server answer /scraping_status and /logs
    # while a long request is in flight; debug=True is for local use only —
    # do not deploy with the Werkzeug debugger enabled.
    app.run(debug=True, threaded=True)
|
||||||
3
example_config.ini
Normal file
3
example_config.ini
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[DEFAULT]
|
||||||
|
SECRET_KEY = your_secret_key
|
||||||
|
API_KEY = your_api_key
|
||||||
9
forms.py
Normal file
9
forms.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from flask_wtf import FlaskForm
|
||||||
|
from wtforms import StringField, IntegerField, SubmitField
|
||||||
|
from wtforms.validators import DataRequired
|
||||||
|
|
||||||
|
class ScrapingForm(FlaskForm):
    """Configuration form for starting a scraping run (posted to /start_scraping)."""

    # Torn faction whose members will be polled.
    faction_id = StringField('Faction ID', validators=[DataRequired()], default='9686')
    # Seconds between polling rounds.
    fetch_interval = IntegerField('Fetch Interval (seconds)', validators=[DataRequired()], default=60)
    # Total run duration in days.
    run_interval = IntegerField('Run Interval (days)', validators=[DataRequired()], default=1)
    submit = SubmitField('Start Scraping')
|
||||||
87
requirements.txt
Normal file
87
requirements.txt
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
appdirs==1.4.4
|
||||||
|
application-utility==1.3.3
|
||||||
|
attrs==23.2.1.dev0
|
||||||
|
autocommand==2.2.2
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
btrfsutil==6.12
|
||||||
|
CacheControl==0.14.1
|
||||||
|
cachetools==5.5.0
|
||||||
|
certifi==2024.8.30
|
||||||
|
cffi==1.17.1
|
||||||
|
chardet==5.2.0
|
||||||
|
charset-normalizer==3.4.0
|
||||||
|
contourpy==1.3.1
|
||||||
|
coverage==7.6.8
|
||||||
|
cryptography==43.0.3
|
||||||
|
cssselect==1.2.0
|
||||||
|
cupshelpers==1.0
|
||||||
|
cycler==0.12.1
|
||||||
|
dbus-python==1.3.2
|
||||||
|
distro==1.9.0
|
||||||
|
docopt==0.6.2
|
||||||
|
filelock==3.16.1
|
||||||
|
fonttools==4.55.3
|
||||||
|
idna==3.10
|
||||||
|
inputs==0.5
|
||||||
|
jaraco.collections==5.0.1
|
||||||
|
jaraco.context==5.3.0
|
||||||
|
jaraco.functools==4.0.2
|
||||||
|
jaraco.text==4.0.0
|
||||||
|
keyutils==0.6
|
||||||
|
kiwisolver==1.4.5
|
||||||
|
lit==18.1.8.dev0
|
||||||
|
lockfile==0.12.2
|
||||||
|
lxml==5.3.0
|
||||||
|
Markdown==3.7
|
||||||
|
matplotlib==3.9.3
|
||||||
|
meson==1.6.0
|
||||||
|
moddb==0.11.0
|
||||||
|
more-itertools==10.3.0
|
||||||
|
msgpack==1.0.5
|
||||||
|
netsnmp-python==1.0a1
|
||||||
|
nftables==0.1
|
||||||
|
npyscreen==4.10.5
|
||||||
|
numpy==2.2.0
|
||||||
|
packaging==24.2
|
||||||
|
pacman_mirrors==4.27
|
||||||
|
pillow==11.0.0
|
||||||
|
platformdirs==4.3.6
|
||||||
|
ply==3.11
|
||||||
|
ProtonUp-Qt==2.10.0
|
||||||
|
pspdfutils==3.3.6
|
||||||
|
psutil==6.1.0
|
||||||
|
puremagic==1.28
|
||||||
|
pyaml==24.9.0
|
||||||
|
pycairo==1.27.0
|
||||||
|
pycparser==2.22
|
||||||
|
pycryptodomex==3.21.0
|
||||||
|
pycups==2.0.4
|
||||||
|
Pygments==2.18.0
|
||||||
|
PyGObject==3.50.0
|
||||||
|
pyparsing==3.1.2
|
||||||
|
pypdf==5.1.0
|
||||||
|
PyQt5==5.15.11
|
||||||
|
PyQt5_sip==12.16.1
|
||||||
|
pyserial==3.5
|
||||||
|
PySide6==6.8.1
|
||||||
|
pysmbc==1.0.25.1
|
||||||
|
python-dateutil==2.9.0
|
||||||
|
pyxdg==0.28
|
||||||
|
PyYAML==6.0.2
|
||||||
|
reportlab==4.2.2
|
||||||
|
requests==2.32.3
|
||||||
|
scour==0.38.2
|
||||||
|
setuptools==75.2.0
|
||||||
|
shiboken6==6.8.1
|
||||||
|
shiboken6-generator==6.8.1
|
||||||
|
six==1.16.0
|
||||||
|
smbus==1.1
|
||||||
|
soupsieve==2.6
|
||||||
|
steam==1.6.1
|
||||||
|
TBB==0.2
|
||||||
|
tqdm==4.67.1
|
||||||
|
udiskie==2.5.3
|
||||||
|
urllib3==1.26.20
|
||||||
|
vdf==4.0
|
||||||
|
wheel==0.45.0
|
||||||
|
zstandard==0.22.0
|
||||||
106
static/app.js
Normal file
106
static/app.js
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
document.addEventListener('DOMContentLoaded', () => {
    const form = document.getElementById('scrapingForm');
    // BUG FIX: startButton was used in the /scraping_status handler below but
    // never declared, raising a ReferenceError on page load.
    const startButton = document.getElementById('startButton');
    const stopButton = document.getElementById('stopButton');
    const logsElement = document.getElementById('logs');
    const prevPageButton = document.getElementById('prevPage');
    const nextPageButton = document.getElementById('nextPage');
    let currentPage = 0;
    const linesPerPage = 50;
    let autoRefreshInterval;

    console.log('Form:', form);
    console.log('Submit button:', form.querySelector('button[type="submit"]'));

    // Fetch the last (page+1)*linesPerPage log lines and render them
    // newest-first into the <code id="logs"> element.
    const fetchLogs = (page) => {
        fetch(`/logfile?lines=${linesPerPage * (page + 1)}`)
            .then(response => response.json())
            .then(data => {
                if (data.error) {
                    logsElement.textContent = data.error;
                } else {
                    // Reverse the order of log lines
                    const reversedLogs = data.log.reverse();
                    logsElement.textContent = reversedLogs.join('');
                }
            });
    };

    const startAutoRefresh = () => {
        autoRefreshInterval = setInterval(() => {
            fetchLogs(currentPage);
        }, 5000); // Refresh every 5 seconds
    };

    const stopAutoRefresh = () => {
        clearInterval(autoRefreshInterval);
    };

    // Check scraping status on page load
    fetch('/scraping_status')
        .then(response => response.json())
        .then(data => {
            if (data.scraping_active) {
                startButton.disabled = true;
                stopButton.disabled = false;
                startAutoRefresh(); // Start auto-refresh if scraping is active
            } else {
                startButton.disabled = false;
                stopButton.disabled = true;
            }
            fetchLogs(currentPage);
        });

    prevPageButton.addEventListener('click', () => {
        if (currentPage > 0) {
            currentPage--;
            fetchLogs(currentPage);
        }
    });

    nextPageButton.addEventListener('click', () => {
        currentPage++;
        fetchLogs(currentPage);
    });

    form.addEventListener('submit', function(e) {
        e.preventDefault();
        const formData = new FormData(this);

        fetch('/start_scraping', {
            method: 'POST',
            body: formData
        }).then(response => response.json())
            .then(data => {
                console.log(data);
                const submitButton = form.querySelector('button[type="submit"]');
                if (data.status === "Scraping started") {
                    if (submitButton) {
                        submitButton.disabled = true;
                    }
                    stopButton.disabled = false;
                    startAutoRefresh(); // Start auto-refresh when scraping starts
                } else {
                    // Handle errors or other statuses
                }
            });
    });

    stopButton.addEventListener('click', function() {
        fetch('/stop_scraping', {
            method: 'POST'
        }).then(response => response.json())
            .then(data => {
                console.log(data);
                const submitButton = form.querySelector('button[type="submit"]');
                if (data.status === "Scraping stopped") {
                    if (submitButton) {
                        submitButton.disabled = false;
                    }
                    stopButton.disabled = true;
                    stopAutoRefresh(); // Stop auto-refresh when scraping stops
                } else {
                    // Handle errors or other statuses
                }
            });
    });
});
|
||||||
0
static/style.css
Normal file
0
static/style.css
Normal file
23
templates/base.html
Normal file
23
templates/base.html
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- charset declared once (previously duplicated as UTF-8 and utf-8) -->
    <meta charset="utf-8">
    <title>Torn User Activity Scraper</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    {{ bootstrap.load_css() }}
    <!-- BUG FIX: the static stylesheet is named style.css, not styles.css -->
    <link rel="stylesheet" href="{{url_for('.static', filename='style.css')}}">
</head>
<body>
    <header>
        <h1>Torn User Activity Scraper</h1>
        <nav class="navbar navbar-expand-lg navbar-light bg-light">
            <div class="navbar-nav mr-auto">
                {% from 'bootstrap4/nav.html' import render_nav_item %}
                {{ render_nav_item('index', 'Home') }}
                {{ render_nav_item('results', 'Results') }}
                {{ render_nav_item('download_results', 'Download Results') }}
            </div>
        </nav>
    </header>
||||||
52
templates/download_results.html
Normal file
52
templates/download_results.html
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- charset declared once (previously duplicated) -->
    <meta charset="utf-8">
    <title>Torn User Activity Scraper</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    {{ bootstrap.load_css() }}
    <!-- BUG FIX: the static stylesheet is named style.css, not styles.css -->
    <link rel="stylesheet" href="{{url_for('.static', filename='style.css')}}">
</head>
<body>
    <header>
        <h1>Torn User Activity Scraper</h1>
        <nav class="navbar navbar-expand-lg navbar-light bg-light">
            <div class="navbar-nav mr-auto">
                {% from 'bootstrap4/nav.html' import render_nav_item %}
                {{ render_nav_item('index', 'Home') }}
                {{ render_nav_item('results', 'Results') }}
                {{ render_nav_item('download_results', 'Download Results') }}
            </div>
        </nav>
    </header>

    <main>
        <section id="scrapingFormContainer" class="container-fluid d-flex justify-content-center">
            <div class="container-md my-5 mx-2 shadow-lg p-4 ">
                <h2>Available Files</h2>
                <!-- BUG FIX: `files` is a dict {"data": [...], "log": [...]} of
                     metadata dicts (see download_results()), but the template
                     previously iterated it as a flat list and linked to
                     non-existent endpoints; link via download_data_file /
                     download_log_file instead, stripping the directory prefix
                     from each stored path. -->
                <table class="table">
                    <thead>
                        <tr>
                            <td>Data</td>
                            <td>Logs</td>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td>
                                {% for file in files.data %}
                                <div><a href="{{ url_for('download_data_file', filename=file.name.split('/')[-1]) }}">{{ file.name }}</a> ({{ file.size }})</div>
                                {% endfor %}
                            </td>
                            <td>
                                {% for file in files.log %}
                                <div><a href="{{ url_for('download_log_file', filename=file.name.split('/')[-1]) }}">{{ file.name }}</a> ({{ file.size }})</div>
                                {% endfor %}
                            </td>
                        </tr>
                    </tbody>
                </table>
            </div>
        </section>
    </main>
    {% block scripts %}
    {{ bootstrap.load_js() }}
    <script src="{{url_for('.static', filename='app.js')}}"></script>
    {% endblock %}
</body>
</html>
|
||||||
64
templates/index.html
Normal file
64
templates/index.html
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- charset declared once (previously duplicated as UTF-8 and utf-8) -->
    <meta charset="utf-8">
    <title>Torn User Activity Scraper</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    {{ bootstrap.load_css() }}
    <!-- BUG FIX: the static stylesheet is named style.css, not styles.css -->
    <link rel="stylesheet" href="{{url_for('.static', filename='style.css')}}">
</head>
<body>
    <header>
        <h1>Torn User Activity Scraper</h1>
        <nav class="navbar navbar-expand-lg navbar-light bg-light">
            <div class="navbar-nav mr-auto">
                {% from 'bootstrap4/nav.html' import render_nav_item %}
                {{ render_nav_item('index', 'Home') }}
                {{ render_nav_item('results', 'Results') }}
                {{ render_nav_item('download_results', 'Download Results') }}
            </div>
        </nav>
    </header>
    <main>
        <section id="scrapingFormContainer" class="container-fluid d-flex justify-content-center">
            <div class="container-md my-5 mx-2 shadow-lg p-4 ">
                <h2>Config</h2>
                <form id="scrapingForm" method="POST" action="{{ url_for('start_scraping') }}">
                    {{ form.hidden_tag() }}
                    <div class="form-group">
                        {{ form.faction_id.label(class="form-control-label") }}
                        {{ form.faction_id(class="form-control") }}
                    </div>
                    <div class="form-group">
                        {{ form.fetch_interval.label(class="form-control-label") }}
                        {{ form.fetch_interval(class="form-control") }}
                    </div>
                    <div class="form-group">
                        {{ form.run_interval.label(class="form-control-label") }}
                        {{ form.run_interval(class="form-control") }}
                    </div>
                    <div class="form-group">
                        {{ form.submit(class="btn btn-primary", type="submit", id="startButton") }}
                    </div>
                </form>
                <button id="stopButton" class="btn btn-primary">Stop Scraping</button>
            </div>
        </section>

        <section id="resultsContainer" class="container-fluid d-flex justify-content-center">
            <div class="container-md my-5 mx-2 shadow-lg p-4">
                <h2>Logs</h2>
                <button id="prevPage">Previous</button>
                <button id="nextPage">Next</button>
                <pre class="pre-scrollable"><code id="logs"></code></pre>
            </div>
        </section>
    </main>
    {% block scripts %}
    {{ bootstrap.load_js() }}
    <script src="{{url_for('.static', filename='app.js')}}"></script>
    {% endblock %}
</body>
</html>
|
||||||
22
templates/results.html
Normal file
22
templates/results.html
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Scraping Results</title>
</head>
<body>
    <h1>User Activity Statistics</h1>
    <!-- stats: dict mapping hour-of-day -> activity row count,
         produced by generate_statistics() and passed in by /results -->
    <table border="1">
        <tr>
            <th>Hour</th>
            <th>Activity Count</th>
        </tr>
        {% for hour, count in stats.items() %}
        <tr>
            <td>{{ hour }}</td>
            <td>{{ count }}</td>
        </tr>
        {% endfor %}
    </table>
</body>
</html>
|
||||||
Reference in New Issue
Block a user