Restructures. adds TimeSlice, ClearDupes and more comments.
This commit is contained in:
42
config.py
Normal file
42
config.py
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
Created on Wed Jun 21 13:58:42 2023
|
||||
|
||||
@author: michael
|
||||
'''
|
||||
|
||||
## Setup directories
|
||||
# Working directory (Michael's local machine)
|
||||
wd = '/home/michael/Documents/PS/Data/collectTweets/'
|
||||
# Working directory (server)
|
||||
# wd = '/home/yunohost.multimedia/polsoc/Politics & Society/TweetCollection/'
|
||||
|
||||
# Tweet-datafile output directory
|
||||
td = 'data/tweets/'
|
||||
|
||||
# Name of file that all tweets will be written to
|
||||
file_alltweets = 'ALL-SENATORS-TWEETS.csv'
|
||||
|
||||
path_to_tweetdfs = wd + td
|
||||
|
||||
## Define Timespan
|
||||
# Format: %Y-%m-%dT%H:%M:%SZ (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)
|
||||
ts_beg = '2020-01-01T00:00:00Z' # start of scraping
|
||||
ts_end = '2023-01-03T00:00:00Z' # end of scraping
|
||||
no_slices = 24 # Number of slices / time periods.
|
||||
|
||||
# Maximum tweets to be scraped by snscrape. Can be left untouched.
|
||||
maxTweets = 5000
|
||||
|
||||
|
||||
## Install snscrape from the local git repo to make sure that it matches the version used by this project.
|
||||
# If snscrape is not installed yet, enable the following lines by removing the surrounding triple quotes (they additionally require `import os` and `import sys`):
|
||||
'''
|
||||
import subprocess
|
||||
os.chdir('snscrape/')
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-e', '.'])
|
||||
os.chdir(wd)
|
||||
'''
|
||||
|
||||
|
||||
Reference in New Issue
Block a user