|
|
@@ -16,8 +16,9 @@ from logging.config import fileConfig
|
|
|
|
|
|
appconfig = configparser.ConfigParser()
|
|
|
appconfig.read('blogs-i-read_v2.ini')
|
|
|
-blogs_to_read = appconfig['blogsiread']['blogfile']
|
|
|
+blogs_to_read = appconfig['blogsiread']['blogfile']
|
|
|
cronlinks_file = appconfig['blogsiread']['cronlinksfile']
|
|
|
+feed_timeout = float(appconfig['blogsiread']['feedtimeout'])
|
|
|
|
|
|
fileConfig('logging_config.ini')
|
|
|
logger = logging.getLogger("blogs-i-read_v2")
|
|
|
@@ -30,7 +31,6 @@ with open(blogs_to_read, 'r') as blogfile:
|
|
|
met_offset = 3600
|
|
|
md5_sums = {}
|
|
|
try:
|
|
|
- # Read the JSON file containing the MD5 sums
|
|
|
with open('md5_sums.json', 'r') as file:
|
|
|
md5_sums = json.load(file)
|
|
|
except:
|
|
|
@@ -44,14 +44,19 @@ def examine_feed(url):
|
|
|
count = 0
|
|
|
while count <= 3:
|
|
|
count += 1
|
|
|
+ #if True:
|
|
|
try:
|
|
|
logger.debug(f"attempt {count} to read from {url}")
|
|
|
- feed = feedparser.parse(url)
|
|
|
+ response = requests.get(url,
|
|
|
+ headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
|
|
|
+ timeout=feed_timeout )
|
|
|
+ response.encoding = 'utf-8'
|
|
|
+ feed = feedparser.parse(response.text)
|
|
|
break
|
|
|
except:
|
|
|
if count == 3:
|
|
|
break
|
|
|
- # if True:
|
|
|
+ #if True:
|
|
|
try:
|
|
|
post_title = feed.entries[0].title
|
|
|
post_url = feed.entries[0].link
|
|
|
@@ -71,9 +76,11 @@ def examine_feed(url):
|
|
|
else:
|
|
|
logger.debug('new feed')
|
|
|
except:
|
|
|
+ #if False:
|
|
|
logger.info(f'error when parsing feed {url}')
|
|
|
try:
|
|
|
- post_title = md5_sums[url]['post_title']
|
|
|
+ #if True:
|
|
|
+ post_title = md5_sums[url]['current_title']
|
|
|
post_url = md5_sums[url]['post_url']
|
|
|
md5 = md5_sums[url]['md5']
|
|
|
last_update = md5_sums[url]['timestamp']
|
|
|
@@ -153,7 +160,7 @@ def get_default_values(url):
|
|
|
# get stored values if they exist
|
|
|
try:
|
|
|
last_update = md5_sums[url]['timestamp']
|
|
|
- post_title = md5_sums[url]['post_title']
|
|
|
+ post_title = md5_sums[url]['current_title']
|
|
|
post_url = md5_sums[url]['post_url']
|
|
|
except:
|
|
|
pass
|
|
|
@@ -166,7 +173,7 @@ def examine_url(url):
|
|
|
loaded_cookies = md5_sums[url]['cookies']
|
|
|
except:
|
|
|
loaded_cookies = {}
|
|
|
- response = requests.get(url, cookies=loaded_cookies)
|
|
|
+ response = requests.get(url, cookies=loaded_cookies, timeout=feed_timeout)
|
|
|
#if True:
|
|
|
try:
|
|
|
saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
|