|
|
@@ -41,14 +41,23 @@ results = {}
|
|
|
|
|
|
def examine_feed(url):
|
|
|
(md5, post_title, post_url, last_update) = get_default_values(url)
|
|
|
- try:
|
|
|
+ count = 0
|
|
|
+ while count <= 3:
|
|
|
+ count += 1
|
|
|
+ try:
|
|
|
+ logger.debug(f"attempt {count} to read from {url}")
|
|
|
+ feed = feedparser.parse(url)
|
|
|
+ break
|
|
|
+            except Exception:
|
|
|
+ if count == 3:
|
|
|
+ break
|
|
|
# if True:
|
|
|
- feed = feedparser.parse(url)
|
|
|
+ try:
|
|
|
post_title = feed.entries[0].title
|
|
|
post_url = feed.entries[0].link
|
|
|
old_md5 = hashlib.md5( post_title.encode('utf-8')
|
|
|
+ feed.entries[0].updated.encode('utf-8') ).hexdigest()
|
|
|
- logger.debug( post_title.encode('utf-8') + post_url.encode('utf-8') )
|
|
|
+        logger.debug( f"{post_title} // {post_url}" )
|
|
|
md5 = 'v2_' + hashlib.md5( post_title.encode('utf-8')
|
|
|
+ post_url.encode('utf-8') ).hexdigest()
|
|
|
# make it dependant on change
|
|
|
@@ -57,12 +66,19 @@ def examine_feed(url):
|
|
|
if md5_sums[url]['md5'] not in [ md5, old_md5 ]:
|
|
|
logger.debug(f'hashes NOT equal')
|
|
|
else:
|
|
|
- logger.debug('hashes equal to old or new')
|
|
|
+ logger.debug('newhash equal to old or new saved hashes')
|
|
|
last_update = md5_sums[url]['timestamp']
|
|
|
else:
|
|
|
logger.debug('new feed')
|
|
|
except:
|
|
|
logger.info(f'error when parsing feed {url}')
|
|
|
+ try:
|
|
|
+ post_title = md5_sums[url]['post_title']
|
|
|
+ post_url = md5_sums[url]['post_url']
|
|
|
+ md5 = md5_sums[url]['md5']
|
|
|
+ last_update = md5_sums[url]['timestamp']
|
|
|
+        except KeyError:
|
|
|
+ pass
|
|
|
return md5, post_title, post_url, last_update
|
|
|
|
|
|
def examine_photoplacegallery(soup, url, md5):
|
|
|
@@ -131,8 +147,8 @@ def examine_generic_website(soup, url, md5):
|
|
|
|
|
|
def get_default_values(url):
|
|
|
# initialize variables, suitable for new urls
|
|
|
- (md5, post_title, post_url, last_update) = ['', '', '',
|
|
|
- int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
|
|
|
+ (md5, post_title, post_url, last_update) = ['', '', '', 0]
|
|
|
+ # int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
|
|
|
if url in md5_sums:
|
|
|
# get stored values if they exist
|
|
|
try:
|
|
|
@@ -226,7 +242,9 @@ def get_url_info( blog ):
|
|
|
post_title = md5_sums[url]['current_title']
|
|
|
post_url = md5_sums[url]['post_url']
|
|
|
last_update = md5_sums[url]['timestamp']
|
|
|
-
|
|
|
+
|
|
|
+ if url not in md5_sums.keys():
|
|
|
+ md5_sums[url] = {}
|
|
|
md5_sums[url]['post_url'] = post_url
|
|
|
md5_sums[url]['current_title'] = post_title
|
|
|
md5_sums[url]['md5'] = md5
|