@@ -39,14 +39,14 @@ except:
 # Dictionary to store the results
 results = {}
 
-def examine_feed(url):
-    (md5, post_title, post_url, last_update) = get_default_values(url)
+def get_feed_content(url):
     count = 0
     while count <= 3:
         count += 1
         #if True:
         try:
-            logger.debug(f"attempt {count} to read from {url}")
+            if count > 1:
+                logger.debug(f"attempt {count} to read from {url}")
             response = requests.get(url,
                 headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
                 timeout=feed_timeout )
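The fetch-and-retry logic now lives in its own function: up to three attempts with a browser-like User-Agent and a timeout, logging only from the second attempt onwards. A minimal standalone sketch of the same pattern, assuming the lines elided from this hunk parse the HTTP body with feedparser; fetch_feed, the feed_timeout value, and the logger setup here are stand-ins, not the module's actual definitions:

import logging

import feedparser
import requests

logger = logging.getLogger(__name__)
feed_timeout = 10  # assumption; the real module defines its own timeout

def fetch_feed(url, retries=3):
    # Try up to `retries` times; log only on the second attempt onwards,
    # mirroring the `if count > 1` guard added in the hunk above.
    for attempt in range(1, retries + 1):
        if attempt > 1:
            logger.debug(f"attempt {attempt} to read from {url}")
        try:
            response = requests.get(
                url,
                headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
                timeout=feed_timeout)
            return feedparser.parse(response.text)
        except requests.RequestException:
            if attempt == retries:
                raise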
@@ -56,13 +56,17 @@ def examine_feed(url):
         except:
             if count == 3:
                 break
-    #if True:
-    try:
+    return feed
+
+def examine_feed(url):
+    (md5, post_title, post_url, last_update) = get_default_values(url)
+    feed = get_feed_content(url)
+    if True:
+    #try:
         post_title = feed.entries[0].title
         post_url = feed.entries[0].link
         old_md5 = hashlib.md5( post_title.encode('utf-8')
             + feed.entries[0].updated.encode('utf-8') ).hexdigest()
-        logger.debug( f"{post_title.encode('utf-8')} // {post_url.encode('utf-8')}" )
         md5 = 'v2_' + hashlib.md5( post_title.encode('utf-8')
             + post_url.encode('utf-8') ).hexdigest()
         # make it dependant on change
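examine_feed derives two fingerprints from the newest entry: the legacy hash over title plus the entry's updated timestamp, and a 'v2_'-prefixed hash over title plus link, which ignores pure timestamp bumps. A sketch of the scheme, where entry stands in for feed.entries[0]:

import hashlib

def fingerprints(entry):
    # Legacy fingerprint: changes whenever the feed re-stamps the entry.
    old_md5 = hashlib.md5(entry.title.encode('utf-8')
                          + entry.updated.encode('utf-8')).hexdigest()
    # v2 fingerprint: only changes when the title or link actually changes.
    md5 = 'v2_' + hashlib.md5(entry.title.encode('utf-8')
                              + entry.link.encode('utf-8')).hexdigest()
    return md5, old_md5

Checking the stored hash against both values, as the next hunk does with `not in [ md5, old_md5 ]`, lets entries stored under the legacy format migrate to the v2 format without registering a spurious update.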
@@ -70,13 +74,14 @@ def examine_feed(url):
             logger.debug('existent feed')
             if md5_sums[url]['md5'] not in [ md5, old_md5 ]:
                 logger.debug(f'hashes NOT equal')
+                last_update = int(time.mktime(datetime.utcnow().timetuple())) + met_offset
             else:
                 logger.debug('newhash equal to old or new saved hashes')
                 last_update = md5_sums[url]['timestamp']
         else:
             logger.debug('new feed')
-    except:
-    #if False:
+    #except:
+    if False:
         logger.info(f'error when parsing feed {url}')
         try:
         #if True:
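On a detected change, last_update is stamped with the current time as epoch seconds plus met_offset (a module-level offset, judging from the surrounding code). Standalone, the expression is:

import time
from datetime import datetime

met_offset = 3600  # assumption: the module's own offset, in seconds

last_update = int(time.mktime(datetime.utcnow().timetuple())) + met_offset

Note that time.mktime interprets its argument as local time, so feeding it datetime.utcnow() shifts the result by the host's UTC offset; met_offset appears to compensate for that. calendar.timegm(datetime.utcnow().timetuple()) would yield an unshifted UTC epoch.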
@@ -86,6 +91,7 @@ def examine_feed(url):
             last_update = md5_sums[url]['timestamp']
         except:
             pass
+    logger.debug(f"last_update: {last_update}")
     return md5, post_title, post_url, last_update
 
 def examine_photoplacegallery(soup, url, md5):
@@ -154,8 +160,8 @@ def examine_generic_website(soup, url, md5):
 
 def get_default_values(url):
     # initialize variables, suitable for new urls
-    (md5, post_title, post_url, last_update) = ['', '', '', 0]
-        # int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
+    (md5, post_title, post_url, last_update) = \
+        ['', '', '', int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
     if url in md5_sums:
         # get stored values if they exist
         try:
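get_default_values now seeds last_update with the current timestamp instead of 0, so a URL seen for the first time is treated as freshly updated rather than epoch-old. A sketch of the default-then-override flow, assuming md5_sums maps each url to a dict with 'md5', 'timestamp', 'post_title', and 'post_url' keys; default_values here is a hypothetical stand-in:

import time
from datetime import datetime

md5_sums = {}      # loaded from persistent storage in the real module
met_offset = 3600  # assumption, as above

def default_values(url):
    # Defaults suitable for a brand-new URL.
    md5, post_title, post_url = '', '', ''
    last_update = int(time.mktime(datetime.utcnow().timetuple())) + met_offset
    # Override with whatever fields were stored previously.
    stored = md5_sums.get(url, {})
    md5 = stored.get('md5', md5)
    last_update = stored.get('timestamp', last_update)
    post_title = stored.get('post_title', post_title)
    post_url = stored.get('post_url', post_url)
    return md5, post_title, post_url, last_update

Using dict.get with fallbacks covers the same partially-populated entries that the bare try/except: pass handles in the real function.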
@@ -164,6 +170,7 @@ def get_default_values(url):
             post_url = md5_sums[url]['post_url']
         except:
             pass
+    logger.debug(f"last_update: {last_update}")
     return(md5, post_title, post_url, last_update)
 
 def examine_url(url):
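Taken together, the refactor splits fetching from examination: examine_feed asks get_default_values for fallbacks, fetches via get_feed_content, and only overwrites the fallbacks once the feed parses. A hypothetical caller (the url value is illustrative):

url = 'https://example.com/feed.xml'
md5, post_title, post_url, last_update = examine_feed(url)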