@@ -53,8 +53,16 @@ def get_timestamp(ts):
         sys.exit(1)
 
 def examine_feed(url):
-    (md5, post_title, post_url, last_update) = ['', '', '', 0]
+    (md5, post_title, post_url, last_update) = get_default_values(url)
     # logger.debug(f'examine_feed {url}')
+    if url in md5_sums:
+        # get stored values if they exist
+        try:
+            last_update = md5_sums[url]['timestamp']
+            post_title = md5_sums[url]['post_title']
+            post_url = md5_sums[url]['post_url']
+        except:
+            pass
     try:
     #if True:
         feed = feedparser.parse(url)
@@ -141,8 +149,7 @@ def examine_generic_website(soup, url, md5):
         last_update = time.time()
     return md5, post_title, post_url, last_update
 
-def examine_url(url):
-    logger.debug(url)
+def get_default_values(url):
     # initialize variables, suitable for new urls
     (md5, post_title, post_url, last_update) = ['', '', '', 0]
     if url in md5_sums:
@@ -153,6 +160,11 @@ def examine_url(url):
             post_url = md5_sums[url]['post_url']
         except:
            pass
+    return(md5, post_title, post_url, last_update)
+
+def examine_url(url):
+    logger.debug(url)
+    (md5, post_title, post_url, last_update) = get_default_values(url)
     try:
         response = requests.get(url)
         md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
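
The net effect of the patch is that the cached-state lookup previously inlined in examine_url() now lives in a shared get_default_values() helper, which both examine_feed() and examine_url() call for their starting values. A minimal sketch of the helper as it reads once the hunks are applied, assuming md5_sums is a module-level dict keyed by URL; how md5_sums is populated is not shown in the diff, and the initialization and example URL below are illustrative only:

    # Sketch only: md5_sums is assumed to be a module-level dict keyed by URL,
    # e.g. loaded from a JSON state file at startup (not shown in the diff).
    md5_sums = {}

    def get_default_values(url):
        # initialize variables, suitable for new urls
        (md5, post_title, post_url, last_update) = ['', '', '', 0]
        if url in md5_sums:
            # get stored values if they exist
            try:
                last_update = md5_sums[url]['timestamp']
                post_title = md5_sums[url]['post_title']
                post_url = md5_sums[url]['post_url']
            except KeyError:
                # the patch uses a bare except; a missing key is the case it guards against
                pass
        return (md5, post_title, post_url, last_update)

    # Example call, as examine_feed() and examine_url() now do (URL is illustrative):
    md5, post_title, post_url, last_update = get_default_values('https://example.com/feed')

Note that get_default_values() always returns md5 as an empty string; examine_url() recomputes it from the fetched response body, as the context lines of the last hunk show.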