|
|
@@ -219,25 +219,42 @@ def examine_url(url):
|
|
|
logger.debug(last_update)
|
|
|
return md5, post_title, post_url, last_update
|
|
|
|
|
|
def needs_update(url):
    """Decide whether the cached entry for *url* is stale enough to re-fetch.

    Reads the stored ``timestamp`` for *url* from the module-level
    ``md5_sums`` mapping and compares it with the current time.

    Args:
        url: Key into ``md5_sums`` (the feed or page URL of a blog).

    Returns:
        True when nothing is cached for *url* yet, or when the cached
        timestamp is three hours old or older; False when it is younger.
    """
    try:
        last_update = md5_sums[url]['timestamp']
    except KeyError:
        # No cached entry (or no timestamp) for this URL: it must be fetched.
        logger.debug('need update')
        return True

    # NOTE(review): mktime() interprets the UTC time tuple as local time, so
    # this value is offset from the true epoch by the local UTC offset. It is
    # kept as-is on the assumption that stored timestamps are produced the
    # same way -- confirm before switching to time.time().
    now = time.mktime(datetime.utcnow().timetuple())

    # Age must be "now minus stored"; the reverse is negative for every past
    # timestamp and made the freshness test always succeed.
    age = now - last_update
    logger.debug(f"{last_update} - {now} : {age}")

    # Entries younger than three hours are considered fresh. Everything
    # older (including the former separate "> 12 hours" case, which had the
    # same outcome) triggers a re-fetch.
    if age < 3600 * 3:
        logger.debug('NO need update')
        return False

    logger.debug('need update')
    return True
|
|
|
+
|
|
|
# Function to get the title, MD5 hash of the HTML content, and the time of the last change for a given URL
|
|
|
def get_url_info( blog ):
|
|
|
if 'feed' in blog.keys():
|
|
|
- (md5, post_title, post_url, last_update) = examine_feed(blog['feed'])
|
|
|
url = blog['feed']
|
|
|
else:
|
|
|
- (md5, post_title, post_url, last_update) = examine_url(blog['url'])
|
|
|
url = blog['url']
|
|
|
- logger.debug(last_update)
|
|
|
- time_diff = 0
|
|
|
-
|
|
|
+ if needs_update(url):
|
|
|
+ if 'feed' in blog.keys():
|
|
|
+ (md5, post_title, post_url, last_update) = examine_feed(blog['feed'])
|
|
|
+ else:
|
|
|
+ (md5, post_title, post_url, last_update) = examine_url(blog['url'])
|
|
|
+ else:
|
|
|
+ md5 = md5_sums[url]['md5']
|
|
|
+ post_title = md5_sums[url]['current_title']
|
|
|
+ post_url = md5_sums[url]['post_url']
|
|
|
+ last_update = md5_sums[url]['timestamp']
|
|
|
+
|
|
|
# Compare the MD5 hash with the one from the JSON file
|
|
|
if url in md5_sums and md5_sums[url]['md5'] == md5:
|
|
|
- change_status = 'No Change'
|
|
|
if md5_sums[url]['timestamp'] < 1:
|
|
|
logger.debug('correcting 0 timestamp')
|
|
|
md5_sums[url]['timestamp'] = last_update
|
|
|
else:
|
|
|
- change_status = 'Changed'
|
|
|
md5_sums[url] = { 'md5' : md5, 'timestamp' : last_update }
|
|
|
|
|
|
md5_sums[url]['post_url'] = post_url
|
|
|
@@ -249,7 +266,8 @@ def get_url_info( blog ):
|
|
|
'post_url': post_url,
|
|
|
'md5': md5,
|
|
|
'last_update': last_update }
|
|
|
-
|
|
|
+
|
|
|
+# ------------------------------------------------------------- main ---
|
|
|
filter = False
|
|
|
if len(sys.argv) > 1:
|
|
|
filter = sys.argv[1]
|