@@ -111,7 +111,7 @@ def examine_photoplacegallery(soup, url, md5):
     return md5, post_title, post_url, last_update


 def examine_lfionline(soup, url, md5):
-    (post_title, post_url, last_update) = ['', '', 0]
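+    # Seed last_update with "now" instead of 0; this replaces the per-branch
+    # time.time() assignments removed below.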
+    (post_title, post_url, last_update) = ['', '', time.time()]
     logger.debug('examine_lfionline')
     all_cards = soup.find_all(name="div", class_="card")
     for card in all_cards:
@@ -124,13 +124,11 @@ def examine_lfionline(soup, url, md5):
         if md5_sums[url]['md5'] != md5:
             logger.debug('md5 not equal')
             md5_sums[url]['md5'] = md5
-            last_update = time.time()
         else:
             logger.debug('md5 equal')
             md5 = md5_sums[url]['md5']
             last_update = md5_sums[url]['timestamp']
-    else:
-        last_update = time.time()
+    logger.debug(f"{post_title} {post_url} {last_update}")
     return md5, post_title, post_url, last_update


 def examine_generic_website(soup, url, md5):
@@ -144,7 +142,7 @@ def examine_generic_website(soup, url, md5):
         md5_sums[url]['md5'] = md5
         last_update = time.time()
     else:
-        #logger.debug('md5 equal')
+        # logger.debug('md5 equal')
         # logger.debug(md5_sums[url]['timestamp'])
         if md5_sums[url]['timestamp'] > 0:
             last_update = md5_sums[url]['timestamp']
@@ -176,7 +174,7 @@ def examine_url(url):
     except:
         loaded_cookies = {}
     response = requests.get(url, cookies=loaded_cookies)
-    # if True:
+    #if True:
     try:
         # logger.debug(response.cookies)
         saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
@@ -196,15 +194,14 @@ def examine_url(url):
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
     except:
         pass
-    # logger.debug(last_update)
     return md5, post_title, post_url, last_update


 def needs_update(url):
     if url not in md5_sums:
         return True
     last_update = md5_sums[url]['timestamp']
-    epoch = time.mktime(datetime.utcnow().timetuple())
-    logger.debug(f"{last_update} - {epoch} : {(epoch - last_update)/3600}")
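+    # met_offset is defined elsewhere; presumably it shifts the UTC epoch to local (MET) time.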
+    epoch = time.mktime(datetime.utcnow().timetuple()) + met_offset
+    logger.debug(f"{last_update} - {epoch} : {((epoch - last_update)/3600):.1f} hours old")
     minute = datetime.utcfromtimestamp(epoch).minute
     quarter = 0
     if 15 <= minute < 30:
@@ -213,15 +210,25 @@ def needs_update(url):
         quarter = 2
     else:
         quarter = 3
+
     diff = epoch - last_update
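+    # Staleness schedule: > 7 days -> recheck once per hour (quarter 1 only);
+    # > 18 h -> every run; > 12 h -> twice per hour; > 6 h -> once per hour; else skip.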
-    if diff > 3600*18:
+    if diff > 3600*24*7:
+        if quarter == 1:
+            return True
+        else:
+            return False
+    elif diff > 3600*18:
         return True
     elif diff > 3600*12:
         if quarter % 2 == 1:
             return True
+        else:
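+            # Explicit return; this path previously fell through without a return (None).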
+            return False
     elif diff > 3600*6:
         if quarter == 1:
             return True
+        else:
+            return False
     else:
         return False
@@ -244,16 +251,10 @@ def get_url_info( blog ):
         post_url = md5_sums[url]['post_url']
         last_update = md5_sums[url]['timestamp']

-    # Compare the MD5 hash with the one from the JSON file
-    if url in md5_sums and md5_sums[url]['md5'] == md5:
-        if md5_sums[url]['timestamp'] < 1:
-            logger.debug('correcting 0 timestamp')
-            md5_sums[url]['timestamp'] = last_update
-    else:
-        md5_sums[url] = { 'md5' : md5, 'timestamp' : last_update }
-
     md5_sums[url]['post_url'] = post_url
     md5_sums[url]['current_title'] = post_title
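+    # Always overwrite the stored md5 and timestamp with the freshly computed values,
+    # replacing the conditional merge removed above.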
+    md5_sums[url]['md5'] = md5
+    md5_sums[url]['timestamp'] = last_update

     results[url] = { 'blog_url': blog['url'],
                      'blog_title': blog['title'],