|
|
@@ -149,7 +149,7 @@ def examine_generic_website(soup, url, md5):
|
|
|
if md5_sums[url]['timestamp'] > 0:
|
|
|
last_update = md5_sums[url]['timestamp']
|
|
|
else:
|
|
|
- last_update = time.time() + 24*3600
|
|
|
+ last_update = time.time() - 24*3600
|
|
|
else:
|
|
|
last_update = time.time()
|
|
|
logger.debug(last_update)
|
|
|
@@ -169,13 +169,24 @@ def get_default_values(url):
|
|
|
return(md5, post_title, post_url, last_update)
|
|
|
|
|
|
def examine_url(url):
|
|
|
- logger.debug(url)
|
|
|
(md5, post_title, post_url, last_update) = get_default_values(url)
|
|
|
+ logger.debug(url)
|
|
|
+ try:
|
|
|
+ loaded_cookies = md5_sums[url]['cookies']
|
|
|
+ except:
|
|
|
+ loaded_cookies = {}
|
|
|
+ response = requests.get(url, cookies=loaded_cookies)
|
|
|
# if True:
|
|
|
try:
|
|
|
- response = requests.get(url)
|
|
|
- md5 = hashlib.md5(response.content).hexdigest() # Calculate the MD5 hash
|
|
|
+ logger.debug(response.cookies)
|
|
|
+ saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
|
|
|
+ cookies_json = json.dumps(saved_cookies, indent=4)
|
|
|
+ logger.debug(cookies_json)
|
|
|
+ md5_sums[url]['cookies'] = saved_cookies
|
|
|
soup = BeautifulSoup(response.text, 'html.parser')
|
|
|
+ all_text = "".join(soup.body.get_text())
|
|
|
+ md5 = hashlib.sha256(all_text.encode('utf-8')).hexdigest()
|
|
|
+ #md5 = hashlib.md5(response.content).hexdigest() # Calculate the MD5 hash
|
|
|
body = soup.find('body')
|
|
|
if 'lfi-online.de' in url:
|
|
|
(md5, post_title, post_url, last_update) = examine_lfionline(soup, url, md5)
|
|
|
@@ -286,7 +297,7 @@ with open(cronlinks_file, "w") as cronlinks:
|
|
|
cronlinks.write("</ul>\n<li style='font-weight: bold;'>A day and older</li>\n\t<ul>\n")
|
|
|
time_separator_flag = 1
|
|
|
sdr = sorted_data[r]
|
|
|
- cronlinks.write(f"\t<li><a href='{sdr['blog_url']}' target='_blank'>{sdr['blog_title']}</a>" +
|
|
|
+ cronlinks.write(f"\t<li><a href='{sdr['blog_url']}' target='_blank' title='{datetime.fromtimestamp(sdr['last_update'])}'>{sdr['blog_title']}</a>" +
|
|
|
" // " +
|
|
|
f"<a href='{sdr['post_url']}' target='_blank'>{sdr['current_title']}</a></li>\n")
|
|
|
# \t<!-- {datetime.datetime.fromtimestamp(lupd)} // {epoch_time} - {lupd} = {epoch_time - lupd} :: {time_separator_flag} -->\n"
|