Просмотр исходного кода

added cookies storage, changed md5 calculation for non-feeds

Markus Spring 1 год назад
Родитель
Коммит
dbc159ded2
1 измененный файл: 16 добавлений и 5 удалений
  1. 16 5
      blogs-i-read_v2.py

+ 16 - 5
blogs-i-read_v2.py

@@ -149,7 +149,7 @@ def examine_generic_website(soup, url, md5):
             if md5_sums[url]['timestamp'] > 0:
                 last_update = md5_sums[url]['timestamp']
             else:
-                last_update = time.time() + 24*3600
+                last_update = time.time() - 24*3600
     else:
         last_update = time.time()
     logger.debug(last_update)
@@ -169,13 +169,24 @@ def get_default_values(url):
     return(md5, post_title, post_url, last_update)
 
 def examine_url(url):
-    logger.debug(url)
     (md5, post_title, post_url, last_update) = get_default_values(url)
+    logger.debug(url)
+    try:
+        loaded_cookies = md5_sums[url]['cookies']
+    except:
+        loaded_cookies = {}
+    response = requests.get(url, cookies=loaded_cookies)
     # if True:
     try:
-        response = requests.get(url)
-        md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
+        logger.debug(response.cookies)
+        saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
+        cookies_json = json.dumps(saved_cookies, indent=4)
+        logger.debug(cookies_json)
+        md5_sums[url]['cookies'] = saved_cookies
         soup = BeautifulSoup(response.text, 'html.parser')
+        all_text = "".join(soup.body.get_text())
+        md5 = hashlib.sha256(all_text.encode('utf-8')).hexdigest()
+        #md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
         body = soup.find('body')
         if 'lfi-online.de' in url:
             (md5, post_title, post_url, last_update) = examine_lfionline(soup, url, md5)
@@ -286,7 +297,7 @@ with open(cronlinks_file, "w") as cronlinks:
                 cronlinks.write("</ul>\n<li style='font-weight: bold;'>A day and older</li>\n\t<ul>\n")
             time_separator_flag = 1
         sdr = sorted_data[r]
-        cronlinks.write(f"\t<li><a href='{sdr['blog_url']}' target='_blank'>{sdr['blog_title']}</a>" +
+        cronlinks.write(f"\t<li><a href='{sdr['blog_url']}' target='_blank' title='{datetime.fromtimestamp(sdr['last_update'])}'>{sdr['blog_title']}</a>" +
                         "&nbsp;//&nbsp;" +
                         f"<a href='{sdr['post_url']}' target='_blank'>{sdr['current_title']}</a></li>\n")
         # \t<!-- {datetime.datetime.fromtimestamp(lupd)} // {epoch_time} - {lupd} = {epoch_time - lupd} :: {time_separator_flag} -->\n"