
correction for timestamp with value 0

Markus Spring 1 year ago
Parent
Commit
73185672f3
1 changed file with 22 additions and 16 deletions

+ 22 - 16
blogs-i-read_v2.py

@@ -14,10 +14,10 @@ import time
 import logging
 from logging.config import fileConfig
 
-config = configparser.ConfigParser()
-config.read('blogs-i-read_v2.ini')
-blogs_to_read = config['blogsiread']['blogfile']
-cronlinks_file = config['blogsiread']['cronlinksfile']
+appconfig = configparser.ConfigParser()
+appconfig.read('blogs-i-read_v2.ini')
+blogs_to_read = appconfig['blogsiread']['blogfile']
+cronlinks_file = appconfig['blogsiread']['cronlinksfile']
 
 fileConfig('logging_config.ini')
 logger = logging.getLogger("blogs-i-read_v2")
@@ -136,6 +136,7 @@ def examine_lfionline(soup, url, md5):
 def examine_generic_website(soup, url, md5):
     (post_title, post_url, last_update) = ['', '', 0]
     prothost    = re.search(r'^http[s]*:\/\/[\w\.]*', url).group()
+    logger.debug(url)
     if url in md5_sums:
         logger.debug(f"found {url} in md5_sums: \n\t{md5_sums[url]['md5']} vs \n\t{md5}")
         if md5_sums[url]['md5'] != md5:
@@ -144,14 +145,19 @@ def examine_generic_website(soup, url, md5):
             last_update = time.time()
         else:
             logger.debug('md5 equal')
-            last_update = md5_sums[url]['timestamp']
+            logger.debug(md5_sums[url]['timestamp'])
+            if md5_sums[url]['timestamp'] > 0:
+                last_update = md5_sums[url]['timestamp']
+            else:
+                last_update = time.time() + 24*3600
     else:
         last_update = time.time()
+    logger.debug(last_update)
     return md5, post_title, post_url, last_update
 
 def get_default_values(url):
     # initialize variables, suitable for new urls
-    (md5, post_title, post_url, last_update) = ['', '', '', 0]
+    (md5, post_title, post_url, last_update) = ['', '', '', time.time()]
     if url in md5_sums:
         # get stored values if they exist
         try:
@@ -165,13 +171,12 @@ def get_default_values(url):
 def examine_url(url):
     logger.debug(url)
     (md5, post_title, post_url, last_update) = get_default_values(url)
-    try:
+    if True:
+    # try:
         response = requests.get(url)
         md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
         soup = BeautifulSoup(response.text, 'html.parser')
         body = soup.find('body')
-        the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
-    # if True:
         if 'lfi-online.de' in url:
             (md5, post_title, post_url, last_update) = examine_lfionline(soup, url, md5)
         elif "photoplacegallery.com" in url:
@@ -179,32 +184,29 @@ def examine_url(url):
         elif "claudioturri.it" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "picturesfromthezone" in url:
-            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "magnumphotos" in url:
-            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "robdeloephotography" in url:
-            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "camerawork.de" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "jeanlucfeixa" in url:
+            logger.debug(f"jeanlucfeixa in {url}")
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "rudyortega.com" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "donttakepictures.com" in url:
-            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "mikepeters-photography.com" in url:
-            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "zauber-allenthalben" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         else:
             logger.info(f"needs treatment: {url}")
-    except:
-        pass
+    # except:
+    #     pass
+    logger.debug(last_update)
     return md5, post_title, post_url, last_update
 
 # Function to get the title, MD5 hash of the HTML content, and the time of the last change for a given URL
@@ -215,11 +217,15 @@ def get_url_info( blog ):
     else:
         (md5, post_title, post_url, last_update) = examine_url(blog['url']) 
         url = blog['url']
+        logger.debug(last_update)
     time_diff = 0
 
     # Compare the MD5 hash with the one from the JSON file
     if url in md5_sums and md5_sums[url]['md5'] == md5:
         change_status = 'No Change'
+        if  md5_sums[url]['timestamp'] < 1: 
+            logger.debug('correcting 0 timestamp')
+            md5_sums[url]['timestamp'] = last_update
     else:
         change_status = 'Changed'
         md5_sums[url] = { 'md5' : md5, 'timestamp' : last_update }
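
Both hunks apply the same rule: a stored timestamp of 0 is never trusted; a current (or near-future) time is substituted instead. Below is a minimal standalone sketch of that rule, assuming the same md5_sums structure as the script; the sample dictionary entry, the helper name corrected_timestamp and its now parameter are illustrative only and do not appear in the commit.

    import time

    # Illustrative entry only; the real file is loaded from cronlinks_file.
    md5_sums = {'https://example.com/': {'md5': 'abc123', 'timestamp': 0}}

    def corrected_timestamp(url, now=None):
        # Return the stored timestamp for url, or a substitute when it is 0 or missing.
        stored = md5_sums.get(url, {}).get('timestamp', 0)
        if stored > 0:
            return stored
        # A 0 timestamp would rank the blog as "unchanged since 1970"; fall back
        # to the current time plus one day, mirroring the fallback in the diff.
        return (now if now is not None else time.time()) + 24 * 3600

    print(corrected_timestamp('https://example.com/'))  # prints an epoch value one day ahead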