
refactored examine_feed, repaired last_update for unequal hashes

Markus Spring 1 year ago
parent
commit
0ff6900ad1
1 changed file with 17 additions and 10 deletions

+ 17 - 10
blogs-i-read_v2.py

@@ -39,14 +39,14 @@ except:
 # Dictionary to store the results
 results = {}
         
-def examine_feed(url):
-    (md5, post_title, post_url, last_update) = get_default_values(url)
+def get_feed_content(url):
     count = 0
     while count <= 3:
         count += 1
         #if True:
         try:
-            logger.debug(f"attempt {count} to read from {url}")
+            if count > 1:
+                logger.debug(f"attempt {count} to read from {url}")
             response = requests.get(url,
                 headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
                                     timeout=feed_timeout )
@@ -56,13 +56,17 @@ def examine_feed(url):
         except:
             if count == 3:
                 break
-    #if True:
-    try:
+    return feed
+
+def examine_feed(url):
+    (md5, post_title, post_url, last_update) = get_default_values(url)
+    feed = get_feed_content(url)
+    if True:
+    #try:
         post_title = feed.entries[0].title
         post_url = feed.entries[0].link
         old_md5 = hashlib.md5( post_title.encode('utf-8')
                                + feed.entries[0].updated.encode('utf-8') ).hexdigest()
-        logger.debug( f"{post_title.encode('utf-8')} // {post_url.encode('utf-8')}" )
         md5 = 'v2_' + hashlib.md5( post_title.encode('utf-8')
                                    + post_url.encode('utf-8') ).hexdigest()
        # make it dependent on change
@@ -70,13 +74,14 @@ def examine_feed(url):
             logger.debug('existent feed')
             if md5_sums[url]['md5'] not in [ md5, old_md5 ]:
                 logger.debug(f'hashes NOT equal')
+                last_update = int(time.mktime(datetime.utcnow().timetuple())) + met_offset
             else:
                 logger.debug('newhash equal to old or new saved hashes')
                 last_update = md5_sums[url]['timestamp']
         else:
             logger.debug('new feed')
-    except:
-    #if False:
+    #except:
+    if False:
         logger.info(f'error when parsing feed {url}')
         try:
         #if True:
@@ -86,6 +91,7 @@ def examine_feed(url):
             last_update = md5_sums[url]['timestamp']
         except:
             pass
+    logger.debug(f"last_update: {last_update}")
     return md5, post_title, post_url, last_update
 
 def examine_photoplacegallery(soup, url, md5):
@@ -154,8 +160,8 @@ def examine_generic_website(soup, url, md5):
 
 def get_default_values(url):
     # initialize variables, suitable for new urls
-    (md5, post_title, post_url, last_update) = ['', '', '', 0]
-                        # int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
+    (md5, post_title, post_url, last_update) = \
+        ['', '', '', int(time.mktime(datetime.utcnow().timetuple())) + met_offset]
     if url in md5_sums:
         # get stored values if they exist
         try:
@@ -164,6 +170,7 @@ def get_default_values(url):
             post_url = md5_sums[url]['post_url']
         except:
             pass
+    logger.debug(f"last_update: {last_update}")
     return(md5, post_title, post_url, last_update)
 
 def examine_url(url):
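
The net effect of the change, as a standalone sketch (a minimal sketch, not
repository code: resolve_last_update and MET_OFFSET are hypothetical names,
and the stored dict mirrors the md5_sums[url] entries visible in the diff):

import time
from datetime import datetime

MET_OFFSET = 2 * 3600  # assumption: the script's met_offset, in seconds

def resolve_last_update(stored, new_md5, old_md5):
    # Timestamp expression taken from the commit: current UTC time as
    # epoch seconds, shifted by the MET offset.
    now = int(time.mktime(datetime.utcnow().timetuple())) + MET_OFFSET
    if stored is None:
        # new feed: get_default_values now seeds last_update with 'now'
        # instead of 0
        return now
    if stored['md5'] in (new_md5, old_md5):
        # hash unchanged: keep the previously stored timestamp
        return stored['timestamp']
    # hashes NOT equal: the feed has a new post, stamp it with 'now'
    return now

Example:

stored = {'md5': 'v2_a1b2', 'timestamp': 1700000000}
resolve_last_update(stored, 'v2_a1b2', 'c3d4')  # unchanged -> 1700000000
resolve_last_update(stored, 'v2_zzzz', 'c3d4')  # changed   -> fresh timestamp
resolve_last_update(None, 'v2_zzzz', 'c3d4')    # new feed  -> fresh timestamp

As in the commit, the stored timestamp is only reused when the new hash
matches one of the saved hashes; any mismatch, and any previously unknown
feed, gets stamped with the current time.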