Просмотр исходного кода

function needs_update verbessert: >7 Tage nicht upgedatete Blogs werden nur noch 1x/h geprüft

Markus Spring 1 год назад
Родитель
Commit
c40f42857f
1 измененных файлов с 19 добавлено и 18 удалено
  1. 19 18
      blogs-i-read_v2.py

+ 19 - 18
blogs-i-read_v2.py

@@ -111,7 +111,7 @@ def examine_photoplacegallery(soup, url, md5):
     return md5, post_title, post_url, last_update
 
 def examine_lfionline(soup, url, md5):
-    (post_title, post_url, last_update) = ['', '', 0]
+    (post_title, post_url, last_update) = ['', '', time.time()]
     logger.debug('examine_lfionline')
     all_cards = soup.find_all(name="div", class_="card")
     for card in all_cards:
@@ -124,13 +124,11 @@ def examine_lfionline(soup, url, md5):
         if md5_sums[url]['md5'] != md5:
             logger.debug('md5 not equal')
             md5_sums[url]['md5'] = md5
-            last_update = time.time()
         else:
             logger.debug('md5 equal')
             md5 = md5_sums[url]['md5']
             last_update = md5_sums[url]['timestamp']
-    else:
-        last_update = time.time()
+    logger.debug(f"{post_title} {post_url} {last_update}")
     return md5, post_title, post_url, last_update
 
 def examine_generic_website(soup, url, md5):
@@ -144,7 +142,7 @@ def examine_generic_website(soup, url, md5):
             md5_sums[url]['md5'] = md5
             last_update = time.time()
         else:
-            #logger.debug('md5 equal')
+            # logger.debug('md5 equal')
             # logger.debug(md5_sums[url]['timestamp'])
             if md5_sums[url]['timestamp'] > 0:
                 last_update = md5_sums[url]['timestamp']
@@ -176,7 +174,7 @@ def examine_url(url):
     except:
         loaded_cookies = {}
     response = requests.get(url, cookies=loaded_cookies)
-    # if True:
+    #if True:
     try:
         # logger.debug(response.cookies)
         saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
@@ -196,15 +194,14 @@ def examine_url(url):
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
     except:
         pass
-    # logger.debug(last_update)
     return md5, post_title, post_url, last_update
 
 def needs_update(url):
     if url not in md5_sums:
         return True
     last_update = md5_sums[url]['timestamp']
-    epoch = time.mktime(datetime.utcnow().timetuple())
-    logger.debug(f"{last_update} - {epoch} : {(epoch - last_update)/3600}")
+    epoch = time.mktime(datetime.utcnow().timetuple()) + met_offset 
+    logger.debug(f"{last_update} - {epoch} : {((epoch - last_update)/3600):.1f} hours old")
     minute = datetime.utcfromtimestamp(epoch).minute
     quarter = 0
     if 15 <= minute < 30:
@@ -213,15 +210,25 @@ def needs_update(url):
         quarter = 2
     else:
         quarter = 3
+        
     diff = epoch - last_update
-    if diff > 3600*18:
+    if diff > 3600*24*7:
+        if quarter == 1:
+            return True
+        else:
+            return False
+    elif diff > 3600*18:
         return True
     elif diff > 3600*12:
         if quarter % 2 == 1:
             return True
+        else:
+            return False
     elif diff > 3600*6:
         if quarter == 1:
             return True
+        else:
+            return False
     else:
         return False
 
@@ -244,16 +251,10 @@ def get_url_info( blog ):
         post_url = md5_sums[url]['post_url']
         last_update = md5_sums[url]['timestamp']
         
-    # Compare the MD5 hash with the one from the JSON file
-    if url in md5_sums and md5_sums[url]['md5'] == md5:
-        if  md5_sums[url]['timestamp'] < 1: 
-            logger.debug('correcting 0 timestamp')
-            md5_sums[url]['timestamp'] = last_update
-    else:
-        md5_sums[url] = { 'md5' : md5, 'timestamp' : last_update }
-
     md5_sums[url]['post_url'] = post_url
     md5_sums[url]['current_title'] = post_title
+    md5_sums[url]['md5'] = md5
+    md5_sums[url]['timestamp'] = last_update
     
     results[url] = { 'blog_url': blog['url'],
                      'blog_title': blog['title'],