
introduced timeout into feed retrieval

Markus Spring, 1 year ago
parent revision 46efa1c309
1 changed file with 14 additions and 7 deletions

blogs-i-read_v2.py  +14 -7

@@ -16,8 +16,9 @@ from logging.config import fileConfig
 
 appconfig = configparser.ConfigParser()
 appconfig.read('blogs-i-read_v2.ini')
-blogs_to_read = appconfig['blogsiread']['blogfile']
+blogs_to_read  = appconfig['blogsiread']['blogfile']
 cronlinks_file = appconfig['blogsiread']['cronlinksfile']
+feed_timeout   = float(appconfig['blogsiread']['feedtimeout'])
 
 fileConfig('logging_config.ini')
 logger = logging.getLogger("blogs-i-read_v2")
@@ -30,7 +31,6 @@ with open(blogs_to_read, 'r') as blogfile:
 met_offset = 3600
 md5_sums = {}
 try:
-    # Read the JSON file containing the MD5 sums
     with open('md5_sums.json', 'r') as file:
         md5_sums = json.load(file)
 except:
@@ -44,14 +44,19 @@ def examine_feed(url):
     count = 0
     while count <= 3:
         count += 1
+        #if True:
         try:
             logger.debug(f"attempt {count} to read from {url}")
-            feed = feedparser.parse(url)
+            response = requests.get(url,
+                headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
+                                    timeout=feed_timeout )
+            response.encoding = 'utf-8'
+            feed = feedparser.parse(response.text)
             break
         except:
             if count == 3:
                 break
-    # if True:
+    #if True:
     try:
         post_title = feed.entries[0].title
         post_url = feed.entries[0].link
@@ -71,9 +76,11 @@ def examine_feed(url):
         else:
             logger.debug('new feed')
     except:
+    #if False:
         logger.info(f'error when parsing feed {url}')
         try:
-            post_title = md5_sums[url]['post_title']
+        #if True:
+            post_title = md5_sums[url]['current_title']
             post_url = md5_sums[url]['post_url']
             md5 = md5_sums[url]['md5']
             last_update = md5_sums[url]['timestamp']
@@ -153,7 +160,7 @@ def get_default_values(url):
         # get stored values if they exist
         try:
             last_update = md5_sums[url]['timestamp']
-            post_title = md5_sums[url]['post_title']
+            post_title = md5_sums[url]['current_title']
             post_url = md5_sums[url]['post_url']
         except:
             pass
@@ -166,7 +173,7 @@ def examine_url(url):
         loaded_cookies = md5_sums[url]['cookies']
     except:
         loaded_cookies = {}
-    response = requests.get(url, cookies=loaded_cookies)
+    response = requests.get(url, cookies=loaded_cookies, timeout=feed_timeout)
     #if True:
     try:
         saved_cookies = requests.utils.dict_from_cookiejar(response.cookies)
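
The new feedtimeout key is read from blogs-i-read_v2.ini next to the existing blogfile and cronlinksfile entries, so the config file must now provide it (a missing key raises a KeyError when the script starts). A minimal sketch of the expected [blogsiread] section, with placeholder values since the real paths and timeout are not part of this commit:

    [blogsiread]
    # placeholder paths; the actual values are not shown in this commit
    blogfile = blogs-i-read.txt
    cronlinksfile = cronlinks.txt
    # timeout in seconds, converted with float() and passed to requests.get(..., timeout=feed_timeout)
    feedtimeout = 10

The same feed_timeout value also bounds the plain-page fetch in examine_url(), so a single ini setting now limits both the feed retrieval and the page requests.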