Browse Source

Logging-Konfiguration ausgelagert. Konfiguration ausgelagert. Fehler behoben

Markus Spring 1 năm trước
mục cha
commit
3818f43fff
3 tập tin đã thay đổi với 43 bổ sung và 44 xóa
  1. 3 0
      blogs-i-read_v2.ini
  2. 18 44
      blogs-i-read_v2.py
  3. 22 0
      logging_config.ini

+ 3 - 0
blogs-i-read_v2.ini

@@ -0,0 +1,3 @@
+[blogsiread]
+blogfile = blogs2.json
+cronlinksfile = cronlinks2.html

+ 18 - 44
blogs-i-read_v2.py

@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 from bs4 import BeautifulSoup
+import configparser
 from datetime import datetime
 from dateutil.parser import parse
 import feedparser
@@ -10,36 +11,15 @@ import re
 import sys
 import time
 import logging
+from logging.config import fileConfig
 
-# List of URLs  and titles in json file 
-blogs_to_read  = 'blogs2.json'
-# output html file
-cronlinks_file = "cronlinks2.html"
-# ------------------------------------------ nothing to change below ---
+config = configparser.ConfigParser()
+config.read('blogs-i-read_v2.ini')
+blogs_to_read = config['blogsiread']['blogfile']
+cronlinks_file = config['blogsiread']['cronlinksfile']
 
+fileConfig('logging_config.ini')
 logger = logging.getLogger("blogs-i-read_v2")
-logger.setLevel(logging.DEBUG)
-
-# Create handlers for logging to the standard output and a file
-stdoutHandler = logging.StreamHandler(stream=sys.stdout)
-errHandler = logging.FileHandler("error.log")
-
-# Set the log levels on the handlers
-stdoutHandler.setLevel(logging.DEBUG)
-errHandler.setLevel(logging.ERROR)
-
-# Create a log format using Log Record attributes
-logfmt = logging.Formatter(
-    "%(levelname)s | %(filename)s:%(lineno)s >>> %(message)s"
-)
-
-# Set the log format on each handler
-stdoutHandler.setFormatter(logfmt)
-errHandler.setFormatter(logfmt)
-
-# Add each handler to the Logger object
-logger.addHandler(stdoutHandler)
-logger.addHandler(errHandler)
 
 with open(blogs_to_read, 'r') as blogfile:
     blogs = json.load(blogfile)
@@ -102,7 +82,7 @@ def examine_feed(url):
 
 def examine_photoplacegallery(soup, url, md5):
     (post_title, post_url, last_update) = ['', '', 0]
-    logger.debug('examine_photoplacegallery')
+    # logger.debug('examine_photoplacegallery')
     prothost    = re.search(r'^http[s]*:\/\/[\w\.]*', url).group()
     firstah3    = soup.find_all('a','h3')[0]
     post_title  = firstah3.string
@@ -140,26 +120,24 @@ def examine_generic_website(soup, url, md5):
 def examine_url(url):
     logger.debug(url)
     (md5, post_title, post_url, last_update) = ['', '', '', 0]
-    response = requests.get(url)
-    md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
-    soup = BeautifulSoup(response.text, 'html.parser')
-    # try:
-    if True:
+    try:
+        response = requests.get(url)
+        md5 = hashlib.md5(response.content).hexdigest()  # Calculate the MD5 hash
+        soup = BeautifulSoup(response.text, 'html.parser')
+        body = soup.find('body')
+        the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
+    # if True:
         if "photoplacegallery.com" in url:
             (md5, post_title, post_url, last_update) = examine_photoplacegallery(soup, url, md5)
         elif "claudioturri.it" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "picturesfromthezone" in url:
+            md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "magnumphotos" in url:
-            body = soup.find('body')
-            the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
             md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
-            (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "robdeloephotography" in url:
-            body = soup.find('body')
-            the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
             md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "camerawork.de" in url:
@@ -169,21 +147,17 @@ def examine_url(url):
         elif "rudyortega.com" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "donttakepictures.com" in url:
-            body = soup.find('body')
-            the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
             md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "mikepeters-photography.com" in url:
-            body = soup.find('body')
-            the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
             md5 = hashlib.md5(body.get_text().encode('utf-8')).hexdigest()
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         elif "zauber-allenthalben" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
         else:
             logger.info(f"needs treatment: {url}")
-    # except:
-    #     pass
+    except:
+        pass
     return md5, post_title, post_url, last_update
 
 # Function to get the title, MD5 hash of the HTML content, and the time of the last change for a given URL

+ 22 - 0
logging_config.ini

@@ -0,0 +1,22 @@
+[loggers]
+keys=root
+
+[handlers]
+keys=stream_handler
+
+[formatters]
+keys=formatter
+
+[logger_root]
+level=DEBUG
+handlers=stream_handler
+
+[handler_stream_handler]
+class=StreamHandler
+level=DEBUG
+formatter=formatter
+args=(sys.stderr,)
+
+[formatter_formatter]
+format=%(asctime)s %(name)s.%(funcName)s:%(lineno)s:  %(message)s
+#format=%(asctime)s %(lineno)-4s:  %(message)s