3 år sedan · 00a6aa9665
--- a/blogsiread.py
+++ b/blogsiread.py
@@ -1,10 +1,14 @@
 
				 #!/usr/bin/python3
			
 
				 # -*- coding: utf-8 -*-
			
 
				+# coding=utf8
			
 
				 
			
 
				-# 	$Id: blogsiread.py,v 1.8 2022/10/12 19:41:36 springm Exp springm $
			
 
				-#       $Revision: 1.8 $
			
 
				-#       $Date: 2022/10/12 19:41:36 $
			
 
				+# 	$Id: blogsiread.py,v 1.9 2022/10/12 19:56:10 springm Exp springm $
			
 
				+#       $Revision: 1.9 $
			
 
				+#       $Date: 2022/10/12 19:56:10 $
			
 
				 #       $Log: blogsiread.py,v $
			
 
				+#       Revision 1.9  2022/10/12 19:56:10  springm
			
 
				+#       Summary: coding utf-8 hinzugefuegt
			
 
				+#
			
 
				 #       Revision 1.8  2022/10/12 19:41:36  springm
			
 
				 #       Summary: lfionline zurückgestellt auf hash des gesamten html.
			
 
				 #
			
@@ -50,6 +54,8 @@ spring2life_links_url = 'http://spring2life-links.blogspot.com/'
 
				 html_file             = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
			
 
				 database_file         = '/home/springm/docker/wordpress-nginx/blogsiread.json'
			
 
				 loglevel              = logging.WARN
			
 
				+
			
 
				+# ------------------------------------------ nothing to change below ---
			
 
				 if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':    # for development
			
 
				     html_file     = 'cronlinks.html'
			
 
				     database_file = 'blogsiread.json'
			
@@ -57,6 +63,7 @@ if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':    #
 
				 timestamp             = int(time.time())
			
 
				 list_of_blogs         = {}
			
 
				 last_separator        = ''
			
 
				+re_flags              = re.MULTILINE | re.DOTALL | re.UNICODE | re.IGNORECASE
			
 
				 
			
 
				 alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
			
 
				 
			
@@ -185,16 +192,19 @@ def photoplacegallery(b, domain, i):
 
				             return (q, meta_values[domain]['timestamp'])
			
 
				         return (b, meta_values[domain]['timestamp'])
			
 
				 
			
 
				-def lfionlinede(b, domain, i):
			
 
				+def lfionlinede(matchgroup, domain, i):
			
 
				     global meta_values
			
 
				     m = hashlib.sha256()
			
 
				     html = ""
			
 
				     ts = 0                      # timestamp
			
 
				-    with urllib.request.urlopen(b[1]) as response:
			
 
				+    with urllib.request.urlopen(matchgroup[1]) as response:
			
 
				         html = response.read()
			
 
				-        string2hash = f"""html"""
			
 
				-        p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
			
 
				-        # logger.debug(f"{p[2]}")
			
 
				+        logger.debug(f"{matchgroup[1]} -- {len(html.decode('utf-8'))}")
			
 
				+        # string2hash = f"""html"""
			
 
				+        regex = r"""<div class="titlebox30 cu-pointer" onclick="window.location = '(.*?)'">\s*<h1 class="typo-1">(.*?)</h1>.*?</div>""" 
			
 
				+        p = re.search( regex, html.decode('utf-8'), re_flags )
			
 
				+        string2hash = p[0]
			
 
				+        logger.debug(f"{p[0]}")
			
 
				         m.update(string2hash.encode('utf-8'))
			
 
				         hash = (m.hexdigest())
			
 
				         if not domain in meta_values: # first run
			
@@ -219,7 +229,7 @@ def lfionlinede(b, domain, i):
 
				             q[1] = meta_values[domain]['posturl']
			
 
				             q[2] = meta_values[domain]['posttitle']
			
 
				             return (q, meta_values[domain]['timestamp'])
			
 
				-        return (b, meta_values[domain]['timestamp'])
			
 
				+        return (matchgroup, meta_values[domain]['timestamp'])
			
 
				 
			
 
				 def picturesfromthezone(b, domain, i):
			
 
				     global meta_values