Explorar o código

Summary: lfi repariert

Markus Spring hai 3 anos
pai
achega
3fb1685925
Modificáronse 1 ficheiros con 37 adicións e 32 borrados
  1. 37 32
      blogsiread.py

+ 37 - 32
blogsiread.py

@@ -1,9 +1,12 @@
 #!/usr/bin/python3
 
-# 	$Id: blogsiread.py,v 1.4 2022/09/29 04:42:00 springm Exp springm $
-#       $Revision: 1.4 $
-#       $Date: 2022/09/29 04:42:00 $
+# 	$Id: blogsiread.py,v 1.5 2022/10/01 11:36:32 springm Exp springm $
+#       $Revision: 1.5 $
+#       $Date: 2022/10/01 11:36:32 $
 #       $Log: blogsiread.py,v $
+#       Revision 1.5  2022/10/01 11:36:32  springm
+#       Summary: Works
+#
 #       Revision 1.4  2022/09/29 04:42:00  springm
 #       Summary: works, but LFI gets on top too often
 #
@@ -11,8 +14,6 @@
 #       Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung
 #
 
-
-
 """
 * if yes
   * read the spring2life linklist on blogger,
@@ -38,13 +39,17 @@ from pathlib import Path
 spring2life_links_url = 'http://spring2life-links.blogspot.com/'
 html_file             = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
 database_file         = '/home/springm/docker/wordpress-nginx/blogsiread.json'
+loglevel              = logging.WARN
 if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':    # for development
-    html_file             = 'cronlinks.html'
-    database_file         = 'blogsiread.json'
+    html_file     = 'cronlinks.html'
+    database_file = 'blogsiread.json'
+    loglevel      = logging.DEBUG
 timestamp             = int(time.time())
 list_of_blogs         = {}
 last_separator        = ''
 
+alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
+
 def reduce_lines(html):
     lines = html.split('\n')
     i = 0
@@ -176,36 +181,32 @@ def lfionlinede(b, domain, i):
     html = ""
     ts = 0                      # timestamp
     url = 'https://lfi-online.de/ceemes/de/blog/'
-    # logger.debug(f"{b[1]}")
+    logger.debug(f"{b[1]}")
     with urllib.request.urlopen(b[1]) as response:
         html = response.read()
         p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
+        logger.debug(f"{p[2]}")
         string2hash = f"""p[2]"""
         m.update(string2hash.encode('utf-8'))
         hash = (m.hexdigest())
         if not domain in meta_values: # first run
             meta_values[domain] = { 'hash': '1' } # fake value
         if not meta_values[domain]['hash'] == hash:      # Unterschiedliche Hashes
-            meta_values[domain]['hash'] = hash
-            if p:
-                logger.debug(f"search result {p[1]} {p[2]}")
-                string2hash = f"""p[2]"""
-                m.update(string2hash.encode('utf-8'))
-                # hash = (m.hexdigest())
-                meta_values[domain] = { 'hash': hash,
-                                        'timestamp': timestamp - i,
-                                        'posttitle': p[2],
-                                        'posturl':   p[1] }
-                q = {}
-                q[1] = p[1]
-                q[2] = p[2]
-                return (p, timestamp + i)
-                # print(meta_values)
-            else:
-                pass
-                #print('p is empty :(')
+            logger.debug('unterschiedliche hashes')
+            logger.debug(f"search result {p[1]} {p[2]}")
+            string2hash = f"""p[2]"""
+            m.update(string2hash.encode('utf-8'))
+            # hash = (m.hexdigest())
+            meta_values[domain] = { 'hash': hash,
+                                    'timestamp': timestamp - i,
+                                    'posttitle': p[2],
+                                    'posturl':   p[1] }
+            q = {}
+            q[1] = p[1]
+            q[2] = p[2]
+            return (q, timestamp + i)
         else:
-            logger.debug('hashes are equal')
+            logger.debug('gleiche hashes')
             q = {}
             q[1] = meta_values[domain]['posturl']
             q[2] = meta_values[domain]['posttitle']
@@ -219,10 +220,10 @@ def treat_special_domain(domain, b, i):
     # elif domain == 'jims-ramblings.blogspot.com':
     #     print(f"special: {domain}")
     elif domain == 'lfi-online.de':
+        logger.debug(f"{b[1]} {b[2]}")
         (b, ts)  = lfionlinede(b, domain, i)
     elif domain == 'photoplacegallery.com':
         (b, ts)  = photoplacegallery(b, domain, i)
-        logger.debug(f"{b[1]} {b[2]}")
     # elif domain == 'www.picturesfromthezone.com':
     #     print(f"special: {domain}")
     return (b, ts)
@@ -238,6 +239,10 @@ def read_spring2life_links():
     for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
         burl = b[1]
         bdomain = re.sub( r"(https?://|/<?.*?$)", "", burl)
+        # print(f"---->", bdomain)            
+        if bdomain in alternative_blog_urls.keys():
+            burl = burl.replace(bdomain, alternative_blog_urls[bdomain])
+            # print(f"---->", burl)            
         btitle = b[2]
         z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<", b[3], re.MULTILINE | re.DOTALL)
         if z:
@@ -248,9 +253,9 @@ def read_spring2life_links():
             (z, ts) = treat_special_domain(bdomain, b, counter)
             blogtimestamp = ts
             if bdomain == 'lfi-online.de':
-                logger.debug(f"into list: \n{b[1]} // {b[2]}\n{z[1]} // {z[2]}")
+                logger.debug(f"into list: \n{burl} // {b[2]}\n{z[1]} // {z[2]}")
         counter += 1
-        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{b[1]}' target='_blank'>{b[2]}</a>"""
+        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{burl}' target='_blank'>{b[2]}</a>"""
                                              f"""&nbsp;//&nbsp;<a href='{z[1]}' target='_blank'>{z[2]}</a></li>""")
 
 def read_value_hash():
@@ -323,13 +328,13 @@ def main():
         'formatters': {
             'standard': {
                 # 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
-                'format': '[%(lineno)s - %(funcName)25s() ] %(message)s'
+                'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s'
             },
         },
         'handlers': {
             'default_handler': {'class': 'logging.StreamHandler',
                                 'formatter': 'standard',
-                                'level': logging.WARN},
+                                'level': loglevel },
             # {
             #     'class': 'logging.FileHandler',
             #     'level': 'DEBUG',