3 лет назад · 3fb1685925
--- a/blogsiread.py
+++ b/blogsiread.py
@@ -1,9 +1,12 @@
 
															 #!/usr/bin/python3
														
 
															-# 	$Id: blogsiread.py,v 1.4 2022/09/29 04:42:00 springm Exp springm $
														
 
															-#       $Revision: 1.4 $
														
 
															-#       $Date: 2022/09/29 04:42:00 $
														
 
															+# 	$Id: blogsiread.py,v 1.5 2022/10/01 11:36:32 springm Exp springm $
														
 
															+#       $Revision: 1.5 $
														
 
															+#       $Date: 2022/10/01 11:36:32 $
														
 
															 #       $Log: blogsiread.py,v $
														
 
															+#       Revision 1.5  2022/10/01 11:36:32  springm
														
 
															+#       Summary: Works
														
 
															+#
														
 
															 #       Revision 1.4  2022/09/29 04:42:00  springm
														
 
															 #       Summary: works, but LFI gets on top too often
														
 
															 #
														
@@ -11,8 +14,6 @@
 
															 #       Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung
														
 
															 #
														
 
															-
														
 
															-
														
 
															 """
														
 
															 * if yes
														
 
															   * read the spring2life linklist on blogger,
														
@@ -38,13 +39,17 @@ from pathlib import Path
 
															 spring2life_links_url = 'http://spring2life-links.blogspot.com/'
														
 
															 html_file             = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
														
 
															 database_file         = '/home/springm/docker/wordpress-nginx/blogsiread.json'
														
 
															+loglevel              = logging.WARN
														
 
															 if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':    # for development
														
 
															-    html_file             = 'cronlinks.html'
														
 
															-    database_file         = 'blogsiread.json'
														
 
															+    html_file     = 'cronlinks.html'
														
 
															+    database_file = 'blogsiread.json'
														
 
															+    loglevel      = logging.DEBUG
														
 
															 timestamp             = int(time.time())
														
 
															 list_of_blogs         = {}
														
 
															 last_separator        = ''
														
 
															+alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
														
 
															+
														
 
															 def reduce_lines(html):
														
 
															     lines = html.split('\n')
														
 
															     i = 0
														
@@ -176,36 +181,32 @@ def lfionlinede(b, domain, i):
 
															     html = ""
														
 
															     ts = 0                      # timestamp
														
 
															     url = 'https://lfi-online.de/ceemes/de/blog/'
														
 
															-    # logger.debug(f"{b[1]}")
														
 
															+    logger.debug(f"{b[1]}")
														
 
															     with urllib.request.urlopen(b[1]) as response:
														
 
															         html = response.read()
														
 
															         p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
														
 
															+        logger.debug(f"{p[2]}")
														
 
															         string2hash = f"""p[2]"""
														
 
															         m.update(string2hash.encode('utf-8'))
														
 
															         hash = (m.hexdigest())
														
 
															         if not domain in meta_values: # first run
														
 
															             meta_values[domain] = { 'hash': '1' } # fake value
														
 
															         if not meta_values[domain]['hash'] == hash:      # Unterschiedliche Hashes
														
 
															-            meta_values[domain]['hash'] = hash
														
 
															-            if p:
														
 
															-                logger.debug(f"search result {p[1]} {p[2]}")
														
 
															-                string2hash = f"""p[2]"""
														
 
															-                m.update(string2hash.encode('utf-8'))
														
 
															-                # hash = (m.hexdigest())
														
 
															-                meta_values[domain] = { 'hash': hash,
														
 
															-                                        'timestamp': timestamp - i,
														
 
															-                                        'posttitle': p[2],
														
 
															-                                        'posturl':   p[1] }
														
 
															-                q = {}
														
 
															-                q[1] = p[1]
														
 
															-                q[2] = p[2]
														
 
															-                return (p, timestamp + i)
														
 
															-                # print(meta_values)
														
 
															-            else:
														
 
															-                pass
														
 
															-                #print('p is empty :(')
														
 
															+            logger.debug('unterschiedliche hashes')
														
 
															+            logger.debug(f"search result {p[1]} {p[2]}")
														
 
															+            string2hash = f"""p[2]"""
														
 
															+            m.update(string2hash.encode('utf-8'))
														
 
															+            # hash = (m.hexdigest())
														
 
															+            meta_values[domain] = { 'hash': hash,
														
 
															+                                    'timestamp': timestamp - i,
														
 
															+                                    'posttitle': p[2],
														
 
															+                                    'posturl':   p[1] }
														
 
															+            q = {}
														
 
															+            q[1] = p[1]
														
 
															+            q[2] = p[2]
														
 
															+            return (q, timestamp + i)
														
 
															         else:
														
 
															-            logger.debug('hashes are equal')
														
 
															+            logger.debug('gleiche hashes')
														
 
															             q = {}
														
 
															             q[1] = meta_values[domain]['posturl']
														
 
															             q[2] = meta_values[domain]['posttitle']
														
@@ -219,10 +220,10 @@ def treat_special_domain(domain, b, i):
 
															     # elif domain == 'jims-ramblings.blogspot.com':
														
 
															     #     print(f"special: {domain}")
														
 
															     elif domain == 'lfi-online.de':
														
 
															+        logger.debug(f"{b[1]} {b[2]}")
														
 
															         (b, ts)  = lfionlinede(b, domain, i)
														
 
															     elif domain == 'photoplacegallery.com':
														
 
															         (b, ts)  = photoplacegallery(b, domain, i)
														
 
															-        logger.debug(f"{b[1]} {b[2]}")
														
 
															     # elif domain == 'www.picturesfromthezone.com':
														
 
															     #     print(f"special: {domain}")
														
 
															     return (b, ts)
														
@@ -238,6 +239,10 @@ def read_spring2life_links():
 
															     for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
														
 
															         burl = b[1]
														
 
															         bdomain = re.sub( r"(https?://|/<?.*?$)", "", burl)
														
 
															+        # print(f"---->", bdomain)            
														
 
															+        if bdomain in alternative_blog_urls.keys():
														
 
															+            burl = burl.replace(bdomain, alternative_blog_urls[bdomain])
														
 
															+            # print(f"---->", burl)            
														
 
															         btitle = b[2]
														
 
															         z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<", b[3], re.MULTILINE | re.DOTALL)
														
 
															         if z:
														
@@ -248,9 +253,9 @@ def read_spring2life_links():
 
															             (z, ts) = treat_special_domain(bdomain, b, counter)
														
 
															             blogtimestamp = ts
														
 
															             if bdomain == 'lfi-online.de':
														
 
															-                logger.debug(f"into list: \n{b[1]} // {b[2]}\n{z[1]} // {z[2]}")
														
 
															+                logger.debug(f"into list: \n{burl} // {b[2]}\n{z[1]} // {z[2]}")
														
 
															         counter += 1
														
 
															-        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{b[1]}' target='_blank'>{b[2]}</a>"""
														
 
															+        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{burl}' target='_blank'>{b[2]}</a>"""
														
 
															                                              f"""&nbsp;//&nbsp;<a href='{z[1]}' target='_blank'>{z[2]}</a></li>""")
														
 
															 def read_value_hash():
														
@@ -323,13 +328,13 @@ def main():
 
															         'formatters': {
														
 
															             'standard': {
														
 
															                 # 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
														
 
															-                'format': '[%(lineno)s - %(funcName)25s() ] %(message)s'
														
 
															+                'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s'
														
 
															             },
														
 
															         },
														
 
															         'handlers': {
														
 
															             'default_handler': {'class': 'logging.StreamHandler',
														
 
															                                 'formatter': 'standard',
														
 
															-                                'level': logging.WARN},
														
 
															+                                'level': loglevel },
														
 
															             # {
														
 
															             #     'class': 'logging.FileHandler',
														
 
															             #     'level': 'DEBUG',