Explorar o código

Summary: lfi repariert

Markus Spring hai 3 anos
pai
achega
3fb1685925
Modificáronse 1 ficheiros con 37 adicións e 32 borrados
  1. 37 32
      blogsiread.py

+ 37 - 32
blogsiread.py

@@ -1,9 +1,12 @@
 #!/usr/bin/python3
 
-# 	$Id: blogsiread.py,v 1.4 2022/09/29 04:42:00 springm Exp springm $
-#       $Revision: 1.4 $
-#       $Date: 2022/09/29 04:42:00 $
+# 	$Id: blogsiread.py,v 1.5 2022/10/01 11:36:32 springm Exp springm $
+#       $Revision: 1.5 $
+#       $Date: 2022/10/01 11:36:32 $
 #       $Log: blogsiread.py,v $
+#       Revision 1.5  2022/10/01 11:36:32  springm
+#       Summary: Works
+#
 #       Revision 1.4  2022/09/29 04:42:00  springm
 #       Summary: works, but LFI gets on top too often
 #
@@ -11,8 +14,6 @@
 #       Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung
 #
 
-
-
 """
 * if yes
   * read the spring2life linklist on blogger,
@@ -38,13 +39,17 @@ from pathlib import Path
 spring2life_links_url = 'http://spring2life-links.blogspot.com/'
 html_file             = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
 database_file         = '/home/springm/docker/wordpress-nginx/blogsiread.json'
+loglevel              = logging.WARN
 if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':    # for development
-    html_file             = 'cronlinks.html'
-    database_file         = 'blogsiread.json'
+    html_file     = 'cronlinks.html'
+    database_file = 'blogsiread.json'
+    loglevel      = logging.DEBUG
 timestamp             = int(time.time())
 list_of_blogs         = {}
 last_separator        = ''
 
+alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
+
 def reduce_lines(html):
     lines = html.split('\n')
     i = 0
@@ -176,36 +181,32 @@ def lfionlinede(b, domain, i):
     html = ""
     ts = 0                      # timestamp
     url = 'https://lfi-online.de/ceemes/de/blog/'
-    # logger.debug(f"{b[1]}")
+    logger.debug(f"{b[1]}")
     with urllib.request.urlopen(b[1]) as response:
         html = response.read()
         p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
+        logger.debug(f"{p[2]}")
         string2hash = f"""p[2]"""
         m.update(string2hash.encode('utf-8'))
         hash = (m.hexdigest())
         if not domain in meta_values: # first run
             meta_values[domain] = { 'hash': '1' } # fake value
         if not meta_values[domain]['hash'] == hash:      # Unterschiedliche Hashes
-            meta_values[domain]['hash'] = hash
-            if p:
-                logger.debug(f"search result {p[1]} {p[2]}")
-                string2hash = f"""p[2]"""
-                m.update(string2hash.encode('utf-8'))
-                # hash = (m.hexdigest())
-                meta_values[domain] = { 'hash': hash,
-                                        'timestamp': timestamp - i,
-                                        'posttitle': p[2],
-                                        'posturl':   p[1] }
-                q = {}
-                q[1] = p[1]
-                q[2] = p[2]
-                return (p, timestamp + i)
-                # print(meta_values)
-            else:
-                pass
-                #print('p is empty :(')
+            logger.debug('unterschiedliche hashes')
+            logger.debug(f"search result {p[1]} {p[2]}")
+            string2hash = f"""p[2]"""
+            m.update(string2hash.encode('utf-8'))
+            # hash = (m.hexdigest())
+            meta_values[domain] = { 'hash': hash,
+                                    'timestamp': timestamp - i,
+                                    'posttitle': p[2],
+                                    'posturl':   p[1] }
+            q = {}
+            q[1] = p[1]
+            q[2] = p[2]
+            return (q, timestamp + i)
         else:
-            logger.debug('hashes are equal')
+            logger.debug('gleiche hashes')
             q = {}
             q[1] = meta_values[domain]['posturl']
             q[2] = meta_values[domain]['posttitle']
@@ -219,10 +220,10 @@ def treat_special_domain(domain, b, i):
     # elif domain == 'jims-ramblings.blogspot.com':
     #     print(f"special: {domain}")
     elif domain == 'lfi-online.de':
+        logger.debug(f"{b[1]} {b[2]}")
         (b, ts)  = lfionlinede(b, domain, i)
     elif domain == 'photoplacegallery.com':
         (b, ts)  = photoplacegallery(b, domain, i)
-        logger.debug(f"{b[1]} {b[2]}")
     # elif domain == 'www.picturesfromthezone.com':
     #     print(f"special: {domain}")
     return (b, ts)
@@ -238,6 +239,10 @@ def read_spring2life_links():
     for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
         burl = b[1]
         bdomain = re.sub( r"(https?://|/<?.*?$)", "", burl)
+        # print(f"---->", bdomain)            
+        if bdomain in alternative_blog_urls.keys():
+            burl = burl.replace(bdomain, alternative_blog_urls[bdomain])
+            # print(f"---->", burl)            
         btitle = b[2]
         z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<", b[3], re.MULTILINE | re.DOTALL)
         if z:
@@ -248,9 +253,9 @@ def read_spring2life_links():
             (z, ts) = treat_special_domain(bdomain, b, counter)
             blogtimestamp = ts
             if bdomain == 'lfi-online.de':
-                logger.debug(f"into list: \n{b[1]} // {b[2]}\n{z[1]} // {z[2]}")
+                logger.debug(f"into list: \n{burl} // {b[2]}\n{z[1]} // {z[2]}")
         counter += 1
-        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{b[1]}' target='_blank'>{b[2]}</a>"""
+        list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{burl}' target='_blank'>{b[2]}</a>"""
                                              f"""&nbsp;//&nbsp;<a href='{z[1]}' target='_blank'>{z[2]}</a></li>""")
 
 def read_value_hash():
@@ -323,13 +328,13 @@ def main():
         'formatters': {
             'standard': {
                 # 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
-                'format': '[%(lineno)s - %(funcName)25s() ] %(message)s'
+                'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s'
             },
         },
         'handlers': {
             'default_handler': {'class': 'logging.StreamHandler',
                                 'formatter': 'standard',
-                                'level': logging.WARN},
+                                'level': loglevel },
             # {
             #     'class': 'logging.FileHandler',
             #     'level': 'DEBUG',