|
@@ -1,10 +1,14 @@
|
|
|
#!/usr/bin/python3
|
|
#!/usr/bin/python3
|
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
+# coding=utf8
|
|
|
|
|
|
|
|
-# $Id: blogsiread.py,v 1.8 2022/10/12 19:41:36 springm Exp springm $
|
|
|
|
|
-# $Revision: 1.8 $
|
|
|
|
|
-# $Date: 2022/10/12 19:41:36 $
|
|
|
|
|
|
|
+# $Id: blogsiread.py,v 1.9 2022/10/12 19:56:10 springm Exp springm $
|
|
|
|
|
+# $Revision: 1.9 $
|
|
|
|
|
+# $Date: 2022/10/12 19:56:10 $
|
|
|
# $Log: blogsiread.py,v $
|
|
# $Log: blogsiread.py,v $
|
|
|
|
|
+# Revision 1.9 2022/10/12 19:56:10 springm
|
|
|
|
|
+# Summary: coding utf-8 hinzugefuegt
|
|
|
|
|
+#
|
|
|
# Revision 1.8 2022/10/12 19:41:36 springm
|
|
# Revision 1.8 2022/10/12 19:41:36 springm
|
|
|
# Summary: lfionline zurückgestellt auf hash des gesamten html.
|
|
# Summary: lfionline zurückgestellt auf hash des gesamten html.
|
|
|
#
|
|
#
|
|
@@ -50,6 +54,8 @@ spring2life_links_url = 'http://spring2life-links.blogspot.com/'
|
|
|
html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
|
|
html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
|
|
|
database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json'
|
|
database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json'
|
|
|
loglevel = logging.WARN
|
|
loglevel = logging.WARN
|
|
|
|
|
+
|
|
|
|
|
+# ------------------------------------------ nothing to change below ---
|
|
|
if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell': # for development
|
|
if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell': # for development
|
|
|
html_file = 'cronlinks.html'
|
|
html_file = 'cronlinks.html'
|
|
|
database_file = 'blogsiread.json'
|
|
database_file = 'blogsiread.json'
|
|
@@ -57,6 +63,7 @@ if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell': #
|
|
|
timestamp = int(time.time())
|
|
timestamp = int(time.time())
|
|
|
list_of_blogs = {}
|
|
list_of_blogs = {}
|
|
|
last_separator = ''
|
|
last_separator = ''
|
|
|
|
|
+re_flags = re.MULTILINE | re.DOTALL | re.UNICODE | re.IGNORECASE
|
|
|
|
|
|
|
|
alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
|
|
alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
|
|
|
|
|
|
|
@@ -185,16 +192,19 @@ def photoplacegallery(b, domain, i):
|
|
|
return (q, meta_values[domain]['timestamp'])
|
|
return (q, meta_values[domain]['timestamp'])
|
|
|
return (b, meta_values[domain]['timestamp'])
|
|
return (b, meta_values[domain]['timestamp'])
|
|
|
|
|
|
|
|
-def lfionlinede(b, domain, i):
|
|
|
|
|
|
|
+def lfionlinede(matchgroup, domain, i):
|
|
|
global meta_values
|
|
global meta_values
|
|
|
m = hashlib.sha256()
|
|
m = hashlib.sha256()
|
|
|
html = ""
|
|
html = ""
|
|
|
ts = 0 # timestamp
|
|
ts = 0 # timestamp
|
|
|
- with urllib.request.urlopen(b[1]) as response:
|
|
|
|
|
|
|
+ with urllib.request.urlopen(matchgroup[1]) as response:
|
|
|
html = response.read()
|
|
html = response.read()
|
|
|
- string2hash = f"""html"""
|
|
|
|
|
- p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
|
|
|
|
|
- # logger.debug(f"{p[2]}")
|
|
|
|
|
|
|
+ logger.debug(f"{matchgroup[1]} -- {len(html.decode('utf-8'))}")
|
|
|
|
|
+ # string2hash = f"""html"""
|
|
|
|
|
+ regex = r"""<div class="titlebox30 cu-pointer" onclick="window.location = '(.*?)'">\s*<h1 class="typo-1">(.*?)</h1>.*?</div>"""
|
|
|
|
|
+ p = re.search( regex, html.decode('utf-8'), re_flags )
|
|
|
|
|
+ string2hash = p[0]
|
|
|
|
|
+ logger.debug(f"{p[0]}")
|
|
|
m.update(string2hash.encode('utf-8'))
|
|
m.update(string2hash.encode('utf-8'))
|
|
|
hash = (m.hexdigest())
|
|
hash = (m.hexdigest())
|
|
|
if not domain in meta_values: # first run
|
|
if not domain in meta_values: # first run
|
|
@@ -219,7 +229,7 @@ def lfionlinede(b, domain, i):
|
|
|
q[1] = meta_values[domain]['posturl']
|
|
q[1] = meta_values[domain]['posturl']
|
|
|
q[2] = meta_values[domain]['posttitle']
|
|
q[2] = meta_values[domain]['posttitle']
|
|
|
return (q, meta_values[domain]['timestamp'])
|
|
return (q, meta_values[domain]['timestamp'])
|
|
|
- return (b, meta_values[domain]['timestamp'])
|
|
|
|
|
|
|
+ return (matchgroup, meta_values[domain]['timestamp'])
|
|
|
|
|
|
|
|
def picturesfromthezone(b, domain, i):
|
|
def picturesfromthezone(b, domain, i):
|
|
|
global meta_values
|
|
global meta_values
|