|
@@ -1,9 +1,12 @@
|
|
|
#!/usr/bin/python3
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
-# $Id: blogsiread.py,v 1.4 2022/09/29 04:42:00 springm Exp springm $
|
|
|
|
|
-# $Revision: 1.4 $
|
|
|
|
|
-# $Date: 2022/09/29 04:42:00 $
|
|
|
|
|
|
|
+# $Id: blogsiread.py,v 1.5 2022/10/01 11:36:32 springm Exp springm $
|
|
|
|
|
+# $Revision: 1.5 $
|
|
|
|
|
+# $Date: 2022/10/01 11:36:32 $
|
|
|
# $Log: blogsiread.py,v $
|
|
# $Log: blogsiread.py,v $
|
|
|
|
|
+# Revision 1.5 2022/10/01 11:36:32 springm
|
|
|
|
|
+# Summary: Works
|
|
|
|
|
+#
|
|
|
# Revision 1.4 2022/09/29 04:42:00 springm
|
|
# Revision 1.4 2022/09/29 04:42:00 springm
|
|
|
# Summary: works, but LFI gets on top too often
|
|
# Summary: works, but LFI gets on top too often
|
|
|
#
|
|
#
|
|
@@ -11,8 +14,6 @@
|
|
|
# Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung
|
|
# Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung
|
|
|
#
|
|
#
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
"""
|
|
"""
|
|
|
* if yes
|
|
* if yes
|
|
|
* read the spring2life linklist on blogger,
|
|
* read the spring2life linklist on blogger,
|
|
@@ -38,13 +39,17 @@ from pathlib import Path
|
|
|
spring2life_links_url = 'http://spring2life-links.blogspot.com/'
|
|
spring2life_links_url = 'http://spring2life-links.blogspot.com/'
|
|
|
html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
|
|
html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
|
|
|
database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json'
|
|
database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json'
|
|
|
|
|
+loglevel = logging.WARN
|
|
|
if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell': # for development
|
|
if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell': # for development
|
|
|
- html_file = 'cronlinks.html'
|
|
|
|
|
- database_file = 'blogsiread.json'
|
|
|
|
|
|
|
+ html_file = 'cronlinks.html'
|
|
|
|
|
+ database_file = 'blogsiread.json'
|
|
|
|
|
+ loglevel = logging.DEBUG
|
|
|
timestamp = int(time.time())
|
|
timestamp = int(time.time())
|
|
|
list_of_blogs = {}
|
|
list_of_blogs = {}
|
|
|
last_separator = ''
|
|
last_separator = ''
|
|
|
|
|
|
|
|
|
|
+alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' }
|
|
|
|
|
+
|
|
|
def reduce_lines(html):
|
|
def reduce_lines(html):
|
|
|
lines = html.split('\n')
|
|
lines = html.split('\n')
|
|
|
i = 0
|
|
i = 0
|
|
@@ -176,36 +181,32 @@ def lfionlinede(b, domain, i):
|
|
|
html = ""
|
|
html = ""
|
|
|
ts = 0 # timestamp
|
|
ts = 0 # timestamp
|
|
|
url = 'https://lfi-online.de/ceemes/de/blog/'
|
|
url = 'https://lfi-online.de/ceemes/de/blog/'
|
|
|
- # logger.debug(f"{b[1]}")
|
|
|
|
|
|
|
+ logger.debug(f"{b[1]}")
|
|
|
with urllib.request.urlopen(b[1]) as response:
|
|
with urllib.request.urlopen(b[1]) as response:
|
|
|
html = response.read()
|
|
html = response.read()
|
|
|
p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
|
|
p = re.search('titlebox30 cu-pointer[\'"] onclick=[\'"]window.location\s*=\s*[\'"](https://.*?)[\'"][\'"]>\s*<h1.*?>(.*?)</h1', html.decode('utf-8'), re.MULTILINE | re.DOTALL)
|
|
|
|
|
+ logger.debug(f"{p[2]}")
|
|
|
string2hash = f"""p[2]"""
|
|
string2hash = f"""p[2]"""
|
|
|
m.update(string2hash.encode('utf-8'))
|
|
m.update(string2hash.encode('utf-8'))
|
|
|
hash = (m.hexdigest())
|
|
hash = (m.hexdigest())
|
|
|
if not domain in meta_values: # first run
|
|
if not domain in meta_values: # first run
|
|
|
meta_values[domain] = { 'hash': '1' } # fake value
|
|
meta_values[domain] = { 'hash': '1' } # fake value
|
|
|
if not meta_values[domain]['hash'] == hash: # Unterschiedliche Hashes
|
|
if not meta_values[domain]['hash'] == hash: # Unterschiedliche Hashes
|
|
|
- meta_values[domain]['hash'] = hash
|
|
|
|
|
- if p:
|
|
|
|
|
- logger.debug(f"search result {p[1]} {p[2]}")
|
|
|
|
|
- string2hash = f"""p[2]"""
|
|
|
|
|
- m.update(string2hash.encode('utf-8'))
|
|
|
|
|
- # hash = (m.hexdigest())
|
|
|
|
|
- meta_values[domain] = { 'hash': hash,
|
|
|
|
|
- 'timestamp': timestamp - i,
|
|
|
|
|
- 'posttitle': p[2],
|
|
|
|
|
- 'posturl': p[1] }
|
|
|
|
|
- q = {}
|
|
|
|
|
- q[1] = p[1]
|
|
|
|
|
- q[2] = p[2]
|
|
|
|
|
- return (p, timestamp + i)
|
|
|
|
|
- # print(meta_values)
|
|
|
|
|
- else:
|
|
|
|
|
- pass
|
|
|
|
|
- #print('p is empty :(')
|
|
|
|
|
|
|
+ logger.debug('unterschiedliche hashes')
|
|
|
|
|
+ logger.debug(f"search result {p[1]} {p[2]}")
|
|
|
|
|
+ string2hash = f"""p[2]"""
|
|
|
|
|
+ m.update(string2hash.encode('utf-8'))
|
|
|
|
|
+ # hash = (m.hexdigest())
|
|
|
|
|
+ meta_values[domain] = { 'hash': hash,
|
|
|
|
|
+ 'timestamp': timestamp - i,
|
|
|
|
|
+ 'posttitle': p[2],
|
|
|
|
|
+ 'posturl': p[1] }
|
|
|
|
|
+ q = {}
|
|
|
|
|
+ q[1] = p[1]
|
|
|
|
|
+ q[2] = p[2]
|
|
|
|
|
+ return (q, timestamp + i)
|
|
|
else:
|
|
else:
|
|
|
- logger.debug('hashes are equal')
|
|
|
|
|
|
|
+ logger.debug('gleiche hashes')
|
|
|
q = {}
|
|
q = {}
|
|
|
q[1] = meta_values[domain]['posturl']
|
|
q[1] = meta_values[domain]['posturl']
|
|
|
q[2] = meta_values[domain]['posttitle']
|
|
q[2] = meta_values[domain]['posttitle']
|
|
@@ -219,10 +220,10 @@ def treat_special_domain(domain, b, i):
|
|
|
# elif domain == 'jims-ramblings.blogspot.com':
|
|
# elif domain == 'jims-ramblings.blogspot.com':
|
|
|
# print(f"special: {domain}")
|
|
# print(f"special: {domain}")
|
|
|
elif domain == 'lfi-online.de':
|
|
elif domain == 'lfi-online.de':
|
|
|
|
|
+ logger.debug(f"{b[1]} {b[2]}")
|
|
|
(b, ts) = lfionlinede(b, domain, i)
|
|
(b, ts) = lfionlinede(b, domain, i)
|
|
|
elif domain == 'photoplacegallery.com':
|
|
elif domain == 'photoplacegallery.com':
|
|
|
(b, ts) = photoplacegallery(b, domain, i)
|
|
(b, ts) = photoplacegallery(b, domain, i)
|
|
|
- logger.debug(f"{b[1]} {b[2]}")
|
|
|
|
|
# elif domain == 'www.picturesfromthezone.com':
|
|
# elif domain == 'www.picturesfromthezone.com':
|
|
|
# print(f"special: {domain}")
|
|
# print(f"special: {domain}")
|
|
|
return (b, ts)
|
|
return (b, ts)
|
|
@@ -238,6 +239,10 @@ def read_spring2life_links():
|
|
|
for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
|
|
for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
|
|
|
burl = b[1]
|
|
burl = b[1]
|
|
|
bdomain = re.sub( r"(https?://|/<?.*?$)", "", burl)
|
|
bdomain = re.sub( r"(https?://|/<?.*?$)", "", burl)
|
|
|
|
|
+ # print(f"---->", bdomain)
|
|
|
|
|
+ if bdomain in alternative_blog_urls.keys():
|
|
|
|
|
+ burl = burl.replace(bdomain, alternative_blog_urls[bdomain])
|
|
|
|
|
+ # print(f"---->", burl)
|
|
|
btitle = b[2]
|
|
btitle = b[2]
|
|
|
z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<", b[3], re.MULTILINE | re.DOTALL)
|
|
z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<", b[3], re.MULTILINE | re.DOTALL)
|
|
|
if z:
|
|
if z:
|
|
@@ -248,9 +253,9 @@ def read_spring2life_links():
|
|
|
(z, ts) = treat_special_domain(bdomain, b, counter)
|
|
(z, ts) = treat_special_domain(bdomain, b, counter)
|
|
|
blogtimestamp = ts
|
|
blogtimestamp = ts
|
|
|
if bdomain == 'lfi-online.de':
|
|
if bdomain == 'lfi-online.de':
|
|
|
- logger.debug(f"into list: \n{b[1]} // {b[2]}\n{z[1]} // {z[2]}")
|
|
|
|
|
|
|
+ logger.debug(f"into list: \n{burl} // {b[2]}\n{z[1]} // {z[2]}")
|
|
|
counter += 1
|
|
counter += 1
|
|
|
- list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{b[1]}' target='_blank'>{b[2]}</a>"""
|
|
|
|
|
|
|
+ list_of_blogs[int(blogtimestamp)] = (f"""<li><a href='{burl}' target='_blank'>{b[2]}</a>"""
|
|
|
f""" // <a href='{z[1]}' target='_blank'>{z[2]}</a></li>""")
|
|
f""" // <a href='{z[1]}' target='_blank'>{z[2]}</a></li>""")
|
|
|
|
|
|
|
|
def read_value_hash():
|
|
def read_value_hash():
|
|
@@ -323,13 +328,13 @@ def main():
|
|
|
'formatters': {
|
|
'formatters': {
|
|
|
'standard': {
|
|
'standard': {
|
|
|
# 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
|
|
# 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
|
|
|
- 'format': '[%(lineno)s - %(funcName)25s() ] %(message)s'
|
|
|
|
|
|
|
+ 'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s'
|
|
|
},
|
|
},
|
|
|
},
|
|
},
|
|
|
'handlers': {
|
|
'handlers': {
|
|
|
'default_handler': {'class': 'logging.StreamHandler',
|
|
'default_handler': {'class': 'logging.StreamHandler',
|
|
|
'formatter': 'standard',
|
|
'formatter': 'standard',
|
|
|
- 'level': logging.WARN},
|
|
|
|
|
|
|
+ 'level': loglevel },
|
|
|
# {
|
|
# {
|
|
|
# 'class': 'logging.FileHandler',
|
|
# 'class': 'logging.FileHandler',
|
|
|
# 'level': 'DEBUG',
|
|
# 'level': 'DEBUG',
|