#!/usr/bin/python3 # -*- coding: utf-8 -*- # coding=utf8 # $Id: blogsiread.py,v 1.13 2023/12/07 20:36:12 springm Exp springm $ # $Revision: 1.13 $ # $Date: 2023/12/07 20:36:12 $ # # $Log: blogsiread.py,v $ # Revision 1.13 2023/12/07 20:36:12 springm # Summary: lfi logik an neues Layout angepasst # # Revision 1.12 2023/12/07 20:14:40 springm # Summary: lfi auskommentiert bis zur Reparatur # # Revision 1.11 2022/12/28 07:30:17 springm # Summary: added try...except to photoplacegallery # # Revision 1.10 2022/11/10 13:32:19 springm # Summary: lfi nochmal korrigiert; strin2hash war falsch # # Revision 1.9 2022/10/12 19:56:10 springm # Summary: coding utf-8 hinzugefuegt # # Revision 1.8 2022/10/12 19:41:36 springm # Summary: lfionline zurückgestellt auf hash des gesamten html. # # Revision 1.7 2022/10/10 15:16:29 springm # Summary: added special treatment for picturesfromthezone # # Revision 1.6 2022/10/10 14:30:28 springm # Summary: lfi repariert # # Revision 1.5 2022/10/01 11:36:32 springm # Summary: Works # # Revision 1.4 2022/09/29 04:42:00 springm # Summary: works, but LFI gets on top too often # # Revision 1.3 2022/09/02 05:06:33 springm # Summary: photoplacegallery hash jetzt vom Titel der ersten Ausstellung # """ * if yes * read the spring2life linklist on blogger, * special treatment for websites without feed * save list with timestamp into file * output list """ import json import hashlib import time import datetime import logging import logging.config import os import os.path import re import socket import time import urllib.request from pathlib import Path spring2life_links_url = 'http://spring2life-links.blogspot.com/' html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html' database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json' loglevel = logging.WARN # ------------------------------------------ nothing to change below --- if socket.gethostname() == 'denkbrett' or 
socket.gethostname() == 'kudell': # for development html_file = 'cronlinks.html' database_file = 'blogsiread.json' loglevel = logging.DEBUG timestamp = int(time.time()) list_of_blogs = {} last_separator = '' re_flags = re.MULTILINE | re.DOTALL | re.UNICODE | re.IGNORECASE alternative_blog_urls = { 'jlfeixa.tumblr.com' : 'www.jeanlucfeixa.com' } def reduce_lines(html): lines = html.split('\n') i = 0 j = 0 found = 0 bloglist = '' while i < len(lines): if lines[i] == "
.*?
STORIES.*?.*?
\s*STORIES.*?(.*?)
.*?src="(.*?)" """
p = re.search( regex, html.decode('utf-8'), re_flags )
if p[3].endswith('lfi-plus.svg'):
# print('brämium gondend, exiting')
quit()
string2hash = p[0]
logger.debug(f"{p[0]}")
m.update(string2hash.encode('utf-8'))
hash = (m.hexdigest())
if not domain in meta_values: # first run
meta_values[domain] = { 'hash': '1' } # fake value
if not meta_values[domain]['hash'] == hash: # Unterschiedliche Hashes
logger.debug('unterschiedliche hashes')
logger.debug(f"search result {p[1]} {p[2]}")
# string2hash = f"""p[2]"""
m.update(string2hash.encode('utf-8'))
# hash = (m.hexdigest())
meta_values[domain] = { 'hash': hash,
'timestamp': timestamp - i,
'posttitle': p[2],
'posturl': p[1] }
q = {}
q[1] = p[1]
q[2] = p[2]
return (q, timestamp + i)
else:
logger.debug('gleiche hashes')
q = {}
q[1] = meta_values[domain]['posturl']
q[2] = meta_values[domain]['posttitle']
return (q, meta_values[domain]['timestamp'])
return (matchgroup, meta_values[domain]['timestamp'])
def picturesfromthezone(b, domain, i):
    """Detect updates on picturesfromthezone.com, which offers no feed.

    Fetches the page at b[1], hashes the complete HTML and compares the
    digest with the one stored in the module-level ``meta_values`` from the
    previous run.

    Args:
        b: match-group-like mapping; b[1] is the blog URL, b[2] the title.
        domain: domain name used as the key into ``meta_values``.
        i: position of the blog in the link list, used to spread timestamps.

    Returns:
        (q, ts): q is a dict with q[1] = post URL and q[2] = post title,
        ts is the timestamp to sort the entry by.
    """
    global meta_values
    m = hashlib.sha256()
    with urllib.request.urlopen(b[1]) as response:
        html = response.read()
    # BUG FIX: the original did ``string2hash = f'''html'''`` -- an f-string
    # with no placeholder, i.e. the literal text "html" -- so the digest was
    # constant and content changes were never detected after the first run.
    # Hash the actual page bytes instead (per the 1.8 changelog entry:
    # "hash des gesamten html").
    m.update(html)
    digest = m.hexdigest()
    if domain not in meta_values:  # first run for this domain
        logger.debug(domain)
        meta_values[domain] = {'hash': '1'}  # fake value, forces a mismatch
    if meta_values[domain]['hash'] != digest:  # Unterschiedliche Hashes
        logger.debug('unterschiedliche hashes')
        meta_values[domain] = {'hash': digest,
                               'timestamp': timestamp - i,
                               'posttitle': '',
                               'posturl': b[1]}
        q = {}
        q[2] = ''
        q[1] = b[1]
        # NOTE(review): stores timestamp - i but returns timestamp + i,
        # as the original did -- presumably an ordering tweak; confirm.
        return (q, timestamp + i)
    else:
        logger.debug('gleiche hashes')
        q = {}
        q[1] = meta_values[domain]['posturl']
        q[2] = meta_values[domain]['posttitle']
        return (q, meta_values[domain]['timestamp'])
def treat_special_domain(domain, b, i):
    """Dispatch feed-less domains to their hand-written update checkers.

    Args:
        domain: hostname extracted from the blog URL.
        b: match-group-like mapping (b[1] = URL, b[2] = title).
        i: position in the link list, forwarded to the handler.

    Returns:
        (b, ts): possibly-updated match data plus the timestamp reported by
        the handler; ts stays 0 when no handler matched or the handler
        failed.
    """
    ts = 0
    try:
        if domain == 'www.orengrad.com':
            (b, ts) = orengradcom(b, domain, i)
        elif domain == 'lfi-online.de':
            (b, ts) = lfionlinede(b, domain, i)
        elif domain == 'photoplacegallery.com':
            (b, ts) = photoplacegallery(b, domain, i)
        elif domain == 'www.picturesfromthezone.com':
            (b, ts) = picturesfromthezone(b, domain, i)
    except Exception:
        # Best effort: a broken special handler (site layout change, network
        # error) must not kill the whole run -- but unlike the old bare
        # ``except: pass``, record what went wrong so it can be repaired.
        logger.exception('special handler for %s failed', domain)
    return (b, ts)
def read_spring2life_links():
#print('read_spring2life_links')
with urllib.request.urlopen(spring2life_links_url) as response:
html = response.read().decode('utf-8')
bloglist = reduce_lines(html)
regex = r"'blog-title'>\s*\s*(.*?)<\/a>(.*?)")
f.write(f"\t{list_of_blogs[t]}\n")
f.write("
")
# Module-level logger; its handlers/level are configured in main() via
# dictConfig. Defining it after the functions above is fine because they
# resolve the name at call time, not at definition time.
logger = logging.getLogger(__name__)
# ------------------------------------------------------------- main ---
def main():
    """Configure logging, then run the fetch/compare/output pipeline."""
    # Single console handler; its level comes from the module-level
    # ``loglevel`` (DEBUG on the development hosts, WARN otherwise).
    console_handler = {
        'class': 'logging.StreamHandler',
        'formatter': 'standard',
        'level': loglevel,
    }
    logging_config = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'standard': {
                'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s',
            },
        },
        'handlers': {
            'default_handler': console_handler,
        },
        'loggers': {
            # Root logger: everything funnels through the console handler.
            '': {
                'handlers': ['default_handler'],
                'level': 'DEBUG',
                'propagate': False,
            },
        },
    }
    logging.config.dictConfig(logging_config)
    read_value_hash()            # load persisted per-domain hashes
    read_spring2life_links()     # fetch + parse the blogger link list
    output_list()                # write the HTML fragment
    write_value_hash()           # persist updated hashes
# Script entry point: run the pipeline only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
# Local Variables:
# compile-command: "./blogsiread.py --log DEBUG"
# End: