|
@@ -2,10 +2,14 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
# coding=utf8
|
|
# coding=utf8
|
|
|
|
|
|
|
|
-# $Id: blogsiread.py,v 1.9 2022/10/12 19:56:10 springm Exp springm $
|
|
|
|
|
-# $Revision: 1.9 $
|
|
|
|
|
-# $Date: 2022/10/12 19:56:10 $
|
|
|
|
|
|
|
+# $Id: blogsiread.py,v 1.10 2022/11/10 13:32:19 springm Exp springm $
|
|
|
|
|
+# $Revision: 1.10 $
|
|
|
|
|
+# $Date: 2022/11/10 13:32:19 $
|
|
|
|
|
+#
|
|
|
# $Log: blogsiread.py,v $
|
|
# $Log: blogsiread.py,v $
|
|
|
|
|
+# Revision 1.10 2022/11/10 13:32:19 springm
|
|
|
|
|
+# Summary: lfi nochmal korrigiert; strin2hash war falsch
|
|
|
|
|
+#
|
|
|
# Revision 1.9 2022/10/12 19:56:10 springm
|
|
# Revision 1.9 2022/10/12 19:56:10 springm
|
|
|
# Summary: coding utf-8 hinzugefuegt
|
|
# Summary: coding utf-8 hinzugefuegt
|
|
|
#
|
|
#
|
|
@@ -157,40 +161,43 @@ def photoplacegallery(b, domain, i):
|
|
|
'Sec-Fetch-Site' : 'cross-site',
|
|
'Sec-Fetch-Site' : 'cross-site',
|
|
|
'Pragma' : 'no-cache',
|
|
'Pragma' : 'no-cache',
|
|
|
'Cache-Control' : 'no-cache' })
|
|
'Cache-Control' : 'no-cache' })
|
|
|
- r = urllib.request.urlopen(req)
|
|
|
|
|
- with r as response:
|
|
|
|
|
- html = response.read()
|
|
|
|
|
- # hash only from content-relevant part of website
|
|
|
|
|
- subset = re.search( '<div class="main">.*?<div class="widget-title"><a href=".*?" class="h3">(.*?)</a>.*?</div>\s*</div>\s*</div>', html.decode('utf-8'), re.MULTILINE | re.DOTALL )
|
|
|
|
|
- m.update(subset[1].encode('utf-8'))
|
|
|
|
|
- hash = (m.hexdigest())
|
|
|
|
|
- if not domain in meta_values: # first run
|
|
|
|
|
- meta_values[domain] = { 'hash': '1' } # fake value
|
|
|
|
|
- if not meta_values[domain]['hash'] == hash: # Unterschiedliche Hashes
|
|
|
|
|
- logger.debug(f"unterschiedliche hashes")
|
|
|
|
|
- meta_values[domain]['hash'] = hash
|
|
|
|
|
- p = re.search('<div class="widget-title"><a href="(/online-juried-shows/[^"]*?)" class="h3">([^<]*?)</a></div>', html.decode('utf-8'), re.MULTILINE | re.DOTALL )
|
|
|
|
|
- if p:
|
|
|
|
|
- logger.debug(f"re search erfolgreich: {p[1]} {p[2]}")
|
|
|
|
|
- meta_values[domain] = { 'hash': hash,
|
|
|
|
|
- 'timestamp': timestamp - i,
|
|
|
|
|
- 'posttitle': p[2],
|
|
|
|
|
- 'posturl': f"https://{domain}{p[1]}" }
|
|
|
|
|
- q = {}
|
|
|
|
|
- q[1] = f"https://{domain}{p[1]}"
|
|
|
|
|
- q[2] = p[2]
|
|
|
|
|
- return (q, timestamp + i)
|
|
|
|
|
- # print(meta_values)
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ r = urllib.request.urlopen(req)
|
|
|
|
|
+ with r as response:
|
|
|
|
|
+ html = response.read()
|
|
|
|
|
+ # hash only from content-relevant part of website
|
|
|
|
|
+ subset = re.search( '<div class="main">.*?<div class="widget-title"><a href=".*?" class="h3">(.*?)</a>.*?</div>\s*</div>\s*</div>', html.decode('utf-8'), re.MULTILINE | re.DOTALL )
|
|
|
|
|
+ m.update(subset[1].encode('utf-8'))
|
|
|
|
|
+ hash = (m.hexdigest())
|
|
|
|
|
+ if not domain in meta_values: # first run
|
|
|
|
|
+ meta_values[domain] = { 'hash': '1' } # fake value
|
|
|
|
|
+ if not meta_values[domain]['hash'] == hash: # Unterschiedliche Hashes
|
|
|
|
|
+ logger.debug(f"unterschiedliche hashes")
|
|
|
|
|
+ meta_values[domain]['hash'] = hash
|
|
|
|
|
+ p = re.search('<div class="widget-title"><a href="(/online-juried-shows/[^"]*?)" class="h3">([^<]*?)</a></div>', html.decode('utf-8'), re.MULTILINE | re.DOTALL )
|
|
|
|
|
+ if p:
|
|
|
|
|
+ logger.debug(f"re search erfolgreich: {p[1]} {p[2]}")
|
|
|
|
|
+ meta_values[domain] = { 'hash': hash,
|
|
|
|
|
+ 'timestamp': timestamp - i,
|
|
|
|
|
+ 'posttitle': p[2],
|
|
|
|
|
+ 'posturl': f"https://{domain}{p[1]}" }
|
|
|
|
|
+ q = {}
|
|
|
|
|
+ q[1] = f"https://{domain}{p[1]}"
|
|
|
|
|
+ q[2] = p[2]
|
|
|
|
|
+ return (q, timestamp + i)
|
|
|
|
|
+ # print(meta_values)
|
|
|
|
|
+ else:
|
|
|
|
|
+ pass
|
|
|
|
|
+ #print('p is empty :(')
|
|
|
else:
|
|
else:
|
|
|
- pass
|
|
|
|
|
- #print('p is empty :(')
|
|
|
|
|
- else:
|
|
|
|
|
- logger.debug(f"gleiche hashes")
|
|
|
|
|
- q = {}
|
|
|
|
|
- q[1] = meta_values[domain]['posturl']
|
|
|
|
|
- q[2] = meta_values[domain]['posttitle']
|
|
|
|
|
- return (q, meta_values[domain]['timestamp'])
|
|
|
|
|
- return (b, meta_values[domain]['timestamp'])
|
|
|
|
|
|
|
+ logger.debug(f"gleiche hashes")
|
|
|
|
|
+ q = {}
|
|
|
|
|
+ q[1] = meta_values[domain]['posturl']
|
|
|
|
|
+ q[2] = meta_values[domain]['posttitle']
|
|
|
|
|
+ return (q, meta_values[domain]['timestamp'])
|
|
|
|
|
+ except:
|
|
|
|
|
+ logger.debug('request to photogplacegallery failed')
|
|
|
|
|
+ return (b, meta_values[domain]['timestamp'])
|
|
|
|
|
|
|
|
def lfionlinede(matchgroup, domain, i):
|
|
def lfionlinede(matchgroup, domain, i):
|
|
|
global meta_values
|
|
global meta_values
|