|
|
@@ -101,6 +101,29 @@ def examine_photoplacegallery(soup, url, md5):
|
|
|
last_update = time.time()
|
|
|
return md5, post_title, post_url, last_update
|
|
|
|
|
|
def examine_lfionline(soup, url, md5):
    """Pick the first regular card on an lfi-online.de page and resolve its timestamp.

    The page is scanned for ``<div class="card">`` elements. The first card
    that contains no ``<img>`` whose ``src`` ends in ``.svg`` and that has
    both a link and an ``<h3>`` heading supplies the post URL and title.
    (Presumably the ``.svg`` cards are placeholders or promo tiles rather
    than posts -- confirm against the live page.)

    Args:
        soup: Parsed page; must offer BeautifulSoup's ``find_all`` interface.
        url: Page URL, used as the key into the module-level ``md5_sums``.
        md5: Checksum of the freshly fetched page.

    Returns:
        Tuple ``(md5, post_title, post_url, last_update)``. ``post_title``
        and ``post_url`` stay ``''`` when no suitable card exists.
        ``last_update`` is the stored timestamp when the checksum is
        unchanged, otherwise the current time.

    Raises:
        KeyError: If the ``md5_sums`` entry for ``url`` lacks the ``'md5'``
            key, or lacks ``'timestamp'`` when the checksum is unchanged.
    """
    (post_title, post_url, last_update) = ['', '', 0]
    logger.debug('examine_lfionline')

    def _is_svg(src):
        # BeautifulSoup hands attribute filters None for tags that lack the
        # attribute, so guard before calling a str method on it.
        return src is not None and src.endswith('.svg')

    for card in soup.find_all(name="div", class_="card"):
        if card.find('img', src=_is_svg) is not None:
            continue
        link = card.find('a', href=True)
        heading = card.find(name="h3")
        if link is None or heading is None:
            # Incomplete card: keep looking instead of crashing on None.
            continue
        post_url = link['href']
        post_title = heading.text
        break

    if url not in md5_sums:
        # First time this URL is seen: treat the page as updated just now.
        return md5, post_title, post_url, time.time()

    logger.debug(f'found {url} in md5_sums')
    if md5_sums[url]['md5'] != md5:
        logger.debug('md5 not equal')
        # Only the checksum is stored here; the stored timestamp is left as is.
        md5_sums[url]['md5'] = md5
        last_update = time.time()
    else:
        logger.debug('md5 equal')
        last_update = md5_sums[url]['timestamp']
    return md5, post_title, post_url, last_update
|
|
|
+
|
|
|
def examine_generic_website(soup, url, md5):
|
|
|
(post_title, post_url, last_update) = ['', '', 0]
|
|
|
prothost = re.search(r'^http[s]*:\/\/[\w\.]*', url).group()
|
|
|
@@ -127,7 +150,9 @@ def examine_url(url):
|
|
|
body = soup.find('body')
|
|
|
the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
|
|
|
# if True:
|
|
|
- if "photoplacegallery.com" in url:
|
|
|
+ if 'lfi-online.de' in url:
|
|
|
+ (md5, post_title, post_url, last_update) = examine_lfionline(soup, url, md5)
|
|
|
+ elif "photoplacegallery.com" in url:
|
|
|
(md5, post_title, post_url, last_update) = examine_photoplacegallery(soup, url, md5)
|
|
|
elif "claudioturri.it" in url:
|
|
|
(md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)
|