Markus Spring 1 год назад
Родитель
Сommit
78ab4871b3
1 измененных файлов с 26 добавлено и 1 удалено
  1. 26 1
      blogs-i-read_v2.py

+ 26 - 1
blogs-i-read_v2.py

@@ -101,6 +101,29 @@ def examine_photoplacegallery(soup, url, md5):
         last_update = time.time()
     return md5, post_title, post_url, last_update
 
+def examine_lfionline(soup, url, md5):
+    """Find the newest post on an lfi-online.de page and its update time.
+
+    The post is taken from the first ``div`` with class ``card`` that
+    contains no ``.svg`` image and has both a link and an ``h3`` heading.
+    The update time is derived by comparing ``md5`` with the entry for
+    ``url`` in the module-level ``md5_sums`` mapping; the stored md5 is
+    replaced in place when it differs.
+
+    Args:
+        soup: parsed page offering ``find_all`` (BeautifulSoup-style).
+        url: page address, used as the key into ``md5_sums``.
+        md5: md5 value for the current page, supplied by the caller.
+
+    Returns:
+        Tuple ``(md5, post_title, post_url, last_update)``; title and URL
+        stay empty strings when no suitable card is found.
+    """
+    (post_title, post_url, last_update) = ['', '', 0]
+    logger.debug('examine_lfionline')
+    for card in soup.find_all(name="div", class_="card"):
+        # The src filter is also invoked with None for <img> tags that have
+        # no src attribute, so guard before calling endswith().
+        if card.find_all('img', src=lambda x: x is not None and x.endswith('.svg')):
+            continue
+        link = card.find('a', href=True)
+        heading = card.find(name="h3")
+        if link is None or heading is None:
+            # Not a post card; keep looking instead of raising.
+            continue
+        post_url = link['href']
+        post_title = heading.text
+        break
+    if url in md5_sums:
+        logger.debug(f'found {url} in md5_sums')
+        if md5_sums[url]['md5'] != md5:
+            logger.debug('md5 not equal')
+            md5_sums[url]['md5'] = md5
+            last_update = time.time()
+        else:
+            # Page unchanged: reuse the timestamp recorded earlier.
+            logger.debug('md5 equal')
+            last_update = md5_sums[url]['timestamp']
+    else:
+        # First time this URL is seen: treat it as updated now.
+        last_update = time.time()
+    return md5, post_title, post_url, last_update
+
 def examine_generic_website(soup, url, md5):
     (post_title, post_url, last_update) = ['', '', 0]
     prothost    = re.search(r'^http[s]*:\/\/[\w\.]*', url).group()
@@ -127,7 +150,9 @@ def examine_url(url):
         body = soup.find('body')
         the_contents_of_body_without_body_tags = body.findChildren(recursive=False)
     # if True:
-        if "photoplacegallery.com" in url:
+        if 'lfi-online.de' in url:
+            (md5, post_title, post_url, last_update) = examine_lfionline(soup, url, md5)
+        elif "photoplacegallery.com" in url:
             (md5, post_title, post_url, last_update) = examine_photoplacegallery(soup, url, md5)
         elif "claudioturri.it" in url:
             (md5, post_title, post_url, last_update) = examine_generic_website(soup, url, md5)