#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# $Id: blogsiread.py,v 1.13 2023/12/07 20:36:12 springm Exp springm $
# $Revision: 1.13 $
# $Date: 2023/12/07 20:36:12 $
#
# $Log: blogsiread.py,v $
# Revision 1.13  2023/12/07 20:36:12  springm
# Summary: adapted the lfi logic to the new layout
#
# Revision 1.12  2023/12/07 20:14:40  springm
# Summary: commented out lfi until it is repaired
#
# Revision 1.11  2022/12/28 07:30:17  springm
# Summary: added try...except to photoplacegallery
#
# Revision 1.10  2022/11/10 13:32:19  springm
# Summary: fixed lfi again; string2hash was wrong
#
# Revision 1.9  2022/10/12 19:56:10  springm
# Summary: added coding utf-8
#
# Revision 1.8  2022/10/12 19:41:36  springm
# Summary: lfionline reverted to hashing the whole html
#
# Revision 1.7  2022/10/10 15:16:29  springm
# Summary: added special treatment for picturesfromthezone
#
# Revision 1.6  2022/10/10 14:30:28  springm
# Summary: fixed lfi
#
# Revision 1.5  2022/10/01 11:36:32  springm
# Summary: Works
#
# Revision 1.4  2022/09/29 04:42:00  springm
# Summary: works, but LFI gets on top too often
#
# Revision 1.3  2022/09/02 05:06:33  springm
# Summary: photoplacegallery hash now taken from the title of the first exhibition

"""
* if yes
* read the spring2life linklist on blogger
* special treatment for websites without feed
* save list with timestamp into file
* output list
"""

import json
import hashlib
import time
import datetime
import logging
import logging.config
import os
import os.path
import re
import socket
import urllib.request
from pathlib import Path

spring2life_links_url = 'http://spring2life-links.blogspot.com/'
html_file = '/home/springm/docker/wordpress-nginx/wordpress/wp-content/themes/twentytwentyone-child-spring2life/cronlinks.html'
database_file = '/home/springm/docker/wordpress-nginx/blogsiread.json'
loglevel = logging.WARN

# ------------------------------------------ nothing to change below ---

if socket.gethostname() == 'denkbrett' or socket.gethostname() == 'kudell':
    # for development
    html_file = 'cronlinks.html'
    database_file = 'blogsiread.json'
    loglevel = logging.DEBUG

timestamp = int(time.time())
list_of_blogs = {}
last_separator = ''
re_flags = re.MULTILINE | re.DOTALL | re.UNICODE | re.IGNORECASE
alternative_blog_urls = {
    'jlfeixa.tumblr.com': 'www.jeanlucfeixa.com'
}


def reduce_lines(html):
    """Reduce the Blogger page to the lines holding the linklist widget.

    The start marker below is an assumption: the original condition that
    set `found` was lost in transit, but the surviving logic collects
    lines until the first empty one once the widget has been seen.
    """
    lines = html.split('\n')
    i = 0
    found = 0
    bloglist = ''
    while i < len(lines):
        if 'BlogList' in lines[i]:  # assumed marker for the linklist widget
            found = 1
        if found == 1 and lines[i] == "":
            found = 0
            break
        if found == 1:
            # print(lines[i])
            bloglist = bloglist + lines[i]
        i = i + 1
    return bloglist


def timestamp_to_epoch_secs(time_text, i):
    """Convert Blogger's German relative times ("3 Stunden" = 3 hours ago)
    to epoch seconds; i keeps simultaneous entries in a stable order."""
    m = re.search(r"(\d+) (Sekunde|Minute|Stunde|Tag|Woche|Monat|Jahr)",
                  time_text)
    if m:
        if m.group(2).startswith('Sekunde'):      # seconds
            return timestamp - int(m.group(1)) - i
        elif m.group(2).startswith('Minute'):     # minutes
            return timestamp - int(m.group(1)) * 60 - i
        elif m.group(2).startswith('Stunde'):     # hours
            return timestamp - int(m.group(1)) * 3600 - i
        elif m.group(2).startswith('Tag'):        # days
            return timestamp - int(m.group(1)) * 24 * 3600 - i
        elif m.group(2).startswith('Woche'):      # weeks
            return timestamp - int(m.group(1)) * 7 * 24 * 3600 - i
        elif m.group(2).startswith('Monat'):      # months
            return timestamp - int(m.group(1)) * 30 * 24 * 3600 - i
        elif m.group(2).startswith('Jahr'):       # years
            return timestamp - int(m.group(1)) * 365 * 24 * 3600 - i
    # else:
    #     print(time_text)
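
# A minimal usage sketch (illustrative only, not executed): for a widget
# line rendered as "vor 3 Stunden" ("3 hours ago"), the call
#
#     timestamp_to_epoch_secs('vor 3 Stunden', 5)
#
# returns timestamp - 3 * 3600 - 5, i.e. "now" minus three hours, nudged
# by the counter so two blogs updated in the same hour keep distinct keys
# in list_of_blogs.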

def orengradcom(b, domain, i):
    """Change detection for www.orengrad.com, which offers no feed."""
    global meta_values
    m = hashlib.sha256()
    html = ""
    ts = 0  # timestamp
    url = 'https://www.orengrad.com/thingsseen/index.html'
    with urllib.request.urlopen(b[1]) as response:
        html = response.read()
    m.update(html)
    hash = m.hexdigest()
    if domain not in meta_values:  # first run
        meta_values[domain] = {'hash': '1'}  # fake value
    if not meta_values[domain]['hash'] == hash:  # different hashes
        logger.debug("different hashes")
        meta_values[domain]['hash'] = hash
        # The tags inside this pattern were lost in transit; this is a
        # reconstruction of the idea: the first link under the
        # div id="bodycontent" element is the newest entry.
        p = re.search(r'div id="bodycontent">.*?<a href="(.*?)"[^>]*>\s*(.*?)\s*</a>',
                      html.decode('utf-8'), re.MULTILINE | re.DOTALL)
        if p:
            meta_values[domain] = {'hash': hash,
                                   'timestamp': timestamp - i,
                                   'posttitle': p[2],
                                   'posturl': p[1]}
            q = {}
            q[1] = p[1]
            q[2] = p[2]
            return (q, timestamp + i)
    else:
        logger.debug("same hashes")
        q = {}
        q[1] = meta_values[domain]['posturl']
        q[2] = meta_values[domain]['posttitle']
        return (q, meta_values[domain]['timestamp'])
    # tail reconstructed after the pattern of the other handlers
    return (b, meta_values[domain]['timestamp'])


def photoplacegallery(b, domain, i):
    """Change detection for photoplacegallery.com; since revision 1.3 the
    hash is taken from the title of the first exhibition."""
    global meta_values
    m = hashlib.sha256()
    html = ""
    ts = 0  # timestamp
    try:
        with urllib.request.urlopen(b[1]) as response:
            html = response.read()
        # The tags of this pattern were lost in transit; the heading
        # element is an assumption, the \s*\s* tail survives from the
        # original.
        subset = re.search(r'<h1[^>]*>\s*(.*?)\s*</h1>\s*\s*',
                           html.decode('utf-8'), re.MULTILINE | re.DOTALL)
        m.update(subset[1].encode('utf-8'))
        hash = m.hexdigest()
        if domain not in meta_values:  # first run
            meta_values[domain] = {'hash': '1'}  # fake value
        if not meta_values[domain]['hash'] == hash:  # different hashes
            logger.debug("different hashes")
            meta_values[domain]['hash'] = hash
            # Link and title of the first exhibition; tags reconstructed.
            p = re.search(r'<a href="([^"]*?)"[^>]*>([^<]*?)</a>',
                          html.decode('utf-8'), re.MULTILINE | re.DOTALL)
            if p:
                logger.debug(f"re search successful: {p[1]} {p[2]}")
                meta_values[domain] = {'hash': hash,
                                       'timestamp': timestamp - i,
                                       'posttitle': p[2],
                                       'posturl': f"https://{domain}{p[1]}"}
                q = {}
                q[1] = f"https://{domain}{p[1]}"
                q[2] = p[2]
                return (q, timestamp + i)
                # print(meta_values)
            else:
                pass  # print('p is empty :(')
        else:
            logger.debug("same hashes")
            q = {}
            q[1] = meta_values[domain]['posturl']
            q[2] = meta_values[domain]['posttitle']
            return (q, meta_values[domain]['timestamp'])
    except:
        logger.debug('request to photoplacegallery failed')
    return (b, meta_values[domain]['timestamp'])
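
# All four special-domain handlers share one change-detection pattern:
# fetch the page, hash a slice of it that only changes with a new post,
# compare that hash with the one remembered in meta_values[domain], and
# only move the blog up (timestamp + i) when the hash really changed.
# A condensed sketch of the pattern (hypothetical handler, illustrative
# names, not called anywhere):
#
#     def example_handler(b, domain, i):
#         html = urllib.request.urlopen(b[1]).read()
#         hash = hashlib.sha256(html).hexdigest()
#         if meta_values.get(domain, {}).get('hash') != hash:
#             meta_values[domain] = {'hash': hash, 'timestamp': timestamp - i,
#                                    'posttitle': '', 'posturl': b[1]}
#             return (b, timestamp + i)
#         return (b, meta_values[domain]['timestamp'])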

def lfionlinede(matchgroup, domain, i):
    """Change detection for lfi-online.de; adapted to the new layout in
    revision 1.13."""
    global meta_values
    m = hashlib.sha256()
    html = ""
    ts = 0  # timestamp
    with urllib.request.urlopen(matchgroup[1]) as response:
        html = response.read()
    logger.debug(f"{matchgroup[1]} -- {len(html.decode('utf-8'))}")
    # The HTML anchors of both the old (commented) and the current pattern
    # were lost in transit; the tags here are reconstructions of the idea:
    # first entry in the STORIES section - its link, its title and the
    # badge image that marks paid content.
    # regex = r"""STORIES.*?<a href="(.*?)"[^>]*>\s*(.*?)\s*</a>"""
    regex = r"""STORIES.*?<a href="(.*?)"[^>]*>\s*(.*?)\s*<.*?src="(.*?)" """
    p = re.search(regex, html.decode('utf-8'), re_flags)
    # if the pattern misses, the TypeError below is caught in
    # treat_special_domain
    if p[3].endswith('lfi-plus.svg'):
        # premium content, exiting
        quit()
    string2hash = p[0]
    logger.debug(f"{p[0]}")
    m.update(string2hash.encode('utf-8'))
    hash = m.hexdigest()
    if domain not in meta_values:  # first run
        meta_values[domain] = {'hash': '1'}  # fake value
    if not meta_values[domain]['hash'] == hash:  # different hashes
        logger.debug('different hashes')
        logger.debug(f"search result {p[1]} {p[2]}")
        meta_values[domain] = {'hash': hash,
                               'timestamp': timestamp - i,
                               'posttitle': p[2],
                               'posturl': p[1]}
        q = {}
        q[1] = p[1]
        q[2] = p[2]
        return (q, timestamp + i)
    else:
        logger.debug('same hashes')
        q = {}
        q[1] = meta_values[domain]['posturl']
        q[2] = meta_values[domain]['posttitle']
        return (q, meta_values[domain]['timestamp'])
    return (matchgroup, meta_values[domain]['timestamp'])


def picturesfromthezone(b, domain, i):
    """Change detection for www.picturesfromthezone.com: hash the whole
    page, because there is nothing more fine-grained to anchor on."""
    global meta_values
    m = hashlib.sha256()
    html = ""
    ts = 0  # timestamp
    with urllib.request.urlopen(b[1]) as response:
        html = response.read()
    # Hash the page itself; the old f-string f"""html""" hashed the
    # literal word "html", so the hash could never change.
    m.update(html)
    hash = m.hexdigest()
    if domain not in meta_values:  # first run
        logger.debug(domain)
        meta_values[domain] = {'hash': '1'}  # fake value
    if not meta_values[domain]['hash'] == hash:  # different hashes
        logger.debug('different hashes')
        meta_values[domain] = {'hash': hash,
                               'timestamp': timestamp - i,
                               'posttitle': '',
                               'posturl': b[1]}
        q = {}
        q[2] = ''
        q[1] = b[1]
        return (q, timestamp + i)
    else:
        logger.debug('same hashes')
        q = {}
        q[1] = meta_values[domain]['posturl']
        q[2] = meta_values[domain]['posttitle']
        return (q, meta_values[domain]['timestamp'])
    # return (b, meta_values[domain]['timestamp'])


def treat_special_domain(domain, b, i):
    """Dispatch feed-less domains to their handler; on any error keep the
    entry with ts = 0 so it sinks to the bottom of the list."""
    ts = 0
    try:
        if domain == 'www.orengrad.com':
            (b, ts) = orengradcom(b, domain, i)
        elif domain == 'lfi-online.de':
            (b, ts) = lfionlinede(b, domain, i)
        elif domain == 'photoplacegallery.com':
            (b, ts) = photoplacegallery(b, domain, i)
        elif domain == 'www.picturesfromthezone.com':
            (b, ts) = picturesfromthezone(b, domain, i)
    except:
        pass
    return (b, ts)

def read_spring2life_links():
    """Read the linklist from the spring2life links blog and fill
    list_of_blogs, keyed by the epoch timestamp of the newest post."""
    # print('read_spring2life_links')
    with urllib.request.urlopen(spring2life_links_url) as response:
        html = response.read().decode('utf-8')
    bloglist = reduce_lines(html)
    # The anchors of this pattern were lost in transit; reconstruction of
    # the Blogger linklist markup: blog link and title, then everything
    # up to the next blog-title as group 3.
    regex = r"'blog-title'>\s*<a href='(.*?)'[^>]*>\s*(.*?)<\/a>(.*?)(?='blog-title'|$)"
    counter = 0
    global list_of_blogs
    for b in re.finditer(regex, bloglist, re.MULTILINE | re.DOTALL):
        burl = b[1]
        # strip the scheme and any path to get the bare domain; the
        # original re.sub call was truncated, this is a reconstruction
        bdomain = re.sub(r"(https?://|/.*)", "", burl)
        if bdomain in alternative_blog_urls.keys():
            burl = burl.replace(bdomain, alternative_blog_urls[bdomain])
            # print(f"---->", burl)
        btitle = b[2]
        z = re.search(r".*?href='(.*?)'.*?>\s*(.*?)<.*?item-time'>\s*(.*?)\s*<",
                      b[3], re.MULTILINE | re.DOTALL)
        if z:
            purl = z[1]
            ptitle = z[2]
            blogtimestamp = timestamp_to_epoch_secs(z[3], counter)
        else:
            (z, ts) = treat_special_domain(bdomain, b, counter)
            blogtimestamp = ts
        counter += 1
        # The tags of this template were lost in transit; reconstruction:
        # one list item linking the blog and, after //, its newest post.
        list_of_blogs[int(blogtimestamp)] = (
            f"""<li><a href="{burl}">{b[2]}</a>"""
            f""" // <a href="{z[1]}">{z[2]}</a></li>""")
  • {b[2]}""" f""" // {z[2]}
  • """) def read_value_hash(): global meta_values try: f = open(database_file, 'r') meta_values = json.loads(f.read()) # meta_values['lfi-online.de']['hash'] = 'abc' # for testing, set false hash # print(meta_values) except: meta_values = {} def write_value_hash(): f = open(database_file, 'w+') f.write(json.dumps(meta_values)) def separator(t): global last_separator # print(f"{timestamp - t} -- {last_separator}") if ( timestamp - t ) > 10368000: if not last_separator == "From medieval times": # 24*30*24*600 last_separator = "From medieval times" return last_separator elif ( timestamp - t ) > 2592000: if not last_separator == "Quite old": # 6*30*24*600 last_separator = "Quite old" return last_separator elif ( timestamp - t ) > 432000: if not last_separator == "Less then a month": # 30*24*600 last_separator = "Less then a month" return last_separator elif ( timestamp - t ) > 100800: if not last_separator == "Less then a week": # 7*24*600 last_separator = "Less then a week" return last_separator elif ( timestamp - t ) > 86400: if not last_separator == "A day and older": # 24*600 last_separator = "A day and older" return last_separator elif ( timestamp - t ) < 86400: if not last_separator == "Hot from the Blogosphere": # 24*600 last_separator = "Hot from the Blogosphere" return last_separator return False def output_list(): # print(timestamp) with open(html_file, "w") as f: # f.write( f"{blogtimestamp};{bdomain};{burl};{btitle};{purl};{ptitle}\n" ) firstsep = True for t in sorted(list_of_blogs, reverse=True): sep = separator(t) if sep: if not firstsep: f.write("") else: firstsep = False f.write(f"

def output_list():
    # print(timestamp)
    with open(html_file, "w") as f:
        # f.write(f"{blogtimestamp};{bdomain};{burl};{btitle};{purl};{ptitle}\n")
        firstsep = True
        for t in sorted(list_of_blogs, reverse=True):
            sep = separator(t)
            if sep:
                if not firstsep:
                    f.write("</ul>\n")  # close the previous group (reconstructed tag)
                else:
                    firstsep = False
                f.write(f"<h4>{sep}</h4>\n<ul>\n")  # reconstructed tags
            f.write(f"\t{list_of_blogs[t]}\n")
        f.write("</ul>\n")  # reconstructed tag


logger = logging.getLogger(__name__)

# ------------------------------------------------------------- main ---


def main():
    logging_config = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'standard': {
                # 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
                'format': '[ %(lineno)s - %(funcName)25s() ] %(message)s'
            },
        },
        'handlers': {
            'default_handler': {
                'class': 'logging.StreamHandler',
                'formatter': 'standard',
                'level': loglevel
            },
            # {
            #     'class': 'logging.FileHandler',
            #     'level': 'DEBUG',
            #     'formatter': 'standard',
            #     'filename': os.path.join('', 'application.log'),
            #     'encoding': 'utf8'
            # },
        },
        'loggers': {
            '': {
                'handlers': ['default_handler'],
                'level': 'DEBUG',
                'propagate': False
            }
        }
    }
    logging.config.dictConfig(logging_config)

    read_value_hash()
    read_spring2life_links()
    output_list()
    write_value_hash()


if __name__ == '__main__':
    main()

# Local Variables:
# compile-command: "./blogsiread.py --log DEBUG"
# End: