#!/usr/bin/python3
# coding: utf-8
import json
import logging
import os
from os import listdir
from os.path import isdir, isfile, join
import re
import sys
import traceback
from optparse import OptionParser

__all__ = []
__version__ = '0.1'
__date__ = '2021-12-27'
__updated__ = '2021-12-27'

DEBUG = 1
TESTRUN = 0
PROFILE = 0
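# DEBUG forces a -v flag onto the command line, TESTRUN runs the module's
# doctests, and PROFILE runs main() under cProfile; see the __main__ block
# at the bottom of the file.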

def collect_tagged_images(searchindex, d, tagnr, image_metadata):
    """Scan one gallery's index.html for its title and its tagged images."""
    title = ''
    with open(join(d, 'index.html'), mode="r") as file:
        logging.info(join(d, 'index.html'))
        for line in file:
            # The gallery title is the text of the <h1> element.
            titlematch = re.search(r"h1>(.+?)<", line)
            if titlematch:
                title = titlematch.group(1)
                # Drop a leading four-digit year from the title.
                y = re.search(r"\d\d\d\d\s+(\D.*)", title)
                if y:
                    title = y.group(1)
                found = next((i for i, item in enumerate(searchindex)
                              if item["name"] == title and item["type"] == 'gallery'), None)
                # Compare against None explicitly: index 0 is falsy, so a
                # plain "if found" would treat the first entry as missing.
                if found is None:
                    searchindex.append({'id': tagnr, 'name': title, 'type': 'gallery', 'dir': d})
                    tagnr = tagnr + 1
            # Image links carry their tags in a data_index attribute of the
            # form data_index="type|value,type|value,...".
            z = re.search(r'href=[\'"](.*?)["\'].*data_index=["\'](.+?)["\']>', line)
            alltags = ''
            if z:
                img, data = z.groups()
                data = data.replace("'", "")
                tags = data.split(',')
                for t in tags:
                    typus, val = t.split('|')
                    alltags = alltags + ', ' + val
                    found = next((i for i, item in enumerate(searchindex)
                                  if item["name"] == val and item["type"] == typus), None)
                    if found is not None:
                        searchindex[found]['images'].append(d + '/' + img)
                    else:
                        searchindex.append({'id': tagnr, 'name': val, 'type': typus,
                                            'images': [d + '/' + img]})
                        tagnr = tagnr + 1
                image_metadata[d + '/' + img] = remove_prefix(alltags, ', ')
    return searchindex, tagnr, image_metadata
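
# Sketch of the markup this parser expects (values purely illustrative; the
# exact attributes depend on how the gallery pages were generated):
#   <h1>1974 Sommer</h1>
#   <a href="dia_001.jpg" data_index="person|Anna,ort|Garten">
# This would register the gallery "Sommer" plus the tags "Anna" (person) and
# "Garten" (ort), and store "Anna, Garten" as the image's metadata.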

def remove_prefix(text, prefix):
    # str.removeprefix() exists only from Python 3.9 on; this slice drops
    # len(prefix) characters when text starts with prefix, else nothing,
    # because False evaluates to 0 in the "and" expression.
    return text[text.startswith(prefix) and len(prefix):]
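
# Example (illustrative values): remove_prefix(', Anna, Garten', ', ')
# returns 'Anna, Garten'; remove_prefix('abc', 'xy') returns 'abc' unchanged.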

def get_directories_in_gallerydir(gallerydir):
    """Return the sorted subdirectories of gallerydir, skipping _thumbnails."""
    onlydirs = []
    for f in listdir(gallerydir):
        if isdir(join(gallerydir, f)) and f != '_thumbnails':
            onlydirs.append(f)
    onlydirs.sort()
    return onlydirs

def create_search_index(gallerydir, opts):
    """Walk all galleries below gallerydir and write searchdata.js."""
    startdir = os.getcwd()
    os.chdir(gallerydir)
    print('> ' + gallerydir)
    logging.info(f"Creating search index in {gallerydir}")
    # gallerydir should be an absolute path, since we have already
    # chdir'd into it and reuse it below for listing and output.
    dirs = get_directories_in_gallerydir(gallerydir)
    logging.info(dirs)
    searchindex = []
    image_metadata = {}
    i = 0
    tagnr = 0
    for d in dirs:
        print('>> ' + d)
        if not isfile(join(d, 'index.html')):
            continue
        searchindex, tagnr, image_metadata = collect_tagged_images(searchindex, d, tagnr, image_metadata)
        if i == int(opts.limit) - 1:
            print('limit reached')
            break
        i = i + 1
    # The JSON is wrapped in single quotes, so gallery titles and tag
    # values must not contain single quotes themselves.
    with open(join(gallerydir, 'searchdata.js'), 'w') as f:
        f.write("var data = '" + json.dumps(searchindex, ensure_ascii=False) + "';\n")
        f.write("var metadata = '" + json.dumps(image_metadata, ensure_ascii=False) + "';\n")
    os.chdir(startdir)
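
# The emitted searchdata.js looks roughly like this (values illustrative):
#   var data = '[{"id": 0, "name": "Sommer", "type": "gallery", "dir": "1974_sommer"}, ...]';
#   var metadata = '{"1974_sommer/dia_001.jpg": "Anna, Garten", ...}';
# The search frontend presumably runs JSON.parse() on both strings.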

def main(argv=None):
    '''Command line options.'''
    program_version = "v%s" % __version__
    program_build_date = "%s" % __updated__
    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
    program_longdesc = '''create index for directory of lightgallery galleries'''
    program_license = ("Copyright 2021 Markus Spring (markus-spring.info). "
                       "Licensed under the Apache License 2.0\n"
                       "http://www.apache.org/licenses/LICENSE-2.0")

    if argv is None:
        argv = sys.argv[1:]
    try:
        # set up option parser
        parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
        parser.add_option("-l", "--limit", dest="limit", action="store", help="limit number of searched dirs [default: %default]")
        parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")
        parser.add_option("-t", "--title", action="store", dest="title", help="set index title [default: %default]")
        parser.add_option("-w", "--webrootdir", action="store", dest="webrootdir", help="webserver root dir [default: %default]")
        parser.set_defaults(verbose=0, title='sowosamma', webrootdir='/media/thinc_webserver_data', limit=9999)
        (opts, args) = parser.parse_args(argv)
        if len(args) < 1:
            parser.error("directory argument missing")

        # MAIN BODY #
        logging.basicConfig(filename=os.path.basename(sys.argv[0]) + '.log', level=logging.INFO)
        create_search_index(args[0], opts)
        return 0
    except Exception as ex:
        # Catch Exception, not BaseException, so that SystemExit raised by
        # parser.error() and --help can propagate normally.
        ex_type, ex_value, ex_traceback = sys.exc_info()
        stack_trace = []
        for trace in traceback.extract_tb(ex_traceback):
            stack_trace.append("File : %s , Line : %d, Func.Name : %s, Message : %s"
                               % (trace[0], trace[1], trace[2], trace[3]))
        print("Exception type : %s" % ex_type.__name__)
        print("Exception message : %s" % ex_value)
        print("Stack trace : %s" % stack_trace)
        return 2

if __name__ == "__main__":
    if DEBUG:
        sys.argv.append("-v")
    if TESTRUN:
        import doctest
        doctest.testmod()
    if PROFILE:
        import cProfile
        import pstats
        profile_filename = 'create_gallery_search_profile.txt'
        cProfile.run('main()', profile_filename)
        # pstats writes text, so the stats file must be opened in text mode.
        with open("profile_stats.txt", "w") as statsfile:
            p = pstats.Stats(profile_filename, stream=statsfile)
            p.strip_dirs().sort_stats('cumulative').print_stats()
        sys.exit(0)
    sys.exit(main())
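
# Typical invocation (see also the compile-command below):
#   ./create_gallery_search.py -l 5 /media/thinc_webserver_data/vaters_dias/
# indexes at most five gallery directories below the given webroot.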
# Local Variables:
# compile-command: "./create_gallery_search.py -l 5 /media/thinc_webserver_data/vaters_dias/"
# End: