create_gallery_search.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. #!/usr/bin/python3
  2. #coding: utf-8
  3. import json
  4. import logging
  5. import os
  6. from os import listdir
  7. from os.path import isdir, isfile, join
  8. import re
  9. from string import Template
  10. import sys
  11. import traceback
  12. from optparse import OptionParser
# Nothing is exported by ``from <module> import *``.
__all__ = []
# Script metadata; ``__updated__`` is shown as the build date in the
# ``--version`` string assembled in main().
__version__ = '0.1'
__date__ = '2021-12-27'
__updated__ = '2021-12-27'
# Mode switches evaluated in the ``__main__`` section at the end of the file:
# DEBUG appends "-v" to sys.argv, TESTRUN runs doctest, PROFILE runs main()
# under cProfile and exits.
DEBUG = 1
TESTRUN = 0
PROFILE = 0
  20. def collect_tagged_images(searchindex, d, tagnr, image_metadata):
  21. title = ''
  22. with open(join(d, 'index.html'), mode="r") as file:
  23. logging.info(join(d, 'index.html'))
  24. for line in file:
  25. titlematch = re.search("h1>(.+?)<", line)
  26. if titlematch:
  27. title = titlematch.group(1)
  28. # print(title)
  29. y = re.search("\d\d\d\d\s+(\D.*)", title)
  30. if y:
  31. title = y.group(1)
  32. #print(title)
  33. found = next((i for i, item in enumerate(searchindex) if
  34. (item["name"] == title) and item["type"] == 'gallery'), None)
  35. if found:
  36. pass
  37. else:
  38. searchindex.append({'id':tagnr, 'name':title, 'type':'gallery', 'dir':d})
  39. tagnr = tagnr + 1
  40. z = re.search('href=[\'"](.*?)["\'].*data_index=["\'](.+?)["\']>', line)
  41. alltags = ''
  42. if z:
  43. img, data = z.groups()
  44. data = data.replace("'","")
  45. tags = data.split(',')
  46. if len(tags) == 0:
  47. tags = data
  48. for t in tags:
  49. [typus,val] = t.split('|')
  50. alltags = alltags + ', ' + val
  51. found = next((i for i, item in enumerate(searchindex) if
  52. (item["name"] == val) and item["type"] == typus), None)
  53. if found:
  54. searchindex[found]['images'].append(d + '/' + img)
  55. else:
  56. searchindex.append({'id':tagnr,'name':val,'type':typus,'images':[d + '/' + img]})
  57. tagnr = tagnr + 1
  58. image_metadata[d + '/' + img] = remove_prefix(alltags, ', ')
  59. return(searchindex, tagnr, image_metadata)
  60. # return ['','']
  61. def remove_prefix(text, prefix):
  62. return text[text.startswith(prefix) and len(prefix):]
  63. def get_directories_in_gallerydir(gallerydir):
  64. onlydirs = []
  65. for f in listdir(gallerydir):
  66. if isdir(join(gallerydir, f)):
  67. if str(f) != '_thumbnails':
  68. onlydirs.append(f)
  69. onlydirs.sort()
  70. return(onlydirs)
  71. def create_search_index(gallerydir, opts):
  72. startdir = os.getcwd()
  73. os.chdir(gallerydir)
  74. print('> ' + gallerydir)
  75. logging.info(f"Creating search index in {gallerydir}")
  76. dirs = get_directories_in_gallerydir(gallerydir)
  77. logging.info(dirs)
  78. searchindex = []
  79. image_metadata = {}
  80. i = 0
  81. tagnr = 0
  82. for d in dirs:
  83. print('>> ' + d)
  84. print('>>> ' + join(d, 'index.html'))
  85. if not os.path.isfile(join(d, 'index.html')):
  86. continue;
  87. print('>>>> ' + d)
  88. searchindex, tagnr, image_metadata = collect_tagged_images(searchindex, d, tagnr, image_metadata)
  89. if i == int(opts.limit) - 1:
  90. print('limit reached')
  91. break
  92. i = i + 1
  93. pass
  94. # print(json.dumps(searchindex, ensure_ascii=False))
  95. with open(join(gallerydir, 'searchdata.js'), 'w') as f:
  96. f.write("var data = '" + json.dumps(searchindex, ensure_ascii=False) + "';\n")
  97. f.write("var metadata = '" + json.dumps(image_metadata, ensure_ascii=False) + "';\n")
  98. os.chdir(startdir)
  99. def main(argv=None):
  100. '''Command line options.'''
  101. program_name = os.path.basename(sys.argv[0])
  102. program_version = "v0.1"
  103. program_build_date = "%s" % __updated__
  104. program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
  105. #program_usage = '''usage: spam two eggs''' # optional - will be autogenerated by optparse
  106. program_longdesc = '''create index for directory of lightgallery galleries''' # optional - give further explanation about what the program does
  107. program_license = "Copyright 2021 Markus Spring (markus-spring.info). \
  108. Licensed under the Apache License 2.0\nhttp://www.apache.org/licenses/LICENSE-2.0"
  109. if argv is None:
  110. argv = sys.argv[1:]
  111. try:
  112. # setup option parser
  113. parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
  114. # parser.add_option("-i", "--in", dest="infile", help="set input path [default: %default]", metavar="FILE")
  115. # parser.add_option("-o", "--out", dest="outfile", help="set output path [default: %default]", metavar="FILE")
  116. parser.add_option("-l", "--limit", dest="limit", action="store", help="limit number of searched dirs [default: %default]")
  117. parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")
  118. parser.add_option("-t", "--title", action="store", dest="title", help="set index title [default: %default]")
  119. parser.add_option("-w", "--webrootdir", action="store", dest="webrootdir", help="webserver root dir [default: %default]")
  120. parser.set_defaults( verbose=0, title='sowosamma', webrootdir='/media/thinc_webserver_data', limit=9999)
  121. (opts, args) = parser.parse_args(argv)
  122. if len(args) < 1:
  123. parser.error("directory argument missing")
  124. # MAIN BODY #
  125. logging.basicConfig(filename=os.path.basename(sys.argv[0]) + '.log', level=logging.INFO)
  126. create_search_index(args[0], opts)
  127. except BaseException as ex:
  128. ex_type, ex_value, ex_traceback = sys.exc_info() # Get current system exception
  129. trace_back = traceback.extract_tb(ex_traceback) # Extract unformatter stack traces as tuples
  130. stack_trace = list() # Format stacktrace
  131. for trace in trace_back:
  132. stack_trace.append("File : %s , Line : %d, Func.Name : %s, Message : %s" % (trace[0], trace[1], trace[2], trace[3]))
  133. print("Exception type : %s " % ex_type.__name__)
  134. print("Exception message : %s" %ex_value)
  135. print("Stack trace : %s" %stack_trace)
  136. if __name__ == "__main__":
  137. if DEBUG:
  138. sys.argv.append("-v")
  139. if TESTRUN:
  140. import doctest
  141. doctest.testmod()
  142. if PROFILE:
  143. import cProfile
  144. import pstats
  145. profile_filename = 'config_reader.config_reader_profile.txt'
  146. cProfile.run('main()', profile_filename)
  147. statsfile = open("profile_stats.txt", "wb")
  148. p = pstats.Stats(profile_filename, stream=statsfile)
  149. stats = p.strip_dirs().sort_stats('cumulative')
  150. stats.print_stats()
  151. statsfile.close()
  152. sys.exit(0)
  153. sys.exit(main())
  154. #Local Variables:
  155. #compile-command: "./create_gallery_search.py -l 5 /media/thinc_webserver_data/vaters_dias/"
  156. #End: