"""Full-text indexing module for aether In your _code.py, insert something like from search import * and modify the location of the swish binary SWISH = "..." You'll also need to set up the indexing process, including the use of "aetherfilter", a C program that parses aether-markup files. """ SWISH = "/usr/local/bin/swish-e" import os, sys, time, string from __main__ import * __all__ = ['handle_search', 'handle_sitemap'] try; enumerate except NameError: def enumerate(i): return zip(range(len(i)), i) def age(f): seconds = time.time() - os.stat(f).st_mtime if seconds < 60: return "less than a minute" if seconds < 120: return "1 minute" if seconds < 60 * 60: return "%d minutes" % ((seconds + 30)/60) if seconds < 7200: return "1 hour" if seconds < 24 * 60 * 60: return "%d hours" % ((seconds + 30*60)/60/60) if seconds < 2*86400: return "1 day" return "%d days" % ((seconds + 30*60*24)/60/60/24) from sgmllib import SGMLParser class HTMLTextExtractor(SGMLParser): def __init__(self): SGMLParser.__init__(self) self.data = [] def handle_data(self, data): if data: self.data.append(data) def html2text(document): h = HTMLTextExtractor() h.feed(document) return " ".join(h.data) def handle_sitemap(name, query): search_text = query.get('search',u'') search = search_text.lower().split() result = [ ] def helper(name): if exists(name): text = load(name).lower() for term in search: if text.find(term) == -1: break else: result.append(u'[line [link ' + (quote_markup(name) or u'/') + u'[page '+quote_markup(name)+']]]') names = list_names(name) for name, full_name in names: helper(full_name) helper('') if not result: result.append('Search found nothing, sorry.') if search: result.insert(0, u'[title Search results]\n\n') result.insert(1, u'Searching for: [bold ' + quote_markup(string.join(search,' ')) + u']\n\n') else: result.insert(0, u'[title Site map]\n\n') return make_http_page(string.join(result,u'')) def popen(*args): reader, writer = os.pipe() p = os.fork() if p == 0: os.close(reader) os.dup2(writer, 1) os.close(writer) os.execvp(args[0], args) os._exit(99) os.close(writer) return os.fdopen(reader) def swish(db, term): p = popen(SWISH, "-x", "%p\n", "-f", db, "-w", term) result = [] for line in p: line = line.strip() if line.startswith("#"): continue if line == ".": continue result.append(line) return result def find_locations(words, text): locations = {} text = [t.strip(string.punctuation) for t in text] for w in words: if w.endswith("*"): w = w[:-1] for i, t in enumerate(text): if t.startswith(w): locations[i] = True else: for i, t in enumerate(text): if t == w: locations[i] = True ret = locations.keys() ret.sort() return ret def choose_locations(locations, count=2): ret = [] last = 0 for i in range(count): if not locations: break l = max(last, locations[0] - 5) m = l + 16 ret.append((l, m)) last = m locations = [l for l in locations if l > last] return ret def handle_search(name, query): search = search_text = query.get('search',u'') index = data_dir + u'/_swishindex' if not search: return handle_sitemap(name, query) result = [] for name in swish(index, search_text): if name.startswith("./"): name = name[2:] sys.stderr.write("load(%r)\n" % name) try: text = load(name) except Error: result.append(u'[line ' + quote_markup(name) + ']') else: entry_meta = {'name': name, 'outer_name': ''} entry_text = markup(text, entry_meta) entry_plain = html2text(entry_text) entry_split = entry_plain.split() locations = find_locations(search_text.lower().split(), entry_plain.lower().split()) extracts = 
            summary = entry_meta.get('summary', None)
            title = entry_meta.get('title', name)
            if title != name:
                result.append(u'[line [link ' + (quote_markup(title) or u'/') +
                              u'[page ' + quote_markup(name) + u']]' +
                              u' \[' + name + u'\]]')
            else:
                result.append(u'[line [link ' + (quote_markup(title) or u'/') +
                              u'[page ' + quote_markup(name) + u']]]')
            if extracts:
                # Show short extracts around the matched words.
                for a, b in extracts:
                    result.append(u'[line ')
                    if a != 0:
                        result.append("... ")
                    for i in range(a, b):
                        try:
                            e = entry_split[i]
                        except IndexError:
                            continue
                        if i in locations:
                            result.append(" [html ]")
                            result.append(quote_markup(e))
                            result.append("[html ]")
                        else:
                            result.append(" " + quote_markup(e))
                    if b != len(entry_split) - 1:
                        result.append("... ")
                    result.append("]")
            elif summary:
                result.append(u'[line [html ' + summary + ']]')
            result.append(u'[html ]')

    if not result:
        result.append('Search found nothing, sorry.')
    result.insert(0, u'[title Search results]\n\n')
    result.insert(1, u'Searching for: [bold ' + quote_markup(search) + u']\n\n')
    result.append(u'[line Search results from swish-e, index updated ' +
                  age(index) + ' ago]')
    return make_http_page(string.join(result, u''))
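

# Example hookup, as described in the module docstring.  This is a minimal
# sketch and assumes the site keeps its customisation in a _code.py next to
# this module (search.py); names and paths are illustrative only:
#
#     # _code.py
#     from search import *          # exposes handle_search and handle_sitemap
#     import search
#     search.SWISH = "/usr/bin/swish-e"   # wherever the swish-e binary lives
#
# The index itself is built outside this module (for example from a cron
# job) by running swish-e over the aether data directory with a
# configuration that passes pages through the "aetherfilter" program; the
# exact swish-e configuration is site-specific and not shown here.  Whatever
# the setup, handle_search() expects the finished index file to end up at
# data_dir + '/_swishindex'.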