"""Full-text indexing module for aether
In your _code.py, insert something like
from search import *
and modify the location of the swish binary SWISH = "..."
You'll also need to set up the indexing process, including the use of
"aetherfilter", a C program that parses aether-markup files.
"""
SWISH = "/usr/local/bin/swish-e"
import os, sys, time, string
from __main__ import *
__all__ = ['handle_search', 'handle_sitemap']
try; enumerate
except NameError:
def enumerate(i): return zip(range(len(i)), i)
def age(f):
    """Return a human-readable description of how old file *f* is.

    The age is the time elapsed since the file's mtime, rounded to the
    nearest unit ("less than a minute", "N minutes", "N hours", "N days").
    """
    elapsed = time.time() - os.stat(f).st_mtime
    if elapsed < 60:
        return "less than a minute"
    elif elapsed < 120:
        return "1 minute"
    elif elapsed < 60 * 60:
        # +30 seconds rounds to the nearest minute
        return "%d minutes" % ((elapsed + 30) / 60)
    elif elapsed < 7200:
        return "1 hour"
    elif elapsed < 24 * 60 * 60:
        # +30 minutes rounds to the nearest hour
        return "%d hours" % ((elapsed + 30 * 60) / 60 / 60)
    elif elapsed < 2 * 86400:
        return "1 day"
    else:
        # +half a day rounds to the nearest day
        return "%d days" % ((elapsed + 30 * 60 * 24) / 60 / 60 / 24)
from sgmllib import SGMLParser
class HTMLTextExtractor(SGMLParser):
    """SGML parser that discards all tags and collects only text content.

    After feed()ing a document, the text fragments are available in the
    ``data`` list attribute.
    """

    def __init__(self):
        SGMLParser.__init__(self)
        self.data = []  # accumulated text fragments, in document order

    def handle_data(self, data):
        # skip empty fragments so joins don't produce stray separators
        if data:
            self.data.append(data)
def html2text(document):
    """Strip markup from *document* and return its text joined by spaces."""
    extractor = HTMLTextExtractor()
    extractor.feed(document)
    return " ".join(extractor.data)
def handle_sitemap(name, query):
    """Render a site map page, optionally filtered by a 'search' term.

    Walks every page under *name*; when a search string was given in
    *query*, only pages whose text contains every term (case-insensitive
    substring match) are listed.  Returns a complete HTTP page of aether
    markup via make_http_page.  Relies on exists/load/list_names/
    quote_markup/make_http_page injected by ``from __main__ import *``.
    """
    search_text = query.get('search',u'')
    search = search_text.lower().split()
    result = [ ]
    def helper(name):
        if exists(name):
            text = load(name).lower()
            # require every term to appear; break on the first miss
            for term in search:
                if text.find(term) == -1:
                    break
            else:
                # all terms matched (trivially true when search is empty)
                result.append(u'[line [link ' + (quote_markup(name) or u'/') +
                    u'[page '+quote_markup(name)+']]]')
    names = list_names(name)
    for name, full_name in names:
        helper(full_name)
    # the root page itself has the empty name
    helper('')
    if not result:
        result.append('Search found nothing, sorry.')
    if search:
        result.insert(0, u'[title Search results]\n\n')
        result.insert(1, u'Searching for: [bold ' + quote_markup(string.join(search,' ')) + u']\n\n')
    else:
        result.insert(0, u'[title Site map]\n\n')
    return make_http_page(string.join(result,u''))
def popen(*args):
    """Spawn *args* as a child process; return a file reading its stdout.

    Uses fork/exec directly so the argument vector is passed verbatim
    (no shell quoting issues, unlike os.popen with a command string).
    NOTE(review): the child is never wait()ed for, so every call leaves
    a zombie until the interpreter exits -- confirm the serving
    environment reaps children (e.g. ignores SIGCHLD).
    """
    reader, writer = os.pipe()
    p = os.fork()
    if p == 0:
        # child: wire the pipe's write end to stdout, close the rest, exec
        os.close(reader)
        os.dup2(writer, 1)
        os.close(writer)
        os.execvp(args[0], args)
        # reached only if exec failed; exit without Python cleanup
        os._exit(99)
    # parent: keep only the read end
    os.close(writer)
    return os.fdopen(reader)
def swish(db, term):
    """Query the swish-e index *db* for *term*; return matching paths.

    Invokes the swish-e binary with -x "%p\\n" so each hit is printed as
    a bare path, one per line.
    """
    pipe = popen(SWISH, "-x", "%p\n", "-f", db, "-w", term)
    paths = []
    for raw in pipe:
        raw = raw.strip()
        # skip swish-e comment lines and its "." end-of-output marker
        if raw.startswith("#") or raw == ".":
            continue
        paths.append(raw)
    return paths
def find_locations(words, text):
    """Return the sorted indices of words in *text* matching any of *words*.

    words: search terms; a trailing "*" marks a prefix (wildcard) match.
    text: a sequence of words; surrounding punctuation is stripped from
        each word before comparison.
    """
    stripped = [t.strip(string.punctuation) for t in text]
    hits = set()
    for w in words:
        if w.endswith("*"):
            # wildcard: match any word starting with the prefix
            prefix = w[:-1]
            for i, t in enumerate(stripped):
                if t.startswith(prefix):
                    hits.add(i)
        else:
            for i, t in enumerate(stripped):
                if t == w:
                    hits.add(i)
    # Fixed: the original did ``ret = locations.keys(); ret.sort()``,
    # which fails on Python 3 where keys() is a view with no .sort();
    # sorted() behaves identically on both Python 2 and 3.
    return sorted(hits)
def choose_locations(locations, count=2):
    """Pick up to *count* non-overlapping (start, end) extract windows.

    locations: sorted word indices of search hits.
    Each window starts 5 words before the first remaining hit (clamped
    so windows never overlap) and spans 16 words; hits already covered
    are dropped before choosing the next window.
    """
    windows = []
    cursor = 0
    for _ in range(count):
        if not locations:
            break
        start = max(cursor, locations[0] - 5)
        end = start + 16
        windows.append((start, end))
        cursor = end
        locations = [loc for loc in locations if loc > cursor]
    return windows
def handle_search(name, query):
    """Full-text search via the swish-e index; render a results page.

    Falls back to handle_sitemap when no 'search' term was given.  For
    each hit the page is loaded, rendered to plain text, and up to two
    short extracts around the matched words are shown (or the page's
    summary when no extract is available).  Relies on data_dir/load/
    markup/Error/quote_markup/make_http_page from ``__main__``.
    """
    search = search_text = query.get('search',u'')
    index = data_dir + u'/_swishindex'
    if not search: return handle_sitemap(name, query)
    result = []
    for name in swish(index, search_text):
        # swish-e reports paths relative to the index root as "./name"
        if name.startswith("./"): name = name[2:]
        sys.stderr.write("load(%r)\n" % name)
        try:
            text = load(name)
        except Error:
            # page vanished since indexing: list it without a link
            result.append(u'[line ' + quote_markup(name) + ']')
        else:
            entry_meta = {'name': name, 'outer_name': ''}
            entry_text = markup(text, entry_meta)   # aether markup -> HTML
            entry_plain = html2text(entry_text)     # HTML -> plain text
            entry_split = entry_plain.split()
            locations = find_locations(search_text.lower().split(),
                                       entry_plain.lower().split())
            extracts = choose_locations(locations)
            summary = entry_meta.get('summary', None)
            title = entry_meta.get('title', name)
            if title != name:
                # show the page title, with the raw name in brackets
                result.append(u'[line [link ' +
                              (quote_markup(title) or u'/') +
                              u'[page '+quote_markup(name)+u']]' +
                              u' \[' + name + u'\]]')
            else:
                result.append(u'[line [link ' +
                              (quote_markup(title) or u'/') +
                              u'[page '+quote_markup(name)+u']]]')
            if extracts:
                for a, b in extracts:
                    result.append(u'[line ')
                    if a != 0:
                        result.append("... ")
                    for i in range(a, b):
                        try:
                            e = entry_split[i]
                        except IndexError:
                            # window may extend past the last word
                            continue
                        if i in locations:
                            # NOTE(review): these highlight markers look
                            # stripped -- probably once held e.g. <b>/</b>
                            result.append(" [html ]")
                            result.append(quote_markup(e))
                            result.append("[html ]")
                        else:
                            result.append(" " + quote_markup(e))
                    # Fixed: original compared b against len(entry_text)
                    # (the HTML string's character count) instead of
                    # len(entry_split) (the word list the extract indexes).
                    if b != len(entry_split)-1:
                        result.append("... ")
                    result.append("]")
            elif summary:
                result.append(u'[line [html ' + summary + ']]')
        # Fixed: this string literal was broken across two source lines
        # (a syntax error).  NOTE(review): it may once have held a markup
        # tag such as <br> that was stripped -- confirm intended output.
        result.append(u'[html\n]')
    if not result:
        result.append('Search found nothing, sorry.')
    result.insert(0, u'[title Search results]\n\n')
    result.insert(1, u'Searching for: [bold ' + quote_markup(search) + u']\n\n')
    result.append(u'[line Search results from swish-e, index updated ' +
                  age(index) + ' ago]')
    return make_http_page(string.join(result,u''))