#!/usr/bin/python
# vim: set fileencoding=utf-8 : sts=4 : et : sw=4
#    This is a component of mailpie, a full-text search for email
#
#    Copyright © 2008 Jeff Epler <jepler@unpythonic.net>
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import sys, os, subprocess, shutil, getopt, tempfile, re, shlex
import mailpie.swishfilter, mailpie.parsedate
sys.stdin.close()

def usage(result=0):
    print """mailpie-search: Put search results in a mailbox
Usage: %s [-o file|-p|-m] [-b date] [-a date] [-M mailreader] terms...
    -B dir, --base=dir      Choose the base location of the mailpie storage
                            Default: $HOME/.mailpie

Action:
    -c,      --count        Write number of matches
    -o FILE, --out=FILE     Write result to FILE
    -p,      --stdout       Write result to stdout
    -r,      --read:        Open mailbox in mailreader (default)

    -M prog, --mailreader=PROG
                            Mailreader to invoke (default: "mutt -R -f")

Searching:
    -b DATE, --before=DATE  Only include messages before DATE
    -a DATE, --after=DATE   Only include messages after DATE
                            Date formats are like "date -d=DATE"

    -l NUM, --limit=NUM     Return no more than NUM

    -t, --thread            Include full threads
    -n, --no-thread         Include full threads

    terms...                See SWISH-RUN(1) for details on search terms
""" % os.path.basename(sys.argv[0])
    raise SystemExit, result

def run_swish(indexes, args, before=None, after=None):
    if not args:
        return
    swishargs = ['swish-e', '-H0',
        '-x<swishdocpath>\t<in-reply-to>\t<references>\n',
        '-w', " ".join(args)] + ['-f%s' % f for f in indexes]
    if before or after:
        if before is None: before = sys.maxint
        if after is None: after = 0
        swishargs.extend(["-L", "date", str(after), str(before)])

    swish = subprocess.Popen(swishargs, stdout=subprocess.PIPE)
    for row in swish.stdout:
        split = row.rstrip("\n").split("\t")
        if len(split) != 3: raise ValueError, row
        yield split

class Count: pass

def main(args):
    before = after = target = None
    mailreader = ["mutt", "-R", "-f"]
    limit = 2000
    thread = False

    base = os.path.expanduser("~/.mailpie")

    try:
        opts, args = getopt.getopt(args, "co:prM:a:b:l:tnh?",
            ("count", "out=", "stdout", "read", "mailreader",
             "help", "after=", "before=", "limit=", "thread", "no-thread"))
    except getopt.GetoptError, detail:
        usage(detail)

    for k, v in opts:
        if k in ("-o", "--out"): target = v
        elif k in ("-p", "--stdout"): target = "-"
        elif k in ("-r", "--read"): target = None
        elif k in ("-c", "--count"): target = Count
        elif k in ("-M", "--mailreader"): mailreader = shlex.split(v)
        elif k in ("-l", "--limit"): limit = int(v)
        elif k in ("-a", "--after"): after = mailpie.parsedate.parsedate(v)
        elif k in ("-b", "--before"): before = mailpie.parsedate.parsedate(v)
        elif k in ("-t", "--thread"): thread = True
        elif k in ("-n", "--no-thread"): thread = False
        elif k in ("-l", "--limit"): limit = int(v)
        elif k in ("--help", "-h", "-?"): usage()

    indexes = mailpie.swishfilter.get_index_files(base)

    if target is None:
        f = tempfile.NamedTemporaryFile(prefix="mailpie", suffix=".mbox")
    elif target == "-":
        f = sys.stdout
    elif target is Count:
        f = None
    else:
        f = open(target, "w")

    count = 0
    msgids = set()
    hashes = set()

    def q(arg):
        if not " " in arg: return arg

        if "=" in arg:
            a, b = arg.split("=", 1)
            return '%s="%s"' % (a,b)

        return '"%s"' % arg

    args = [q(arg) for arg in args]
    for filename, irt, references in run_swish(indexes, args, before, after):
        path = os.path.join(base, filename[:2], filename[2:])
        if f: f.write(open(path).read())
        count = count + 1
        if count == limit: break
        if thread:
            hashes.add(filename)
            if irt: msgids.add(irt)
            msgids.update(set(references.split()))

    if thread:
        visited = set(["(null)"])  # This hurts swish
        while msgids and count < limit:
            cmdline = []
            while msgids and len(cmdline) < 5:
                p = msgids.pop()
                if p in visited: continue
                visited.add(p)
                cmdline.append("message-id=\"%s\"" % re.sub("[^a-z0-9]+", " ", p))
            for filename, irt, references in run_swish(indexes, cmdline, before, after):
                if filename in hashes: continue
                path = os.path.join(base, filename[:2], filename[2:])
                if f: f.write(open(path).read())
                count = count + 1
                if count == limit: break
                hashes.add(filename)
                if irt: msgids.add(irt)
                msgids.update(set(references.split()))

    if target is None:
        f.flush()
        os.spawnvp(os.P_WAIT, mailreader[0], mailreader + [f.name])

    if target is Count:
        print count
if __name__ == '__main__':
    main(sys.argv[1:])
