#!/usr/bin/python # -*- coding: utf-8 -*- # csv / sql query engine # Copyright © 2011 Jeff Epler # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import csv import optparse import os import sqlite3 import sys def sniff(csvfile): return csv.Sniffer().sniff(file(csvfile).read(40960)) def find_columns(csvfile): base, ext = os.path.splitext(csvfile) candidates = [ base + ".cols", os.path.basename(base) + ".cols" ] for c in candidates: if os.path.isfile(c): return c raise SystemExit, "Could not find columns for %r (Tried %s)" % ( csvfile, ", ".join(repr(c) for c in candidates)) parser = optparse.OptionParser(usage="%prog [options] csvfile sqlquery") parser.add_option("-c", "--columns", help="File specifying columns (default: guess based on input file)", metavar="example.cols", dest="columns") parser.add_option("-d", "--in-dialect", help="Dialect of input file (default: autodetected)", metavar="excel|excel-tab|auto", dest="in_dialect") parser.add_option("-D", "--out-dialect", help="Dialect of output file (default: same as input)", metavar="excel|excel-tab|input", dest="out_dialect") parser.set_defaults(columns=None, in_dialect='auto', out_dialect='input') options, args = parser.parse_args() if len(args) != 2: parser.print_help() raise SystemExit csvfile, sqlquery = args if options.in_dialect == 'auto': options.in_dialect = sniff(csvfile) elif options.in_dialect not in csv.list_dialects(): raise SystemExit, "Dialect %r not known" % options.in_dialect if options.out_dialect == 'input': options.out_dialect = options.in_dialect if options.columns is None: options.columns = find_columns(csvfile) csvfile = csv.reader(file(csvfile), options.in_dialect) columns = [line.strip() for line in file(options.columns)] conn = sqlite3.connect(':memory:') conn.text_factory = str c = conn.cursor() c.execute('create table t (' + ','.join(columns) + ')') toolong = [] tooshort = [] params = ','.join('?' * len(columns)) for row in csvfile: if len(row) < len(columns): tooshort.append(row) row = row + [None] * (len(columns) - len(row)) elif len(row) > len(columns): toolong.append(row) row = row[:len(columns)] assert len(row) == len(columns) c.execute('insert into t values (%s)' % params, row) if tooshort: print >>sys.stderr, "Warning: %d rows were shorter than expected" % len(tooshort) print >>sys.stderr, tooshort[0] if toolong: print >>sys.stderr, "Warning: %d rows were longer than expected" % len(toolong) print >>sys.stderr, toolong[0] writer = csv.writer(sys.stdout, dialect=options.out_dialect) c.execute(sqlquery) for row in c: writer.writerow(row)