#!/usr/bin/python
#    Copyright 2008 Jeff Epler <jepler@unpythonic.net>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

def usage():
    """Print the command-line help text to standard output.

    The program name shown in the synopsis is the basename of sys.argv[0].
    The synopsis lists every option described below it, including -w, -U
    and -d.
    """
    text = """Usage: %s [-#] [-F] [-f|-z|-q|-n|-w] [-A|-U|-D] [-d] [-s sz] [-h]

    -#: Compute usage for a level # dump (default: level 0)

    -F: Short for -fs 0 (produce NUL-separated list of files to dump)

    -f: Print each line of output using the format "filename\\0"
    -z: Print each line of output using the format "size\\tfilename\\0"
    -q: Print each line of output using the format "size\\tqfilename\\n"
        where qfilename is the filename properly quoted for bash-family
        shells (default)
    -n: Print each line of output using the format "size\\tfilename\\n"
        with no special quoting for filename.
    -w: Print each line of output using the format "size\\twfilename\\n"
        where wfilename is the filename with whitespace replaced by underscores

    -A: Count all files, including those with the nodump flag set
    -U: Count only files with the nodump flag clear
    -D: Count only files with the nodump flag set

    -d: Do not print lines for directories
    -s sz: In addition to printing a line for each directory, also print a 
           line for any single file larger than 'sz' 512-byte blocks.  Default:
           -s 204800, which corresponds to 100 mebibytes

    -h: Print sizes in human readable format.  Suffix K=2^10, M=2^20, G=2^30
""" % os.path.basename(sys.argv[0])
    # The help text is followed by one extra blank line.
    sys.stdout.write(text + "\n")


import os, fcntl, sys, stat, string, struct, time, rfc822
# Bit of the inode flag word that nodump_flag_ext2() tests to decide whether
# a file is marked "nodump".
EXT2_NODUMP_FL    =       0x40
# Extra open() flag passed by nodump_flag_ext2(), defined here by value
# (Python 2 octal literal).
# NOTE(review): the number looks like the Linux/x86 O_NOATIME value -- confirm
# before relying on it on other architectures.
O_NOATIME         =       01000000
# The ioctl request number and the size of its result buffer differ between
# builds: sys.maxint == 2**31 - 1 is taken to mean a 4-byte C long, anything
# else an 8-byte one (the result is unpacked with struct format "l").  The
# request numbers are written as negative ints; the trailing comments give
# the same values in unsigned hexadecimal.
if sys.maxint == 2147483647: 
    EXT2_IOC_GETFLAGS = -2147195391 #0x80046601
    buf = "\0" * 4
else:
    EXT2_IOC_GETFLAGS = -2146933247 #0x80086601
    buf = "\0" * 8
        
from subprocess import Popen, PIPE
def get_device_name(p):
    """Return the device name that `df -P` reports for path p.

    Runs `df -P p`, skips the header line and returns the first
    whitespace-separated field of the second output line.
    """
    process = Popen(['df', '-P', p], stdout=PIPE)
    output = process.communicate()[0]
    lines = output.split("\n")
    # lines[0] is the column header; lines[1] describes the filesystem.
    fields = lines[1].split()
    return fields[0]

# Location of the dump history file: /etc/dumpdates when it exists,
# otherwise /var/lib/dumpdates.
dumpdates = "/var/lib/dumpdates"
if os.path.exists("/etc/dumpdates"):
    dumpdates = "/etc/dumpdates"

def get_dumpdate(p, l):
    """Return the time of the latest lower-level dump of p's filesystem.

    p is a path; the device holding it is found with get_device_name().
    l is the dump level being planned.  Every record of the dumpdates file
    whose device matches and whose level is strictly below l is considered,
    and the newest timestamp among them (seconds since the epoch) is
    returned.  Returns 0 when no such record exists.

    Records are expected to look like "device level date".  Lines that are
    blank, too short, carry a non-numeric level or an unparsable date are
    skipped instead of aborting the whole run.
    """
    d = get_device_name(p)
    res = 0
    records = open(dumpdates)
    try:
        for line in records:
            fields = line.split(None, 2)
            if len(fields) != 3:
                continue        # blank or truncated record
            dd, dl, dt = fields
            try:
                dl = int(dl)
            except ValueError:
                continue        # level field is not a number
            if d == dd and dl < l:
                parsed = rfc822.parsedate_tz(dt)
                if parsed is None:
                    continue    # date could not be parsed
                res = max(res, rfc822.mktime_tz(parsed))
    finally:
        # Close the file explicitly rather than relying on garbage collection.
        records.close()
    return res
            
def nodump_flag_ext2(a):
    """Report whether the "nodump" inode flag is set on path a.

    The file is opened read-only and its flag word is fetched with the
    EXT2_IOC_GETFLAGS ioctl.  Returns the EXT2_NODUMP_FL bit of that word,
    i.e. a true value when the flag is set and 0 otherwise.

    If the file cannot be opened or the ioctl fails (for instance because
    the filesystem does not implement it), the error is written to standard
    error and False is returned.
    """
    try:
        # NOTE(review): O_NONBLOCK presumably keeps the open from blocking on
        # special files, and O_NOATIME from touching the access time.
        fd = os.open(a, os.O_RDONLY | os.O_NONBLOCK | os.O_LARGEFILE | O_NOATIME)
        try:
            raw = fcntl.ioctl(fd, EXT2_IOC_GETFLAGS, buf)
            flags = struct.unpack("l", raw)[0]
            return flags & EXT2_NODUMP_FL
        finally:
            os.close(fd)
    except (OSError, IOError):
        # os.open() reports failure with OSError, fcntl.ioctl() with IOError;
        # both must be handled or a single unsupported file aborts the scan.
        # sys.exc_info() is used so the handler parses on any Python version.
        sys.stderr.write("%s\n" % (sys.exc_info()[1],))
        return False

# Pick the nodump test for this platform.  nodump_flag is the test in
# effect; nodump_flag_default remembers the platform's own test.
if sys.platform.startswith("linux"):
    nodump_flag = nodump_flag_ext2
else:
    # No nodump test is implemented for other platforms.  Treat every file
    # as having the flag clear rather than failing with a NameError below.
    nodump_flag = lambda a: False
nodump_flag_default = nodump_flag

def num(amt):
    """Convert a count of 512-byte blocks to whole kibibytes, rounding down.

    Floor division is spelled out so the result stays an integer whatever
    division semantics are in effect.
    """
    return amt // 2

def f(number):
    """Format number with one decimal place, dropping a trailing ".0"."""
    text = "%.1f" % number
    if text[-2:] == ".0":
        text = text[:-2]
    return text

def humanize_number( number ):
    """Format a count of 512-byte blocks as a human-readable size.

    The count is converted to bytes and shown in the largest unit that it
    reaches: G (2^30), M (2^20), K (2^10) or plain bytes (B).  The numeric
    part is right-aligned in six columns and formatted by f(), so it has at
    most one decimal place.

    A size of exactly one unit is shown in that unit ("1K", "1M", "1G")
    rather than as 1024 of the next smaller one.
    """
    number = number * 512
    units = (
        ("G", 1024.0 * 1024 * 1024),
        ("M", 1024.0 * 1024),
        ("K", 1024.0),
    )
    for suffix, scale in units:
        # >= so that the boundary value itself moves up to the larger unit
        if number >= scale:
            return "% 6s%s" % (f(number / scale), suffix)
    return ("% 6dB" % number)


# Files using more than this many 512-byte blocks get an output line of
# their own; the default corresponds to 100 mebibytes.
breakout_size = 100 * 1024 * 1024 // 512
# When true, du() prints a line for every directory with a nonzero size.
dir_flag = True

# Characters that quote() passes through without quoting.
# NOTE(review): "{", "}" and "," are in this set, yet bash applies brace
# expansion to unquoted words such as {a,b} -- confirm this is intended.
whitelist = string.lowercase + string.uppercase + string.digits + "_+-=@%^/.,:{}"
# Identity translation table, so that str.translate() can be used purely to
# delete characters (see quote()).
nulltrans = string.maketrans('', '')
# Translation table that maps every whitespace character to an underscore
# (used by printw()).
wstounderscore = string.maketrans(string.whitespace, "_" * len(string.whitespace))
def quote(n):
    """Return n quoted for use as a single word in a bash-family shell.

    A name made up only of whitelist characters is returned unchanged.
    Otherwise the leading run of whitelist characters is kept as is and the
    rest is wrapped in single quotes, with each embedded single quote
    written as '\\''.
    """
    # Everything from the first non-whitelisted character onwards.
    unsafe_tail = n.lstrip(whitelist)
    if not unsafe_tail:
        return n
    safe_head = n[:len(n) - len(unsafe_tail)]
    escaped = unsafe_tail.replace("'", "'\\''")
    return "%s'%s'" % (safe_head, escaped)

def du(args, since, dev=None):
    """Add up the st_blocks of everything under args and print report lines.

    args  -- iterable of paths to examine
    since -- timestamp; a regular file whose mtime and ctime are both
             older than this is skipped
    dev   -- device number of the parent directory, or None at the top
             level; a directory on a different device is not descended
             into and is counted like a plain file

    Regular files for which nodump_flag() is true are skipped.  A directory
    contributes its own blocks (unless nodump_flag() is true for it) plus
    the total of its entries, and gets a line via print_line() when that sum
    is nonzero and dir_flag is set.  Any other entry gets a line only when
    it uses more than breakout_size blocks.  Errors from lstat() and
    listdir() are written to standard error and the entry is passed over.

    Returns the total number of blocks counted.
    """
    total = 0
    for path in args:
        try:
            info = os.lstat(path)
            if stat.S_ISREG(info.st_mode):
                unchanged = info.st_mtime < since and info.st_ctime < since
                if unchanged or nodump_flag(path):
                    continue
        except os.error:
            # sys.exc_info() is used so the handler parses on any Python version
            sys.stderr.write("%s\n" % (sys.exc_info()[1],))
            continue

        same_device = dev is None or info.st_dev == dev
        if not (stat.S_ISDIR(info.st_mode) and same_device):
            # Plain entry (or a directory on another device): count it and
            # report it only if it is big enough.
            amt = info.st_blocks
            if amt > breakout_size:
                print_line(amt, path)
            total += amt
            continue

        # Directory on the same device: its own blocks plus its contents.
        if nodump_flag(path):
            amt = 0
        else:
            amt = info.st_blocks
        try:
            names = os.listdir(path)
        except os.error:
            sys.stderr.write("%s\n" % (sys.exc_info()[1],))
        else:
            children = [os.path.join(path, name) for name in names]
            amt += du(children, since, info.st_dev)
        if amt and dir_flag:
            print_line(amt, path)
        total += amt
    return total

def printt(amt, a):
    """Write the name a followed by a NUL character; amt is ignored.

    Exits with status 0 if writing to standard output raises IOError.
    """
    record = "".join((a, "\0"))
    try:
        sys.stdout.write(record)
    except IOError:
        raise SystemExit(0)
def printq(amt, a):
    """Write "size<TAB>name<NEWLINE>" with the name shell-quoted by quote().

    The size is amt passed through num().  Exits with status 0 if writing
    to standard output raises IOError.
    """
    try:
        sys.stdout.write("%s\t%s\n" % (num(amt), quote(a)))
    except IOError:
        raise SystemExit(0)
def printn(amt, a):
    """Write "size<TAB>name<NEWLINE>" with the name exactly as given.

    The size is amt passed through num().  Exits with status 0 if writing
    to standard output raises IOError.
    """
    try:
        sys.stdout.write("%s\t%s\n" % (num(amt), a))
    except IOError:
        raise SystemExit(0)
def print0(amt, a):
    """Write "size<TAB>name" terminated by a NUL character.

    The size is amt passed through num().  Exits with status 0 if writing
    to standard output raises IOError.
    """
    try:
        fields = (num(amt), a)
        sys.stdout.write("%s\t%s\0" % fields)
    except IOError:
        raise SystemExit(0)
def printw(amt, a):
    """Like printn(), but with each whitespace character in the name
    replaced by an underscore."""
    printn(amt, a.translate(wstounderscore))

# Output routine used by du(); the default is the shell-quoted format (-q).
print_line = printq
if __name__ == '__main__':
    import getopt

    try:
        # 'd' has to be listed here for the documented -d option to be accepted.
        opts, args = getopt.getopt(sys.argv[1:], "AUDFdhfzqnws:0123456789")
    except getopt.error:
        # sys.exc_info() is used so the handler parses on any Python version
        message = sys.exc_info()[1]
        usage()
        raise SystemExit(message)

    dumplevel = 0
    for k, v in opts:
        if k == '-F':
            print_line = printt
            breakout_size = 0
        elif k == '-f': print_line = printt
        elif k == '-z': print_line = print0
        elif k == '-q': print_line = printq
        elif k == '-n': print_line = printn
        elif k == '-w': print_line = printw
        elif k == '-A': nodump_flag = lambda a: False
        elif k == '-U': nodump_flag = nodump_flag_default
        elif k == '-D': nodump_flag = lambda a: not nodump_flag_default(a)
        elif k == '-s': breakout_size = int(v)
        elif k == '-d': dir_flag = False
        elif k == '-h': num = humanize_number
        elif k[1:].isdigit():
            # -0 .. -9 select the dump level
            dumplevel = int(k[1:])

    args = args or ['.']
    total = 0
    for a in args:
        # Each argument may live on a different filesystem, so it gets its
        # own reference date and is scanned on its own -- exactly once.
        since = get_dumpdate(a, dumplevel)
        total += du([a], since)
    if len(args) > 1 and print_line != printt:
        print_line(total, "total")
