#!/usr/bin/env python2
"""
The equivalent of sha1sum/sha256sum/md5sum, except that it can process whole
directory trees recursively.
"""
import sys, os, stat, hashlib, argparse

def dummy_hash(alg=hashlib.md5):
    """Return a placeholder of '?' characters as wide as alg's hex digest.

    alg -- callable that returns a hash object exposing hexdigest()
    """
    digest_width = len(alg().hexdigest())
    return digest_width * '?'

def process(path, alg=hashlib.md5, type_known=False, isdir=False):
    """Print one "<hexdigest><TAB><path>" line per entry at or below path.

    A directory is walked recursively: its sub-directories are processed
    first and its regular files second, each group in sorted order.  Any
    entry that cannot be hashed (stat/listdir/read failure, or an entry that
    is neither a regular file nor a directory) is printed with a placeholder
    digest obtained from dummy_hash(alg) instead of raising.

    path       -- file or directory to process
    alg        -- callable returning a new hash object (e.g. hashlib.md5)
    type_known -- when true, skip the stat() call and trust isdir
    isdir      -- only consulted when type_known is true: whether path is a
                  directory; otherwise path is treated as a regular file
    """
    if type_known:
        isfile = not isdir
    else:
        try:
            mode = os.stat(path).st_mode
        except OSError:
            isfile = isdir = False
        else:
            isfile = stat.S_ISREG(mode)
            isdir = stat.S_ISDIR(mode)

    # NOTE: print is always called with a single parenthesised expression,
    # which the Python 2 print statement outputs exactly like the bare form.
    if isdir:
        try:
            items = os.listdir(path)
        except OSError:
            print(dummy_hash(alg) + "\t" + path)
            return
        dirs = []
        files = []
        # os.listdir() never returns "." or "..", so no filtering is needed.
        for item in items:
            ipath = os.path.join(path, item)
            try:
                mode = os.stat(ipath).st_mode
            except OSError:
                print(dummy_hash(alg) + "\t" + ipath)
                continue
            if stat.S_ISREG(mode):
                files.append(ipath)
            elif stat.S_ISDIR(mode):
                dirs.append(ipath)
            else:
                # Neither a regular file nor a directory: nothing to hash.
                print(dummy_hash(alg) + "\t" + ipath)
        # The type of each child is already known, so the recursive calls
        # skip their own stat().
        for d in sorted(dirs):
            process(d, alg, True, True)
        for f in sorted(files):
            process(f, alg, True, False)

    elif isfile:
        h = alg()
        try:
            # The with-block guarantees the handle is closed even when a
            # read fails part-way through the file.
            with open(path, "rb") as f:
                # Hash in 4 MiB chunks so large files are never fully loaded.
                for block in iter(lambda: f.read(4 * 1024 * 1024), b""):
                    h.update(block)
        except IOError:
            print(dummy_hash(alg) + "\t" + path)
        else:
            print(h.hexdigest() + "\t" + path)

    else:
        print(dummy_hash(alg) + "\t" + path)

if __name__ == "__main__":
    # Command-line entry point: parse the options, resolve the requested
    # hash algorithm and print a digest line for every source given.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("sources", metavar="DIR|FILE", nargs='*',
                        help="directory or file to scan")
    parser.add_argument("-t", "--type", metavar="alg", default="sha1",
                        help="hash algorithm to use [default: %(default)s]")
    parser.add_argument("-l", "--list-types", action='store_true',
                        help="list supported algorithms (valid -t options)")
    args = parser.parse_args()

    # "sources" is optional at the parser level so that -l works on its own.
    if args.list_types:
        print(' '.join(hashlib.algorithms))
        sys.exit(0)

    # Validate against the advertised algorithm list instead of probing the
    # module with getattr(): names such as "new" or "algorithms" are
    # attributes of hashlib but not usable hash constructors, and would
    # otherwise pass the check and crash later with a traceback.
    if args.type not in hashlib.algorithms:
        parser.error("unrecognized hash type")
    alg = getattr(hashlib, args.type)

    if not args.sources:
        parser.error("no files or directories specified")

    for s in args.sources:
        process(s, alg)
