#!/usr/bin/env python2
"""
Tool that helps in replicating file rename operations across machines.

Example: Alice has a directory containing some files. She gives a copy of
these files to Bob. Afterwards, she renames the files to follow a different
nomenclature. Bob wants to have these new file names, too, but he doesn't
want to copy the files again. So, what Alice does is
    %prog -R $directory >list.txt
She then sends list.txt to Bob, who does
    %prog -l list.txt -R $directory
to rename his copies of the files to the new names.

Technically this works by generating a list file that contains the new
name (encoded in platform-neutral UTF-8) and a hash of each file's size
as well as samples from the beginning, mid and end of the data. It's not
a full data hash, but enough to work in 99.9% of all situations.
"""
import sys, os, stat, re, glob, optparse, hashlib, struct, base64


def CE(x):
    if type(x) != unicode:
        x = unicode(x, sys.getfilesystemencoding(), 'replace')
    return x.encode(sys.getfilesystemencoding(), 'replace')


re_hash = re.compile(r'([A-Za-z0-9+/]{12})\s+(.*)')

def import_hashes(f):
    hash_map = {}
    dups = set()
    for line in f:
        line = line.strip()
        if not(line) or line.startswith('#'):
            continue
        m = re_hash.match(line)
        if not m:
            print >>sys.stderr, "Invalid line in list file: `%s'" % line
            continue
        h = m.group(1)
        if not(h in hash_map):
            hash_map[h] = unicode(m.group(2), 'utf-8', 'replace')
        elif not(h in dups):
            del hash_map[h]
            dups.add(h)
            print >>sys.stderr, "Warning: duplicate hash `%s'" % h
    return hash_map


def generate_hash(filename, size=None):
    """Return a short content hash of a file as a 12-character base64 string.

    The hash is the first 9 bytes of an MD5 digest over the file size
    (packed as a little-endian 64-bit integer) followed by file data:
    files of up to 65536 bytes are hashed completely; for larger files
    three samples of 21845 bytes are hashed, in this order: the first
    21845 bytes, the last 21845 bytes, and the 21845 bytes starting at
    offset (size - 43690) // 2.

    The sampling scheme must not be changed: hashes have to match the ones
    stored in list files generated earlier or on other machines.

    filename -- path of the file to hash
    size     -- file size in bytes if already known; when omitted (or 0),
                the size is determined by seeking to the end of the file

    Raises IOError if the file cannot be opened or read.
    """
    small_limit = 65536
    sample = 21845
    # The with-statement guarantees the handle is closed even when reading
    # fails; the tool may hash a large number of files in one run.
    with open(filename, "rb") as f:
        if not size:
            f.seek(0, 2)
            size = f.tell()
            f.seek(0)
        h = hashlib.md5(struct.pack("<Q", size))
        if size <= small_limit:
            h.update(f.read(small_limit))
        else:
            h.update(f.read(sample))
            f.seek(size - sample)
            h.update(f.read(sample))
            # Explicit floor division: same value as the historic int "/".
            f.seek((size - 2 * sample) // 2)
            h.update(f.read(sample))
    return base64.b64encode(h.digest()[:9])


def rename_file(filename, size):
    global opts, hash_map
    try:
        h = generate_hash(filename, size)
    except IOError, e:
        print >>sys.stderr, CE(filename), "[ERROR]", e
        return
    try:
        new_name = hash_map[h]
    except KeyError:
        print CE(filename), "[no match]"
        return
    if opts.OutputDir:
        new_name = os.path.join(opts.OutputDir, new_name)
    else:
        new_name = os.path.join(os.path.dirname(filename), new_name)
    try:
        os.rename(filename, new_name)
        print CE(filename), "=>", CE(new_name), "[OK]"
    except OSError, e:
        print >>sys.stderr, CE(filename), "=>", CE(new_name), "[FAILED]", e


def generate_list(filename, size):
    try:
        h = generate_hash(filename, size)
    except IOError, e:
        print >>sys.stderr, CE(filename), "[ERROR]", e
        return
    print h + '\t' + os.path.basename(filename).encode('utf-8', 'replace')


def process_item(item, action):
    global opts
    try:
        s = os.stat(item)
    except OSError, e:
        print >>sys.stderr, CE(item), "[ERROR]", e
        return
    if stat.S_ISREG(s.st_mode):
        action(item, s.st_size)
    elif stat.S_ISDIR(s.st_mode) and opts.Recursive:
        try:
            items = os.listdir(item)
        except OSError, e:
            print >>sys.stderr, CE(item), "[ERROR]", e
            return
        for subitem in items:
            if subitem.startswith('.'):
                continue
            process_item(os.path.join(item, subitem), action)
    else:
        print >>sys.stderr, CE(item), "[ignored]"


class MyOptionParser(optparse.OptionParser):
    """OptionParser that prints the module docstring before the option help."""

    def print_help(self, file=None):
        """Write the module docstring, then the standard option help.

        Every '%prog' in the docstring is replaced by the base name of
        sys.argv[0].

        file -- object with a write() method that receives the output;
                defaults to sys.stdout
        """
        if file is None:
            file = sys.stdout
        # __doc__ is None when docstrings are stripped (python -OO); in that
        # case only the standard option help is written.
        if __doc__:
            intro = __doc__.lstrip().replace('%prog', os.path.basename(sys.argv[0]))
            # Goes to the same destination as the option help below.
            file.write(intro + "\n")
        optparse.OptionParser.print_help(self, file)


if __name__ == "__main__":
    parser = MyOptionParser(usage="%prog [OPTIONS...] [FILES...]")
    parser.add_option("-l", "--listfile", dest="ListFile", metavar="FILE",
                      help="path to a directory listing file that was either created with this program; if this option is omitted, a list will be created on stdout")
    parser.add_option("-d", "--outdir", dest="OutputDir", metavar="DIR",
                      help="directory to move renamed files to")
    parser.add_option("-R", "--recursive", action="store_true", dest="Recursive",
                      help="descend into subdirectories")
    opts, args = parser.parse_args()
    if not args:
        parser.error("no files specified")

    # open and import list file
    if not opts.ListFile:
        action = generate_list
    elif opts.ListFile == '-':
        action = rename_file
        hash_map = import_hashes(sys.stdin)
    else:
        action = rename_file
        try:
            hash_map = import_hashes(open(opts.ListFile, "r"))
        except IOError, e:
            print >>sys.stderr, "Error opening the list file -", e
            sys.exit(1)

    # create output directory
    if (action == rename_file) and opts.OutputDir and not(os.path.isdir(opts.OutputDir)):
        try:
            os.makedirs(opts.OutputDir)
        except OSError, e:
            print >>sys.stderr, "Error creating the output directory -", e
            sys.exit(1)

    # process files
    for arg in args:
        arg = unicode(arg, sys.getfilesystemencoding(), 'replace')
        if os.name == 'nt':
            for item in glob.glob(arg):
                process_item(item, action)
        else:
            process_item(arg, action)
