#!/usr/bin/env python2
"Tool that helps manage a queue of URLs to visit later."
import sys, os, time, BaseHTTPServer, urlparse, urllib
# Help text printed for -h / -help. The "%prog" placeholder is replaced with
# the program's base name by CommandLineParser.helpexit() before printing.
__help__ = """Usage: %prog [COMMANDS...]
URL Queue manager.

Available commands:
    -h, -help
        show this help text
    -f <FILENAME>, -file <FILENAME>
        use <FILENAME> as the queue file
    -cleanup
        remove lock file (DANGEROUS; use only if the queue file has been
        permanently locked by a hard system crash!)

    -new
        create a new queue file
    -show
        show the contents of the queue
    <URL>
        append an URL to the end the queue
    -n, -next
        return the first URL from the top of the queue and remove it
    -rm <URL>, -del <URL>, -remove <URL>, -delete <URL>
        remove an URL from the queue
    -promote <URL>
        move an URL to the top of the queue

    -showcfg
        show current configuration
    -c <KEY>=<VALUE>, -cfg <KEY>=<VALUE>, -config <KEY>=<VALUE>
        change one parameter of the current configuration

    -p <PORT>, -port <PORT>
        set port number to use for the Web Interface
    -server
        start the web server with the Web Interface
"""

def parse_qs(qs):
    """Decode a URL query string into a dictionary.

    The string is split on '&' and each piece is split at its first '='.
    Pieces that contain no '=' are ignored.  Values are percent-decoded
    with urllib.unquote(); keys are stored exactly as they appear.  If a
    key occurs more than once, the last occurrence wins.
    """
    params = {}
    for chunk in qs.split('&'):
        name, sep, raw = chunk.partition('=')
        if not sep:
            # no '=' in this piece -> nothing to store
            continue
        params[name] = urllib.unquote(raw)
    return params

# Table of known configuration options.  Each option name maps to a
# (default value, human-readable description) pair.
URLQueueConfig = dict(
    allow_non_urls=(False, "allow adding strings which are not URLs"),
    web_remove_on_click=(False, "Web Interface: remove URLs when clicking links"),
    web_href_new_window=(True, "Web Interface: open links in new window"),
    web_default_port=(8581, "Web Interface: default listening port"),
)

class URLQueue(object):
    """File-backed queue of URLs with per-file configuration.

    The queue file holds one entry per line.  Blank lines and lines starting
    with '#' are ignored, lines starting with a backslash are configuration
    entries of the form "\\key value", and every other line is a queue item.

    Modifications are serialized through a lock directory named
    ".lock.<basename>" that is created next to the queue file.
    """

    def __init__(self, path):
        """Bind the queue to the file at *path*; nothing is read yet."""
        self.path = path
        d, b = os.path.split(path)
        self.lockdir = os.path.join(d, ".lock." + b)
        self.locked = False  # True only while this object holds the lock directory
        self.mtime = 0       # file mtime seen at the last load/save; 0 forces a reload
        self.config = {}
        self.queue = []

    def getcfg(self, key):
        """Return the value of option *key*.

        The value stored in the queue file takes precedence, then the
        default from URLQueueConfig.  Unknown keys yield None.
        """
        try:
            return self.config[key]
        except KeyError:
            pass
        try:
            return URLQueueConfig[key][0]
        except KeyError:
            return None

    def setcfg(self, key, value, update=True):
        """Set option *key* from the string *value*.

        *key* is stripped and lower-cased.  The usual on/off words are
        converted to booleans, integer strings to ints, and anything else is
        kept as a stripped string.  With *update* true the queue file is
        reloaded, modified and rewritten under the lock; otherwise only the
        in-memory configuration changes.

        Returns False if *key* is not a known option, True otherwise.
        """
        key = key.strip().lower()
        if key not in URLQueueConfig:
            return False
        value = value.strip()
        lval = value.lower()
        if lval in ("no", "false", "off", "disable", "disabled"):
            value = False
        elif lval in ("yes", "true", "on", "enable", "enabled"):
            value = True
        else:
            try:
                value = int(value)
            except ValueError:
                pass
        if update:
            self.lock()
            try:
                self.load()
                self.config[key] = value
                self.save()
            finally:
                self.release()
        else:
            self.config[key] = value
        return True

    def get(self):
        """Reload the file if it changed and return the list of queued items."""
        self.load()
        return self.queue

    def __iter__(self):
        """Reload the file if it changed and iterate over the queued items."""
        self.load()
        for item in self.queue:
            yield item

    def lock(self):
        """Acquire the lock directory unless this object already holds it.

        Creation is retried every 10 ms; IOError is raised once more than
        100 attempts have failed.
        """
        if self.locked:
            return
        retries = 0
        while True:
            try:
                os.mkdir(self.lockdir)
            except OSError:
                retries += 1
                if retries > 100:
                    raise IOError("data file is locked permanently")
                time.sleep(0.01)
            else:
                self.locked = True
                return

    def load(self):
        """Re-read the queue file if its mtime is newer than the last one seen.

        Takes the lock for the duration of the read unless it is already
        held.  Raises IOError if the file cannot be opened.
        """
        try:
            mtime = os.path.getmtime(self.path)
        except OSError:
            # mtime unavailable: pretend the file is newer so that open()
            # below reports the actual problem to the caller
            mtime = self.mtime + 1
        if mtime <= self.mtime:
            return
        ownlock = not self.locked
        if ownlock:
            self.lock()
        try:
            f = open(self.path, "r")
            try:
                self.config = {}
                self.queue = []
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    if line.startswith('\\'):
                        # "\key value  # comment": the key is the first word
                        # minus the backslash, the value is the remainder up
                        # to an optional '#'
                        parts = line.split()
                        self.setcfg(parts[0][1:], line[len(parts[0]):].split('#', 1)[0], False)
                    else:
                        self.queue.append(line)
            finally:
                f.close()
            try:
                self.mtime = os.path.getmtime(self.path)
            except OSError:
                self.mtime = 0
        finally:
            if ownlock:
                self.release()

    def save(self):
        """Write configuration and queue back to the file.

        Takes the lock for the duration of the write unless it is already
        held.
        """
        self.mtime = 0  # stays 0 if the write fails, so the next load() re-reads
        ownlock = not self.locked
        if ownlock:
            self.lock()
        try:
            data = "".join(["\\%s %s\n" % (key, str(self.getcfg(key))) for key in self.config])
            data += "".join([item + "\n" for item in self.queue])
            f = open(self.path, "w")
            try:
                f.write(data)
            finally:
                f.close()
            try:
                self.mtime = os.path.getmtime(self.path)
            except OSError:
                pass
        finally:
            if ownlock:
                self.release()

    def new(self):
        """Create an empty queue file.

        Returns False, without touching anything, if the path already exists.
        """
        if os.path.exists(self.path):
            return False
        self.config = {}
        self.queue = []
        self.save()
        return True

    def add(self, url):
        """Append *url* to the end of the queue.

        Returns None on success, or a string explaining why the URL was
        rejected (not a URL while 'allow_non_urls' is off, or already queued).
        """
        self.lock()
        try:
            self.load()
            if not self.getcfg('allow_non_urls') and (url.find("://") < 0):
                return "non-URL not allowed"
            if url in self.queue:
                return "URL already in queue"
            self.queue.append(url)
            self.save()
        finally:
            self.release()
        return None

    def pop(self, url=None):
        """Remove an item from the queue and return it.

        Without *url* the first item is removed; None is returned if the
        queue is empty.  With *url* that specific item is removed; if it is
        not queued, *url* is returned as-is and the file is not rewritten.
        """
        self.lock()
        try:
            self.load()
            if url:
                try:
                    url = self.queue.pop(self.queue.index(url))
                except ValueError:
                    return url
            elif self.queue:
                url = self.queue.pop(0)
            else:
                return None
            self.save()
        finally:
            self.release()
        return url

    def promote(self, url):
        """Move *url* to the top of the queue and return it.

        A URL that is not queued yet is inserted at the top.
        """
        self.lock()
        try:
            self.load()
            try:
                url = self.queue.pop(self.queue.index(url))
            except ValueError:
                pass
            self.queue.insert(0, url)
            self.save()
        finally:
            self.release()
        return url

    def contains(self, url):
        """Reload the file if it changed and tell whether *url* is queued."""
        self.load()
        return (url in self.queue)

    def release(self):
        """Give up the lock if this object holds it."""
        if self.locked:
            # Clear the flag as well: otherwise every later lock() would
            # return immediately without creating the lock directory, and a
            # later release()/__del__ could remove a lock directory that
            # belongs to another process.
            self.locked = False
            self.cleanup()

    def cleanup(self):
        """Remove the lock directory unconditionally, ignoring failures."""
        try:
            os.rmdir(self.lockdir)
        except OSError:
            pass

    def __del__(self):
        """Release a lock that is still held when the object is destroyed."""
        self.release()


class CommandLineParser(object):
    """Minimal left-to-right reader over a list of command-line arguments."""

    def __init__(self, argv=None):
        """Take a private copy of *argv* and split off the program name.

        sys.argv is used when *argv* is None or empty.  The base name of the
        first element is stored in self.me; the remaining elements stay in
        self.argv until consumed by next().
        """
        source = argv or sys.argv
        self.argv = source[:]
        program = self.argv.pop(0)
        self.me = os.path.basename(program)

    def errexit(self, msg, code=2):
        """Print "Error: <msg>" to stderr and exit with status *code*."""
        print >>sys.stderr, "Error:", msg
        sys.exit(code)

    def helpexit(self, helptext):
        """Write *helptext* to stdout, with "%prog" replaced by the program name, and exit with status 0."""
        rendered = helptext.replace("%prog", self.me)
        sys.stdout.write(rendered)
        sys.exit(0)

    def __nonzero__(self):
        """Truth value: True while unread arguments remain."""
        return bool(self.argv)

    def next(self):
        """Return and consume the next argument; exit with an error if none is left."""
        if self.argv:
            return self.argv.pop(0)
        self.errexit("unexpected end of command line")


class URLQueueRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP request handler implementing the Web Interface.

    Every action is a GET request whose first path component selects the
    function (add, promote, rm/remove/del/delete, next, config, redir,
    index) and whose query string carries the parameters.  The handler
    works on the module-level queue object Q and uses the module-level
    ServerPort for the bookmarklet links.
    """

    @staticmethod
    def _esc(text):
        """Return *text* with HTML special characters replaced by entities.

        The result can be placed in element content and in single- or
        double-quoted attribute values.
        """
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                             ('"', '&quot;'), ("'", '&#39;')):
            text = text.replace(char, entity)
        return text

    def do_GET(self):
        """Dispatch a GET request to the queue operation named by its path."""
        self.url = urlparse.urlparse(self.path)
        self.qp = parse_qs(self.url.query)
        func = self.url.path[1:].lower()
        if not func:
            func = "index"
        if func == "add":
            if self.expect('url'): return
            warn = Q.add(self.qp['url'])
            if warn:
                return self.redirect(msg = "URL not added: " + warn)
            else:
                return self.redirect(msg = "New URL added.")
        elif func == "promote":
            if self.expect('url'): return
            Q.promote(self.qp['url'])
            return self.redirect()
        elif func in ("rm", "remove", "del", "delete"):
            if self.expect('url'): return
            url = Q.pop(self.qp['url'])
            if 'visit' in self.qp:
                # remove-and-visit: send the browser on to the removed URL
                return self.redirect(url)
            else:
                return self.redirect()
        elif func == "next":
            url = Q.pop()
            if url:
                return self.redirect(url)
            else:
                return self.redirect(msg = "There are no URLs in the queue.")
        elif func == "config":
            if self.expect('key', 'value'): return
            Q.setcfg(self.qp['key'],self.qp['value'])
            return self.redirect()
        elif func == "redir":
            if self.expect('url'): return
            return self.redirect(self.qp['url'])
        elif func == "index":
            self.mainpage()
        else:
            self.send_error(404)

    def expect(self, *args):
        """Check that all query parameters named in *args* are present.

        For the first missing one a 400 response is sent and True is
        returned; the caller must then stop processing.  Returns False if
        nothing is missing.
        """
        for arg in args:
            if arg not in self.qp:
                self.send_response(400)
                self.send_header('Content-Type', 'text/html')
                self.end_headers()
                self.wfile.write("<h1>Bad Request</h1>\nExpected parameter '<code>%s</code>' missing." % arg)
                return True
        return False

    def redirect(self, url=None, msg=None):
        """Redirect the client to *url*, or to the main page if no URL is given.

        When redirecting to the main page, *msg* is passed along as its
        'msg' parameter.  If the request carries a 'noredir' parameter, a
        200 page containing a link to the target is sent instead of a 302.
        """
        if not url:
            url = "/index"
            if msg:
                url += "?msg=" + urllib.quote(msg)
        # The target can come straight from the query string.  Dropping
        # CR/LF keeps it from ending the Location header early and
        # injecting additional headers into the response.
        url = url.replace('\r', '').replace('\n', '')
        if 'noredir' in self.qp:
            self.send_response(200)
            self.send_header('Content-Type', 'text/html')
            self.end_headers()
            safe = self._esc(url)
            self.wfile.write("Redirection to <a href=\"%s\">%s</a>.\n" % (safe, safe))
        else:
            self.send_response(302)
            self.send_header('Location', url)
            self.end_headers()

    def mainpage(self):
        """Send the main page: status message, bookmarklets, switches and the queue.

        NOTE: request- and queue-supplied text is HTML-escaped, which
        prevents markup injection.  It does not restrict the scheme of a
        queued URL used as a link target.
        """
        self.send_response(200)
        self.send_header('Content-Type', 'text/html')
        self.end_headers()
        self.wfile.write('''<!DOCTYPE html>
<html><head>
<title>URL queue</title>
<style type="text/css">
table { border-collapse:collapse; }
td { padding:1px; margin:0; }
#msg { width:100%; padding:4px; border:solid 1px #666; background:#ddd;
       border-radius:3px; }
#cfg .btn a { display:block; font-weight:bold; width:3em; border-radius:3px;
              background:#eee; color:#444; text-decoration:none; text-align:center; }
#cfg .btn a.toggle:hover { background:#888; color:#eee; }
#cfg .btn a.on { background:#2a2; color:#fff; }
#cfg .btn a.off { background:#a22; color:#fff; }
#cfg .desc { margin-left:8px; }
#new, #new td, #new td input { width:100%; }
#new { margin-top:8px; }
#new th { white-space:nowrap; padding-right:8px; }
#urls { width:100%; }
#urls .url { width:100%; white-space:nowrap; overflow:ellipsis; }
#urls tr:hover { background:#eee; }
#urls .btn a { color:#444; text-decoration:none; }
#urls .btn a:hover { background:#666; color:#eee; text-decoration:none; }
</style></head><body>
<h1>URL Queue</h1>
''')
        if 'msg' in self.qp:
            # 'msg' is taken from the query string: escape before echoing it
            self.wfile.write('<p id="msg">%s</p>\n' % self._esc(self.qp['msg']))
        self.wfile.write('''<p>Bookmarklets:
[<a href=\'javascript:window.location.href="http://localhost:$port/add?url="+escape(window.location);\'>add to queue</a>]
[<a href=\'http://localhost:$port/next\'>visit next in queue</a>]
[<a href=\'javascript:x=function(){window.open("http://localhost:$port/next","_blank");};x();\'>visit next in new window</a>]
</p>
<table id="cfg">
'''.replace('$port', str(ServerPort)))
        # Only options whose current value is 0/1 (i.e. boolean switches)
        # get an ON/OFF row.
        cfg = []
        for key in URLQueueConfig:
            value = Q.getcfg(key)
            try:
                ival = int(value)
            except (TypeError, ValueError):
                ival = -1
            if (ival == 0) or (ival == 1):
                cfg.append((key, ival, URLQueueConfig[key][1]))
        cfg.sort()
        for key, ival, desc in cfg:
            if ival:
                on_class = 'on'
                off_class = 'toggle'
            else:
                on_class = 'toggle'
                off_class = 'off'
            self.wfile.write('<tr>\n<td class="btn"><a class="%s" href="/config?key=%s&value=1">ON</a></td>\n' % (on_class, key))
            self.wfile.write('<td class="btn"><a class="%s" href="/config?key=%s&value=0">OFF</a></td>\n' % (off_class, key))
            self.wfile.write('<td class="desc">%s</td>\n</tr>' % desc)
        self.wfile.write('''</table>
<form action="/add" method="get"><table id="new"><tr>
<th>Add new URL:</th>
<td><input type="text" name="url"></td>
</tr></table></form>
<p>%d URL(s) queued.</p>
<table id="urls">''' % len(Q.queue))
        autoremove = Q.getcfg('web_remove_on_click')
        extras = ""
        if Q.getcfg('web_href_new_window'):
            extras += ' target="_blank"'
        if autoremove:
            extras += ' onClick="window.location.reload();"'
        for url in Q.queue:
            qurl = urllib.quote(url)
            if autoremove:
                href = "/rm?visit=1&url=" + qurl
            else:
                href = url
            # queue entries are arbitrary stored text: escape both the
            # attribute value and the visible label
            self.wfile.write('<tr>\n<td class="url"><a href="%s"%s>%s</a></td>\n' % (self._esc(href), extras, self._esc(url)))
            self.wfile.write('<td class="btn">[<a href="/promote?url=%s">promote</a>]</td>' % qurl)
            self.wfile.write('<td class="btn">[<a href="/delete?url=%s">delete</a>]</td>\n</tr>' % qurl)
        self.wfile.write('</table></body></html>\n')

    def log_message(self, format, *args):
        """Discard log messages (overrides the base class implementation)."""
        pass

# Command-line entry point: the arguments are processed strictly from left
# to right, each command acting on the queue object that is current at that
# point.
if __name__ == "__main__":
    # default queue file in the current directory; '-f' replaces it
    Q = URLQueue(".urlqueue")
    # None means "not given"; '-server' then falls back to 'web_default_port'
    ServerPort = None
    cmdline = CommandLineParser()
    if not cmdline:
        print >>sys.stderr, "No commands specified, doing nothing."
        print >>sys.stderr, "Use '%s -h' to get help." % cmdline.me
        sys.exit(2)
    try:
        while cmdline:
            opt = cmdline.next()
            # accept '--option' as well by dropping one of the two dashes
            if opt.startswith("--"):
                opt = opt[1:]
            if opt in ("-h", "-help"):
                cmdline.helpexit(__help__)
            elif opt in ("-f", "-file"):
                # only commands after this one use the new queue file
                Q = URLQueue(cmdline.next())
            elif opt in ("-p", "-port"):
                try:
                    ServerPort = int(cmdline.next())
                except ValueError:
                    cmdline.errexit("integer argument expected")
            elif opt in ("-serve", "-server"):
                if cmdline:
                    print >>sys.stderr, "Warning: all command-line options following '-server' are ignored"
                # load first so that a missing queue file is reported before serving
                Q.load()
                if not ServerPort:
                    ServerPort = Q.getcfg('web_default_port')
                print >>sys.stderr, "Starting Web Interface on http://localhost:%d/ ..." % ServerPort
                try:
                    BaseHTTPServer.HTTPServer(('localhost', ServerPort), URLQueueRequestHandler).serve_forever()
                except KeyboardInterrupt:
                    pass
                # the program ends here, which is why later options are ignored
                sys.exit(0)
            elif opt == "-new":
                if not Q.new():
                    print >>sys.stderr, "Error: queue file '%s' already exists" % Q.path
                    sys.exit(1)
            elif opt == "-show":
                Q.load()
                for url in Q.queue:
                    print url
            elif opt in ("-showconfig", "-showcfg"):
                Q.load()
                cfg = []
                for key in URLQueueConfig:
                    cfg.append((key, str(Q.getcfg(key)), URLQueueConfig[key][1]))
                cfg.sort()
                # column widths for aligned "key = value  # comment" output
                maxkey = max([len(x[0]) for x in cfg])
                maxval = max([len(x[1]) for x in cfg])
                for key, value, comment in cfg:
                    print key.ljust(maxkey), "=", value.ljust(maxval), " #", comment
            elif opt in ("-c", "-cfg", "-config"):
                try:
                    key, value = cmdline.next().split('=', 1)
                except ValueError:
                    cmdline.errexit("key=value format expected")
                if not Q.setcfg(key, value):
                    print >>sys.stderr, "Warning: unknown configuration option '%s' ignored" % key
            elif opt == "-cleanup":
                Q.cleanup()
            elif opt in ("-rm", "-remove", "-del", "-delete"):
                Q.pop(cmdline.next())
            elif opt in ("-n", "-next"):
                url = Q.pop()
                if url:
                    print url
                else:
                    print >>sys.stderr, "No URL in queue."
            elif opt == "-promote":
                Q.promote(cmdline.next())
            elif opt.startswith('-'):
                cmdline.errexit("unknown option '%s'" % opt)
            else:
                # anything that is not an option is a URL to append
                warn = Q.add(opt)
                if warn:
                    print >>sys.stderr, "Warning: URL '%s' ignored: %s" % (opt, warn)
    except IOError, e:
        # errno 2 is ENOENT ("No such file or directory")
        if e.errno == 2:
            print >>sys.stderr, "Error: queue file '%s' does not exist, need to use the '-new' option first" % Q.path
        else:
            print >>sys.stderr, "Error:", e
        sys.exit(1)
