#! /usr/bin/env python
import os
import string

class error(Exception):
    """Module-specific exception.

    Raised for bad open flags, for opening a database directory that
    does not exist, for paths that do not map to a key, and for
    deliberately unsupported operations.

    """

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replace every '%XX' escape (XX being exactly two hexadecimal
    digits) with the character it encodes.  Anything else following a
    '%' is not an escape and is copied through unchanged, including the
    '%' itself.

    """
    hexdigits = string.hexdigits
    pieces = s.split('%')
    # The text before the first '%' can never be part of an escape.
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # Validate the digits explicitly: int() also accepts signs and
        # surrounding whitespace ('+f', ' a'), which are not escapes.
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            res.append(chr(int(code, 16)) + item[2:])
        else:
            res.append('%' + item)
    return "".join(res)

def quote(s, safe):
    """quote('abc def') -> 'abc%20def'.

    Every character of s that is not found in safe is replaced by a
    '%' followed by its character code as at least two upper-case hex
    digits; characters found in safe are copied as they are.

    """
    out = []
    for ch in s:
        if ch in safe:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)

class FileDBM:
    """File Database class.

    This stores strings as files in a directory: each key is quoted
    into a file name directly under the base directory, and the value
    is the content of that file.

    The mode passed to the constructor is one of:

    'r' -- open an existing database read-only (the default)
    'w' -- open an existing database for reading and writing
    'c' -- like 'w', but create the database if it does not exist
    'n' -- always start with a new, empty, writable database

    Note, no locking is done.  It would be wise to make sure there is
    only one writer at any given time.

    """

    # Characters stored unescaped in file names.  ascii_letters is used
    # instead of string.letters because the latter depends on the
    # current locale, which would make the on-disk names depend on it.
    safe = string.ascii_letters + string.digits + ',!@#$^()-_+='

    def __init__(self, base, mode='r'):
        """Open the database stored in directory base.

        Raises error if mode is not a known flag, or if the directory
        is missing and mode is 'r' or 'w'.

        """
        self.base = os.path.abspath(base)
        if mode in ('r', 'w'):
            if not os.path.isdir(self.base):
                raise error("need 'c' or 'n' flag to open new db")
            self.writable = (mode == 'w')
        elif mode == 'c':
            if not os.path.isdir(self.base):
                os.mkdir(self.base)
            self.writable = True
        elif mode == 'n':
            if os.path.isdir(self.base):
                # Remove the old database including its contents.
                # os.removedirs() is not suitable: it fails on a
                # non-empty directory and prunes empty parents.
                import shutil
                shutil.rmtree(self.base)
            os.mkdir(self.base)
            self.writable = True
        else:
            raise error("flags should be one of 'r', 'w', 'c', or 'n'")

    def _require_writable(self):
        """Raise IOError unless the database was opened writable."""
        if not self.writable:
            raise IOError('database was not opened writable')

    def key2path(self, key):
        """Transform key to a pathname.

        By default this does URL quoting on safe characters.
        Be sure to provide a path2key method if you override this.

        """

        return os.path.join(self.base,
                            quote(key, self.safe))

    def path2key(self, path):
        """Transform a pathname to a key.

        Raises error if path is not a file directly inside the base
        directory.

        """

        # Compare against the base *with* a trailing separator so that
        # a sibling such as '/tmp/db2' is not mistaken for '/tmp/db'.
        prefix = os.path.join(self.base, '')
        if not path.startswith(prefix):
            raise error("Not a valid path")
        key = path[len(prefix):]
        if os.path.sep in key:
            raise error("Not a valid path")
        return unquote(key)

    def __len__(self):
        """Return the number of keys; this walks the whole database."""
        count = 0
        for _ in self.iterkeys():
            count += 1
        return count

    def __getitem__(self, key):
        """Return the value stored under key.

        Raises TypeError if key is not a string and KeyError if the
        file for key cannot be read.

        """
        if not isinstance(key, str):
            raise TypeError("keys must be strings")
        path = self.key2path(key)
        # file() rather than open(): the name `open` is rebound at
        # module level in this file and no longer means the builtin.
        try:
            f = file(path)
            try:
                return f.read()
            finally:
                f.close()
        except IOError:
            raise KeyError(key)

    def get(self, key, default=None):
        """Return the value for key, or default if key is missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key, val):
        """Store val under key, replacing any previous value.

        Raises IOError if the database is read-only and TypeError if
        key or val is not a string.

        """
        self._require_writable()
        if not (isinstance(key, str) and isinstance(val, str)):
            raise TypeError("keys and values must be strings")
        path = self.key2path(key)
        f = file(path, 'w')
        try:
            f.write(val)
        finally:
            f.close()

    def setdefault(self, key, default):
        """Return the value for key, storing default first if missing."""
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def __delitem__(self, key):
        """Remove key.

        Raises IOError if the database is read-only and KeyError if the
        file for key cannot be removed.

        """
        self._require_writable()
        path = self.key2path(key)
        try:
            os.remove(path)
        except OSError:
            raise KeyError(key)

    def __contains__(self, value):
        # This could be a lot slower than the user would expect.  If you
        # need it, use has_value.  Of course, you could make a derived
        # class that sets __contains__ = has_value
        raise error("You didn't really want to do this.")

    def has_key(self, key):
        """Return true if a file exists for key."""
        return os.path.exists(self.key2path(key))

    def has_value(self, value):
        """Return True if any key maps to value; reads every file."""
        for val in self.itervalues():
            if val == value:
                return True
        return False

    def iterkeys(self):
        """Iterate over all keys, in directory-walk order."""
        for root, dirs, files in os.walk(self.base):
            for f in files:
                try:
                    key = self.path2key(os.path.join(root, f))
                except error:
                    # Not a file that belongs to this layout; skip it.
                    continue
                yield key

    def __iter__(self):
        return self.iterkeys()

    def itervalues(self):
        """Iterate over all values, in the same order as iterkeys()."""
        for key, val in self.iteritems():
            yield val

    def iteritems(self):
        """Iterate over (key, value) pairs."""
        for k in self.iterkeys():
            yield (k, self[k])

    def keys(self):
        """Return a list of all keys."""
        return list(self.iterkeys())

    def items(self):
        """Return a list of all (key, value) pairs."""
        return list(self.iteritems())

    def values(self):
        """Return a list of all values."""
        return list(self.itervalues())


class LongFileDBM(FileDBM):
    """A file database supporting any-length keys.

    It does this by splitting keys up into directories: every part of
    a key except the last becomes a directory (marked with dirsuffix),
    and the last part becomes the file holding the value.

    """

    # A special string to append to directories, so that no file will
    # ever have the same path as a directory
    dirsuffix = '%%'

    # In the worst case, quote makes the string 3x bigger.
    # So any key longer than 80 characters gets split up.  This
    # gives us plenty of room with a 255-character filename limit,
    # which seems to be the minimum limit on any OS these days.
    dirlen = 80

    def split(self, key):
        """Split a key into its path components.

        Each component in the list returned will be a directory.  Called
        before quoting parts.

        This is probably what you want to override.  You may need to do
        join() too.

        """

        parts = []
        while key:
            parts.append(key[:self.dirlen])
            key = key[self.dirlen:]
        return parts

    def join(self, parts):
        """Join directory parts into a single string.

        This is called after unquoting parts.

        """
        return ''.join(parts)

    def key2path(self, key, makedirs=False):
        """Transform key to a pathname.

        If makedirs is true, the directories leading to the file are
        created when they do not exist yet.

        """
        # split() returns nothing for an empty key; treat that as one
        # empty component so it maps to a path instead of raising
        # IndexError below.
        parts = self.split(key) or ['']
        path = self.base

        for part in parts[:-1]:
            # Escape the part, then append the directory marker so no
            # shorter key can have this path
            d = quote(part, self.safe) + self.dirsuffix

            # Stick it on the end
            path = os.path.join(path, d)

            # Make directory if requested
            if makedirs and not os.path.isdir(path):
                os.mkdir(path)

        # Now we can add the filename
        return os.path.join(path, quote(parts[-1], self.safe))

    def path2key(self, path):
        """Transform a pathname to a key.

        Raises error if path does not lie inside the base directory.

        """

        # Compare against the base *with* a trailing separator so that
        # a sibling such as '/tmp/db2' is not mistaken for '/tmp/db'.
        prefix = os.path.join(self.base, '')
        if not path.startswith(prefix):
            raise error("Not a valid path")

        parts = []
        for p in path[len(prefix):].split(os.path.sep):
            # Strip the special string
            if p.endswith(self.dirsuffix):
                p = p[:-len(self.dirsuffix)]
            parts.append(unquote(p))

        return self.join(parts)

    def __setitem__(self, key, val):
        """Store val under key, creating directories as needed.

        Raises IOError if the database is read-only and TypeError if
        key or val is not a string.

        """
        if not self.writable:
            raise IOError('database was not opened writable')
        if not (isinstance(key, str) and isinstance(val, str)):
            raise TypeError("keys and values must be strings")
        path = self.key2path(key, True)
        # file() rather than open(): the name `open` is rebound at
        # module level in this file and no longer means the builtin.
        f = file(path, 'w')
        try:
            f.write(val)
        finally:
            f.close()

    def __delitem__(self, key):
        """Remove key and any directories left empty by its removal.

        Raises IOError if the database is read-only and KeyError if the
        file for key cannot be removed.

        """
        if not self.writable:
            raise IOError('database was not opened writable')
        path = self.key2path(key)
        try:
            os.remove(path)
        except OSError:
            raise KeyError(key)

        # Now try to clean up any directories, innermost first, but
        # never the base directory itself
        while True:
            path = os.path.dirname(path)
            if len(path) <= len(self.base):
                break
            try:
                os.rmdir(path)
            except OSError:
                # Guess it's not empty
                break

    def iterkeys(self):
        """Iterate over all keys, in directory-walk order."""
        for root, dirs, files in os.walk(self.base):
            for f in files:
                try:
                    key = self.path2key(os.path.join(root, f))
                except error:
                    # Not a path that maps to a key; skip it.
                    continue
                yield key

class WordFileDBM(LongFileDBM):
    """A layout that files keys under a directory named by their first word.

    I use this in my firebot, but it's included here more as an example
    of how you could extend LongFileDBM.

    """

    # Spaces are stored literally in file names rather than escaped
    safe = LongFileDBM.safe + ' '

    def split(self, key):
        """Split key into path components, first-word directory first.

        There are three layouts:

        1. no_spaces,_short            -> stored as a single file
        2. one/one or more spaces      -> first word is the directory
        3. _long/really_..._long       -> first word too long (or a
                                          long key without spaces)

        A key beginning with "_long " therefore shares a directory with
        the long keys.  Whenever a leading directory is added it holds
        no information of its own: the remaining components are the
        plain LongFileDBM split of the whole key.

        """
        chunks = LongFileDBM.split(self, key)
        words = key.split(' ', 1)
        first = words[0]

        # Case 1: a single word that fits into one component
        if len(words) == 1 and len(chunks) == 1:
            return chunks

        # Case 3: the first word cannot serve as a directory name
        if len(first) > self.dirlen:
            return ['_long'] + chunks

        # Case 2: file the key under its first word
        return [first] + chunks

    def join(self, parts):
        """Rebuild a key from its unquoted path components.

        A single component is the key itself; with more than one, the
        leading directory is dropped and the rest is joined back
        together, e.g. ["more", "more than one part"].

        """
        if len(parts) != 1:
            return LongFileDBM.join(self, parts[1:])
        return parts[0]

# Module-level alias so that callers can write <module>.open(base, mode)
# to get a LongFileDBM.  Note that this shadows the builtin open() for
# all code in this module.
open = LongFileDBM

if __name__ == '__main__':
    # Self-test.  It runs in a private scratch directory that is
    # removed afterwards, because opening with 'n' discards whatever
    # database already exists at the given location.
    import shutil
    import tempfile

    def asserteq(a, b):
        assert a == b, "%r != %r" % (a, b)

    base = os.path.abspath(tempfile.mkdtemp())
    try:
        chunk = '1234567890' * 8        # exactly dirlen characters

        # Key -> path mapping of the plain long-key layout
        f = LongFileDBM(base, 'n')
        asserteq(f.key2path('this is a thing'),
                 os.path.join(base, 'this%20is%20a%20thing'))
        asserteq(f.key2path('1234567890' * 8),
                 os.path.join(base, chunk))
        asserteq(f.key2path('1234567890' * 20),
                 os.path.join(base, chunk + '%%', chunk + '%%',
                              '1234567890' * 4))

        # Round trips through the first-word layout
        f = WordFileDBM(base, 'n')
        asserteq(f.path2key(f.key2path('this is a thing')),
                 'this is a thing')
        asserteq(f.path2key(f.key2path('1234567890' * 8)),
                 '1234567890' * 8)
        asserteq(f.path2key(f.key2path('1234567890' * 20)),
                 '1234567890' * 20)

        # Mapping interface
        asserteq(f.get('grape'), None)
        asserteq(f.setdefault('grape', 'red'), 'red')
        asserteq(f.get('grape'), 'red')
        asserteq(f.setdefault('grape', 'green'), 'red')

        longstr = '1234567890' * 10
        f[longstr] = '1'
        asserteq(f[longstr], '1')

        # Compare sorted lists: the order of keys() follows the
        # directory walk and is not part of the contract.
        asserteq(sorted(f.keys()), sorted(['grape', longstr]))

        del f['grape']
        del f[longstr]
        asserteq(f.keys(), [])
    finally:
        shutil.rmtree(base, ignore_errors=True)