#!/bin/python
# -*- coding: UTF-8 -*-
"""Item cache.

Between runs of Planet we need somewhere to store the feed information
we parsed, so that we don't lose information when a particular feed
goes away or is too short to hold enough items.

This module provides the code to handle this cache transparently enough
that the rest of the code can take the persistence for granted.
"""

import os
import re


# Patterns applied in turn by filename() to sanitise cache filenames.

# A leading "scheme://" prefix such as "http://".
re_url_scheme = re.compile(r"^[^:]*://")

# Any run of "/" or "?" characters.
re_slash = re.compile(r"[?/]+")

# Commas and full stops at the very start of the name.
re_initial_cruft = re.compile(r"^[,.]*")

# Commas and full stops at the very end of the name.
re_final_cruft = re.compile(r"[,.]*$")


class CachedInfo:
    """Cached information.

    This class is designed to hold information that is stored in a cache
    between instances.  It can act both as a dictionary (c['foo']) and
    as an object (c.foo) to get and set values and supports string,
    date and null values.

    Every key has a value (always held as a string), a type (STRING,
    DATE or NULL) and a "cached" flag; only keys whose flag is true are
    written out by cache_write().  Spaces in key names are replaced with
    underscores.

    The cache is expected to be a mapping that also provides has_key()
    and sync().  Unless the object is the root, its entries are stored
    under "<id> <key>" and "<id> <key> type", and the space-separated
    list of its key names under "<id>".  The root object uses the bare
    key names and keeps its list of key names under " keys".

    If you wish to support special fields you can derive a class off this
    and implement get_FIELD(key) and set_FIELD(key, value) functions
    which will be automatically called by get() and set().
    """
    STRING = "string"
    DATE   = "date"
    NULL   = "null"

    def __init__(self, cache, id_, root=0):
        """Create an empty information holder.

        cache is the backing store, id_ identifies this object within it
        (spaces are escaped as "%20" so the id never contains one) and
        root selects the root-object key layout when true.
        """
        self._type = {}
        self._value = {}
        self._cached = {}

        self._cache = cache
        self._id = id_.replace(" ", "%20")
        self._root = root

    def cache_key(self, key):
        """Return the cache key name for the given key."""
        key = key.replace(" ", "_")
        if self._root:
            return key
        else:
            return self._id + " " + key

    def _cache_keys_key(self):
        """Return the cache key under which the list of keys is stored."""
        if self._root:
            return " keys"
        else:
            return self._id

    def _cache_stored_keys(self):
        """Return the key names listed in the cache.

        Returns None when the cache holds no list for this object.  An
        object written without any cached keys stores an empty string,
        and splitting that yields [""], so empty names are dropped
        rather than being treated as a key.
        """
        keys_key = self._cache_keys_key()
        if not self._cache.has_key(keys_key):
            return None

        return [key for key in self._cache[keys_key].split(" ") if key]

    def _cache_discard(self, cache_key):
        """Delete a value and its type from the cache if they are there."""
        for name in (cache_key, cache_key + " type"):
            if self._cache.has_key(name):
                del self._cache[name]

    def _find_hook(self, name):
        """Return the callable attribute with the given name, or None.

        Attribute lookup falls back to the stored values (see
        __getattr__), so a stored key that happens to be called
        "set_foo" or "get_foo" must not be mistaken for a hook.
        """
        func = getattr(self, name, None)
        if callable(func):
            return func
        return None

    def cache_read(self):
        """Read information from the cache.

        Keys that were set locally with a false cached flag are left
        alone; every other key listed in the cache is (re)loaded.
        """
        keys = self._cache_stored_keys()
        if keys is None:
            return

        for key in keys:
            cache_key = self.cache_key(key)
            if key not in self._cached or self._cached[key]:
                # Key either hasn't been loaded, or is one for the cache
                self._value[key] = self._cache[cache_key]
                self._type[key] = self._cache[cache_key + " type"]
                self._cached[key] = 1

    def cache_write(self, sync=1):
        """Write information to the cache.

        Whatever was stored before is removed first, then every key
        whose cached flag is true is written along with its type.  The
        cache is synced afterwards unless sync is false.
        """
        self.cache_clear(sync=0)

        keys = []
        for key in self.keys():
            cache_key = self.cache_key(key)
            if not self._cached[key]:
                # Non-cached keys need to be cleared
                self._cache_discard(cache_key)
                continue

            keys.append(key)
            self._cache[cache_key] = self._value[key]
            self._cache[cache_key + " type"] = self._type[key]

        self._cache[self._cache_keys_key()] = " ".join(keys)
        if sync:
            self._cache.sync()

    def cache_clear(self, sync=1):
        """Remove information from the cache.

        Does nothing when the cache has no list of keys for this object.
        The cache is synced afterwards unless sync is false.
        """
        keys = self._cache_stored_keys()
        if keys is None:
            return

        del self._cache[self._cache_keys_key()]
        for key in keys:
            self._cache_discard(self.cache_key(key))

        if sync:
            self._cache.sync()

    def has_key(self, key):
        """Check whether the key exists."""
        key = key.replace(" ", "_")
        return key in self._value

    def key_type(self, key):
        """Return the key type (raises KeyError for an unknown key)."""
        key = key.replace(" ", "_")
        return self._type[key]

    def set(self, key, value, cached=1):
        """Set the value of the given key.

        If a set_KEY function exists that is called with (key, value);
        the cached flag is not passed on to it.  Otherwise None is
        stored as a null, and for anything else the string function is
        called and the date function if that raises TypeError.

        A false cached flag keeps the key out of the cache.
        """
        key = key.replace(" ", "_")

        func = self._find_hook("set_" + key)
        if func is not None:
            return func(key, value)

        if value is None:
            return self.set_as_null(key, value, cached=cached)

        try:
            return self.set_as_string(key, value, cached=cached)
        except TypeError:
            return self.set_as_date(key, value, cached=cached)

    def get(self, key):
        """Return the value of the given key.

        If a get_KEY function exists that is called otherwise the
        correctly typed function is called if that exists.  Failing
        both, the raw stored value is returned.  Raises KeyError for an
        unknown key that has no get_KEY function.
        """
        key = key.replace(" ", "_")

        func = self._find_hook("get_" + key)
        if func is not None:
            return func(key)

        func = self._find_hook("get_as_" + self._type[key])
        if func is not None:
            return func(key)

        return self._value[key]

    def set_as_string(self, key, value, cached=1):
        """Set the key to the string value.

        The value is passed through utf8(), so it is stored as a UTF-8
        encoded string; utf8() raises TypeError for values that are not
        strings at all.
        """
        value = utf8(value)

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.STRING
        self._cached[key] = cached

    def get_as_string(self, key):
        """Return the key as a string value."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return self._value[key]

    def set_as_date(self, key, value, cached=1):
        """Set the key to the date value.

        The date should be a 9-item tuple as returned by time.gmtime().
        It is stored as its items separated by spaces.
        """
        value = " ".join([str(item) for item in value])

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.DATE
        self._cached[key] = cached

    def get_as_date(self, key):
        """Return the key as a date value (a tuple of integers)."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        value = self._value[key]
        return tuple([int(item) for item in value.split(" ")])

    def set_as_null(self, key, value, cached=1):
        """Set the key to the null value.

        This only exists to make things less magic.  The value argument
        is ignored; an empty string is stored with the NULL type.
        """
        key = key.replace(" ", "_")
        self._value[key] = ""
        self._type[key] = self.NULL
        self._cached[key] = cached

    def get_as_null(self, key):
        """Return the key as the null value (always None)."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return None

    def del_key(self, key):
        """Delete the given key (raises KeyError if it does not exist)."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        del self._value[key]
        del self._type[key]
        del self._cached[key]

    def keys(self):
        """Return a list of all the keys held, cached or not."""
        return list(self._value)

    def __iter__(self):
        """Iterate over a snapshot of the keys held."""
        return iter(list(self._value))

    # Special methods
    #
    # Note that item assignment (c['foo'] = x) always stores a string;
    # use set() or attribute assignment to have the type picked from
    # the value.
    __contains__ = has_key
    __setitem__  = set_as_string
    __getitem__  = get
    __delitem__  = del_key
    __delattr__  = del_key

    def __setattr__(self, key, value):
        """Store private (underscore) names on the instance, others as keys."""
        if key.startswith("_"):
            self.__dict__[key] = value
        else:
            self.set(key, value)

    def __getattr__(self, key):
        """Return the value of a key for attributes not found normally."""
        # Without this guard an instance whose __init__ has not run
        # would recurse forever: has_key() reads self._value, which
        # would land straight back in here.
        if "_value" not in self.__dict__:
            raise AttributeError(key)

        if self.has_key(key):
            return self.get(key)
        else:
            raise AttributeError(key)


def filename(directory, filename):
    """Build the path of the cache file for a feed.

    The URL scheme is dropped, runs of "/" and "?" each become a single
    comma, and commas and full stops are trimmed from both ends.  The
    result is joined onto the given directory.
    """
    substitutions = (
        (re_url_scheme, ""),
        (re_slash, ","),
        (re_initial_cruft, ""),
        (re_final_cruft, ""),
    )

    name = filename
    for pattern, replacement in substitutions:
        name = pattern.sub(replacement, name)

    return os.path.join(directory, name)

def utf8(value):
    """Return the value as a UTF-8 encoded string.

    Unicode strings, including instances of subclasses, are encoded
    directly.  Anything else is taken to be an already-encoded string:
    it is decoded as UTF-8, or failing that as ISO-8859-1, and then
    re-encoded as UTF-8.

    Raises TypeError when the value is not a string at all;
    CachedInfo.set relies on this to fall back to storing a date.
    """
    # The Unicode string type, spelt so that it needs no extra name.
    text_type = type(u'')

    if isinstance(value, text_type):
        return value.encode("utf-8")

    for encoding in ("utf-8", "iso-8859-1"):
        try:
            return text_type(value, encoding).encode("utf-8")
        except UnicodeError:
            continue

    # Safety net only: ISO-8859-1 assigns a character to every byte, so
    # the loop above is not expected to fall through.  Non-ASCII bytes
    # are replaced rather than raising.
    return text_type(value, "ascii", "replace").encode("utf-8")
