#!/usr/bin/env python
# -*- coding: utf-8 -*-

#########################################################################
#    Copyright (C) 2010, 2011 Sergio Villar Senin <svillar@igalia.com>
#
#    This file is part of ReSiStance
#
#    ReSiStance is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    ReSiStance is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with ReSiStance.  If not, see <http://www.gnu.org/licenses/>.
#########################################################################

import base64
import constants
import cPickle
import feedparser
import gobject
import gtk
import os
import urllib
import urllib2
import urlparse
import xmlrpclib

from settings import Settings
from sgmllib import SGMLParser
from threading import Thread
from xml.dom import minidom
from xml.dom.minidom import Document

# http://diveintomark.org/archives/2002/05/31/rss_autodiscovery_in_python
def getRSSLink(url):
    ''' Returns the URL of the first RSS/Atom feed advertised by the
    page at url, or an empty string if the page cannot be read.

    The page is fed to a LinkParser in BUFFERSIZE chunks and reading
    stops as soon as the parser does not need more tags or the data
    is exhausted. The link found is resolved against url, so when the
    page advertises no feed the result is url itself. '''
    BUFFERSIZE = 1024

    try:
        usock = urllib.urlopen(url)
        try:
            parser = LinkParser()
            while 1:
                chunk = usock.read(BUFFERSIZE)
                parser.feed(chunk)
                if parser.nomoretags or len(chunk) < BUFFERSIZE:
                    break
        finally:
            # Release the connection even if reading or parsing fails
            usock.close()
        return urlparse.urljoin(url, parser.href)
    except IOError:
        print('Could not establish a connection to ' + url)
        return ''


class LinkParser(SGMLParser):
    def reset(self):
        SGMLParser.reset(self)
        self.href = ''

    def do_link(self, attrs):
        if not ('rel', 'alternate') in attrs:
            return
        if (not ('type', 'application/rss+xml') in attrs) and \
                (not ('type', 'application/atom+xml') in attrs):
            return
        hreflist = [e[1] for e in attrs if e[0]=='href']
        if hreflist:
            self.href = hreflist[0]
        self.setnomoretags()

    def end_head(self, attrs):
        self.setnomoretags()
    start_body = end_head

class ReSiStanceFeedDict(feedparser.FeedParserDict):
    ''' Feed data as returned by feedparser plus the bookkeeping keys
    used by ReSiStance: 'visits' and 'sync' for the feed and 'read'
    for each one of its entries. '''

    def __init__(self, feed_data, sync=None):
        ''' Copies feed_data and adds the missing ReSiStance keys.

        Keys already present in feed_data are never overwritten. sync
        is only used when feed_data has no 'sync' key; None means
        False. '''
        super(ReSiStanceFeedDict, self).__init__()

        self.update(feed_data)

        if 'visits' not in self:
            self['visits'] = 0

        # Initialize data. Do not fail if the data has no entries at all
        for entry in self.get('entries', []):
            if 'read' not in entry:
                entry['read'] = False

        if 'sync' not in self:
            self['sync'] = sync if sync is not None else False

class FeedManager(gobject.GObject):

    def __init__(self, settings, conn_manager):
        ''' Creates a manager with an empty feed list.

        settings is kept in self.settings. conn_manager is kept as well
        and its 'connection-changed' signal is connected to
        _on_connection_changed. '''
        super(FeedManager, self).__init__()

        # Google Reader credentials, unknown until the first authentication
        self._google_reader_auth_token = None
        self._google_reader_auth = None

        self.settings = settings
        self.feed_data_list = []

        self._conn_manager = conn_manager
        conn_manager.connect('connection-changed', self._on_connection_changed)

    def _on_connection_changed(self, conn_manager):
        ''' Handler of the 'connection-changed' signal of the connection
        manager. Nothing is done with the notification for the moment '''

    def get_feed_list(self):
        ''' Returns the list of feeds. It is the list used internally by
        the manager, not a copy '''
        feeds = self.feed_data_list
        return feeds

    def _safe_callback(self, callback, *args):
        ''' Calls a callback if exists using gtk guards.

        Nothing is done if callback is None. Exceptions raised by the
        callback are propagated to the caller. '''
        if not callback:
            return

        gtk.gdk.threads_enter()
        try:
            callback(*args)
        finally:
            # Always release the GDK lock, otherwise a failing callback
            # would leave it taken forever
            gtk.gdk.threads_leave()

    def add_feed(self, url, sync, save, callback, data=None):
        ''' Adds the feed located at url using a background thread.

        All the arguments are handed over unchanged to
        _add_feed_in_thread, which does the work and calls callback '''
        worker_args = (url, sync, save, callback, data)
        Thread(target=self._add_feed_in_thread, args=worker_args).start()

    def update_feed(self, feed, callback, data=None):
        ''' Updates a single feed from the network using a background
        thread running _update_feeds_in_thread '''
        # The worker expects a list of feeds
        feeds = [feed]
        Thread(target=self._update_feeds_in_thread,
               args=(feeds, callback, data)).start()

    def update_all(self, callback, data=None):
        ''' Updates all the feeds from the network using a background
        thread running _update_feeds_in_thread.

        data is optional, as in the rest of the asynchronous calls of
        this class '''
        updating_thread = Thread(target=self._update_feeds_in_thread,
                                 args=(self.feed_data_list, callback, data))
        updating_thread.start()

    def remove_feed(self, feed):
        ''' Removes feed from the list of feeds and then stores the list
        in disk. Errors opening the database file are ignored '''
        feeds = self.feed_data_list
        feeds.remove(feed)

        try:
            self.save(None)
        except IOError:
            # Could not open the database file, keep going
            pass

    def export_opml(self, file_path, callback, data=None):
        ''' Exports the list of feeds to file_path in OPML format using
        a background thread running _export_opml_in_thread, which is
        also in charge of calling callback '''
        # I/O errors happen (and are handled) inside the thread, there
        # is nothing to catch here because Thread.start() does not raise
        # IOError
        exporting_thread = Thread(target=self._export_opml_in_thread,
                                  args=(file_path, callback, data))
        exporting_thread.start()

    def _export_opml_in_thread(self, file_path, callback, user_data):
        ''' Writes the list of feeds to file_path as an OPML 1.0 document
        with one <outline> per feed (title, optional subtitle and URL).

        callback is called with True if the file was written and with
        False if it could not be written. user_data is not used. '''
        import codecs

        # Create the minidom document
        opml_doc = Document()

        # Add tags and create the body of the document
        root = opml_doc.createElement('opml')
        root.setAttribute('version','1.0')
        root.appendChild(opml_doc.createElement('head'))
        body = opml_doc.createElement('body')
        for feed_data in self.feed_data_list:
            outline = opml_doc.createElement('outline')
            outline.setAttribute('title',feed_data.feed.title)
            if 'subtitle' in feed_data.feed:
                outline.setAttribute('text',feed_data.feed.subtitle)
            outline.setAttribute('xmlUrl',feed_data['href'])
            body.appendChild(outline)

        root.appendChild(body)
        opml_doc.appendChild(root)

        retval = True
        try:
            # writexml() only declares the encoding in the XML header,
            # the writer is the one that must encode the (unicode) data
            file_opml = codecs.open(file_path, 'w', 'utf-8')
            try:
                opml_doc.writexml(file_opml, "    ", "", "\n", "UTF-8")
            finally:
                # Close the file so the data is in disk before telling
                # the caller that the export has finished
                file_opml.close()
        except IOError:
            print('Error exporting OPML, could not write to ' + file_path)
            retval = False

        self._safe_callback(callback, retval)

    def import_opml(self, file_path, callback, data=None):
        ''' Reads the feed URLs stored in the OPML file file_path using
        a background thread running _import_opml_in_thread, which is
        also in charge of calling callback '''
        # I/O errors happen (and are handled) inside the thread, there
        # is nothing to catch here because Thread.start() does not raise
        # IOError
        importing_thread = Thread(target=self._import_opml_in_thread,
                                  args=(file_path, callback, data))
        importing_thread.start()

    def _import_opml_in_thread(self, file_path, callback, user_data):
        ''' Collects the xmlUrl attribute of every <outline> found in
        the OPML file file_path.

        callback is called with the list of URLs and user_data. The
        list is empty if file_path is not an existing file with .opml
        extension or if it cannot be read or parsed. '''
        from xml.parsers.expat import ExpatError

        feed_url_list = []

        is_opml_file = os.path.isfile(file_path) and \
            os.path.splitext(file_path)[1] == '.opml'

        if is_opml_file:
            try:
                doc = open(file_path, 'r')
                try:
                    opml_doc = minidom.parse(doc)
                finally:
                    doc.close()

                outlines = opml_doc.getElementsByTagName('outline')
                feed_url_list = [node.getAttribute('xmlUrl') for node in outlines
                                 if node.getAttribute('xmlUrl') != '']
            except (IOError, ExpatError):
                # Unreadable or malformed file. Report an empty list
                # instead of leaving the caller waiting forever
                print('Error importing OPML, could not read ' + file_path)

        self._safe_callback(callback, feed_url_list, user_data)

    def find_feed(self, key_words, dialog, callback):
        ''' Searches feeds matching key_words using a background thread
        running _find_feed_in_thread. Returns the result of starting
        the thread '''
        search_args = (key_words, dialog, callback)
        worker = Thread(target=self._find_feed_in_thread, args=search_args)

        return worker.start()

    def _find_feed_in_thread(self, key_words, dialog, callback):
        ''' Asks syndic8.com for (at most 25) feeds matching key_words
        and retrieves the image URL, site name and data URL of each one.

        callback is called with key_words, dialog and the information
        retrieved, or None instead of the information if the service
        could not be accessed. '''
        infolist = None
        try:
            server = xmlrpclib.Server('http://www.syndic8.com/xmlrpc.php')
            feedids = server.syndic8.FindFeeds(key_words,'last_pubdate',25,0)
            infolist = server.syndic8.GetFeedInfo(feedids, ['imageurl','sitename','dataurl'])
        except Exception:
            # Network and XML-RPC errors. Do not catch SystemExit and
            # KeyboardInterrupt as a bare except would do
            infolist = None
            print('Error while accessing syndic8.com')

        self._safe_callback(callback, key_words, dialog, infolist)

    def _authenticate_google_reader(self):
        ''' Returns a pair of authentication headers and the authentication token

        The authentication string and the token are requested only if
        they are not already cached in the instance. The token is None
        if the authentication failed; callers must check it before
        using the headers. Both cached values are discarded on failure
        so the next call starts from scratch. '''
        if self._google_reader_auth is None:
            try:
                params = urllib.urlencode({"service": "reader",
                                           "Email": self.settings.user,
                                           "Passwd": self.settings.password})
                content = urllib2.urlopen(constants.URL_LOGIN, params).read()
            except Exception:
                self._google_reader_auth = None
                self._google_reader_auth_token = None
                return None, None

            # The authentication string goes from 'Auth=' to the end of
            # the line
            pos_begin = content.find('Auth=')
            if pos_begin == -1:
                # Unexpected response, there is nothing to authenticate with
                self._google_reader_auth_token = None
                return None, None
            pos_end = content.find('\n', pos_begin)
            if pos_end == -1:
                # Last line without trailing newline
                pos_end = len(content)
            self._google_reader_auth = content[pos_begin+len('Auth='):pos_end]

        auth_headers = { 'Authorization' : 'GoogleLogin auth=' + self._google_reader_auth }
        if self._google_reader_auth_token is None:
            try:
                # Get auth token
                token_request = urllib2.Request(constants.URL_TOKEN, headers = auth_headers)
                token_response = urllib2.urlopen(token_request)
                self._google_reader_auth_token = token_response.read()
            except Exception:
                self._google_reader_auth = None
                self._google_reader_auth_token = None

        return auth_headers, self._google_reader_auth_token

    def mark_as_read_synchronize(self, feed_data_base, item_ref, read_item=True, callback=None, data=None):
        ''' Synchronizes the read status of an item with Google Reader
        using a background thread running
        _mark_as_read_synchronize_in_thread. Returns the result of
        starting the thread '''
        sync_args = (feed_data_base, item_ref, read_item, callback, data)
        worker = Thread(target=self._mark_as_read_synchronize_in_thread,
                        args=sync_args)

        return worker.start()

    def _mark_as_read_synchronize_in_thread(self, feed_data_base, item_ref, read_item, callback, user_data):
        ''' Marks in Google Reader the item of the feed feed_data_base
        whose link is item_ref as read (read_item is True) or as unread.

        The items of the feed are retrieved page by page, following the
        continuation token of each page, until the item is found or
        there are no more pages. callback is called with True and
        user_data if the status was synchronized, and with False and
        user_data if the item was not found or something failed. '''
        synced = False
        # "a" adds the read state to the item while "r" removes it
        read_mark = "a" if read_item else "r"

        auth_headers, auth_token = self._authenticate_google_reader()
        if not auth_token:
            self._safe_callback(callback, synced, user_data)
            return

        try:
            # Get feed
            feed_request = constants.URL_FEED+feed_data_base
            req = urllib2.Request(feed_request, headers = auth_headers)
            response_data = urllib2.urlopen(req).read()

            while not synced:
                doc = minidom.parseString(response_data)

                # Look for the item in this page first. The last page
                # has no continuation but it does have entries
                for node in doc.getElementsByTagName('entry'):
                    if node.getElementsByTagName('link')[0].attributes['href'].value == item_ref:
                        # Do the actual request for sync'ing the read status
                        itemId = node.getElementsByTagName('id')[0].firstChild.data
                        postparams = urllib.urlencode({read_mark: "user/-/state/com.google/read", "async" : "true", "ac" : "edit", "s" : "feed/"+feed_data_base, "i" : itemId, "T" : auth_token})
                        req = urllib2.Request(constants.URL_EDIT, postparams, auth_headers)
                        urllib2.urlopen(req)
                        synced = True
                        break

                if synced:
                    break

                nodes = doc.getElementsByTagName('gr:continuation')
                if not nodes:
                    # No more pages, the item is not in Google Reader
                    break

                # Get the next items. The data must be read from the
                # response of this new request
                continuation = nodes[0].firstChild.data
                req = urllib2.Request(feed_request+'?c='+continuation, headers = auth_headers)
                response_data = urllib2.urlopen(req).read()
        except Exception:
            # Network or parsing error, synced is still False here
            pass

        # Call user callback
        self._safe_callback(callback, synced, user_data)

    def download_item(self, url, path_file, callback, data=None):
        ''' Downloads url to path_file using a background thread running
        _download_items_in_thread, which is also in charge of calling
        callback '''
        # I/O errors happen (and are handled) inside the thread, there
        # is nothing to catch here because Thread.start() does not raise
        # IOError
        downloading_thread = Thread(target=self._download_items_in_thread,
                                    args=([url], [path_file], callback, data))
        downloading_thread.start()

    def download_all_items(self, urls, paths_files, callback, data=None):
        ''' Downloads each one of urls to the path located in the same
        position of paths_files using a background thread running
        _download_items_in_thread, which is also in charge of calling
        callback '''
        # I/O errors happen (and are handled) inside the thread, there
        # is nothing to catch here because Thread.start() does not raise
        # IOError
        downloading_all_thread = Thread(target=self._download_items_in_thread,
                                        args=(urls, paths_files, callback, data))
        downloading_all_thread.start()

    def _download_items_in_thread(self, urls, paths_files, callback, data):
        ''' Downloads each one of urls to the path located in the same
        position of paths_files, stopping at the first failure.

        callback is called with True if everything was downloaded and
        with False otherwise (a download failed or there are fewer
        paths than URLs). data is not used. '''
        paths = iter(paths_files)
        success = True
        for url in urls:
            try:
                path = paths.next()
            except StopIteration:
                # No path for this URL. Report the failure instead of
                # dying without calling the callback
                success = False
                break

            try:
                urllib.FancyURLopener().retrieve(url, path)
            except IOError:
                success = False
                break

        self._safe_callback(callback, success)

    def sync_with_google_reader(self, callback, data=None):
        ''' Retrieves the Google Reader subscriptions using a background
        thread running _sync_with_google_reader_in_thread. Returns the
        result of starting the thread '''
        worker = Thread(target=self._sync_with_google_reader_in_thread,
                        args=(callback, data))

        return worker.start()

    def _sync_with_google_reader_in_thread(self, callback, user_data):
        ''' Retrieves the URLs of the feeds the user is subscribed to
        in Google Reader.

        callback is called with None and user_data if the
        authentication fails. Otherwise it is called with the list of
        URLs and user_data; the list contains whatever was collected
        before an error, if any. '''
        auth_headers, auth_token = self._authenticate_google_reader()
        if not auth_token:
            self._safe_callback(callback, None, user_data)
            return

        urls = []
        try:
            # Get subscriptions
            subs_request = urllib2.Request(constants.URL_SUBSCRIPTION_LIST, headers = auth_headers)
            subs_response = urllib2.urlopen(subs_request)
            try:
                doc = minidom.parse(subs_response)
            finally:
                subs_response.close()

            # The xml looks like this
            # <object>
            #    <list name="subscriptions">
            #       <object><string name="id">feed/http://somefeed.com</string>....</object>
            #       <object><string name="id">feed/http://someotherfeed.com</string>....</object>
            #    </list>
            # <object>
            nodes = doc.documentElement.childNodes[0].childNodes
            for node in nodes:
                try:
                    feed_url = node.firstChild.firstChild.data
                except AttributeError:
                    # Not a subscription node (text between elements for
                    # example), skip it but keep processing the rest
                    continue

                # Google allows also some other kind of
                # subscriptions. Skip them for the moment. I saw for
                # example feeds like
                # <string name="id">webfeed/someidreturnedbygoogle</string>
                # <string name="id">user/someuserid/label/SomeLabel</string>
                if feed_url.startswith('feed/'):
                    print('Importing: ' + feed_url[5:])
                    urls.append(feed_url[5:])
        except Exception:
            # Best effort, report the subscriptions found so far
            print('Error while retrieving the Google Reader subscriptions')

        self._safe_callback(callback, urls, user_data)

    def save(self, callback=None, data=None):
        ''' Stores the list of feeds in constants.RSS_DB_FILE.

        The file is opened here, so IOError is raised to the caller if
        that is not possible, and then it is written by a background
        thread running _save_in_thread '''
        # TODO: migrate to "with" statement when available
        try:
            db_file = open(constants.RSS_DB_FILE, 'w')
        except IOError:
            print('Cannot write to %s' % (constants.RSS_DB_FILE,))
            raise

        # Create a new thread to store in disk
        worker = Thread(target=self._save_in_thread,
                        args=(db_file, callback, data))
        worker.start()

    def _save_in_thread(self, db_file, callback, data):
        ''' Serializes the list of feeds to the already opened file
        db_file and closes it.

        callback is called with data once the list is stored. It is not
        called if the serialization fails; the error is raised
        instead. '''
        try:
            try:
                cPickle.dump(self.feed_data_list, db_file)
            except cPickle.PicklingError:
                # UnpickleableError is a subclass of PicklingError
                print('Cannot serialize to %s' % (constants.RSS_DB_FILE,))
                raise
        finally:
            # Close the file so the data is flushed to disk
            db_file.close()

        self._safe_callback(callback, data)

    def load(self, callback, data=None):
        ''' Loads the list of feeds from constants.RSS_DB_FILE.

        The file is opened here, so IOError is raised to the caller if
        that is not possible, and then it is read by a background
        thread running _load_in_thread '''
        # TODO: migrate to "with" statement when available
        try:
            db_file = open(constants.RSS_DB_FILE, 'r')
        except IOError:
            print('Cannot open %s' % (constants.RSS_DB_FILE,))
            raise

        # Create a new thread to load from disk
        worker = Thread(target=self._load_in_thread,
                        args=(db_file, callback, data))
        worker.start()

    def _load_in_thread(self, db_file, callback, data):
        ''' Replaces the list of feeds with the one serialized in the
        already opened file db_file and closes the file.

        callback is called with data once the list is loaded. It is not
        called if the data cannot be read; the error is raised
        instead. '''
        # NOTE(review): unpickling runs whatever the file says, it is
        # only safe as long as nobody else can write to the database file
        try:
            feed_data_list = cPickle.load(db_file)
        finally:
            db_file.close()

        self.feed_data_list = [ReSiStanceFeedDict(feed_data) for feed_data in feed_data_list]

        self._safe_callback(callback, data)

    def get_favicon(self, url, callback, data=None):
        ''' Gets the favicon of the site at url using a background thread
        running _get_favicon_in_thread '''
        worker_args = (url, callback, data)
        Thread(target=self._get_favicon_in_thread, args=worker_args).start()

    def _get_favicon_in_thread(self, url, callback, data):
        ''' Gets the favicon of the site at url with _get_favicon_sync
        and calls callback with the pixbuf and data '''
        self._safe_callback(callback, self._get_favicon_sync(url), data)

    def _get_favicon_sync(self, url):
        # Check that user dir exists
        user_path = os.path.join (constants.RSS_CONF_FOLDER, 'icons')

        if os.path.exists(user_path) == False:
            os.makedirs(user_path, 0700)

        file_name = os.path.join (user_path, base64.b64encode(url) + '.favicon.ico')
        if os.path.exists(file_name) == False:
            parsed_url = urlparse.urlsplit(url)
            try:
                localfile, headers = urllib.urlretrieve(parsed_url.scheme + '://' +
                                                        parsed_url.netloc + '/favicon.ico',
                                                        file_name)
            except:
                return gtk.gdk.pixbuf_new_from_file(constants.DEFAULT_FAVICON)

            # Try with a more general address. If we got a text/html then we most
            # likely requested an invalid address. It's better this than to check
            # for something like "image/" because I noticed that some servers return
            # icons with funny content types like text/plain. No comment
            if headers['Content-type'].startswith('text/html') == True:
                domains = parsed_url.netloc.rsplit('.',2)
                # Do not retry if domains == 2 because it will be
                # the same address we tried before
                if len(domains) > 2:
                    try:
                        localfile, headers = urllib.urlretrieve(parsed_url.scheme + '://' +
                                                                domains[-2] + '.' + domains[-1] +
                                                                '/favicon.ico', file_name)
                    except:
                        return gtk.gdk.pixbuf_new_from_file(constants.DEFAULT_FAVICON)

                if headers['Content-type'].startswith('image/') == False:
                    os.remove(localfile)
        try:
            pixbuf = gtk.gdk.pixbuf_new_from_file(file_name)
        except:
            return gtk.gdk.pixbuf_new_from_file(constants.DEFAULT_FAVICON)

        # Scale pixbuf. TODO: do not use hard-coded values
        if (pixbuf.get_width() != 32):
            pixbuf = pixbuf.scale_simple(32,32,gtk.gdk.INTERP_BILINEAR)
            pixbuf.save(file_name, 'png')

        return pixbuf

    def subscribe_feed_google(self, feed_data, feed_url, is_add_subcription, callback=None, user_data=None):
        ''' Subscribes to (or unsubscribes from) a feed in Google Reader
        using a background thread running
        _subscribe_feed_google_in_thread. Returns the result of starting
        the thread '''
        subscription_args = (feed_data, feed_url, is_add_subcription, callback, user_data)
        worker = Thread(target=self._subscribe_feed_google_in_thread,
                        args=subscription_args)

        return worker.start()

    def _subscribe_feed_google_in_thread(self, feed_data, feed_url, is_add_subcription, callback, data):
        ''' Subscribes to feed_url in Google Reader if is_add_subcription
        is True and unsubscribes from it otherwise.

        feed_data.sync is set to is_add_subcription no matter the result
        of the request. callback is called with True and data if Google
        Reader accepted the request and with False and data if the
        authentication or the request failed. '''
        synced = False
        feed_data.sync = is_add_subcription

        auth_headers, auth_token = self._authenticate_google_reader()
        if not auth_token:
            self._safe_callback(callback, synced, data)
            return

        action = 'subscribe' if is_add_subcription else 'unsubscribe'
        try:
            # Edit subscriptions
            postparams = urllib.urlencode({"s":"feed/"+feed_url, "ac": action, "T": auth_token})
            edit_request = urllib2.Request(constants.URL_SUBSCRIPTION_EDIT, postparams, auth_headers)
            urllib2.urlopen(edit_request)
        except Exception:
            # The failure is reported to the callback
            pass
        else:
            synced = True

        self._safe_callback(callback, synced, data)

    def sync_google_reader_read_status(self, callback=None, user_data=None):
        ''' Synchronize the read/unread status of all (mandatory by
        Google Reader API) subscribed feeds with Google Reader.

        The work is done by a background thread running
        _sync_google_reader_read_status_in_thread. Returns the result of
        starting the thread '''
        worker = Thread(target=self._sync_google_reader_read_status_in_thread,
                        args=(callback, user_data))

        return worker.start()

    def _sync_google_reader_read_status_in_thread(self, callback, data):
        ''' Synchronizes the read/unread status of the entries with
        Google Reader.

        The unread items of all the subscriptions are retrieved page by
        page and grouped by feed. Then, for each feed of ReSiStance:
        entries unread in Google Reader but read here are marked as
        read in Google Reader, and entries not listed as unread by
        Google Reader are marked as read here. Feeds with the sync flag
        and no unread items in Google Reader get all their entries
        marked as read.

        callback is called with True and data when finished and with
        False and data if the authentication, a request or the parsing
        of a response fails. '''
        auth_headers, auth_token = self._authenticate_google_reader()
        if not auth_token:
            self._safe_callback(callback, False, data)
            return

        # Maps the link of each feed to the links of its unread entries
        unread_dict = {}
        try:
            # Get items from feed excluding those with read status. It
            # indeed really sucks that we can only ask for the unread
            # items from *ALL* of our subscriptions
            unread_url = constants.URL_USER + constants.STATE_SUFFIX + 'reading-list' + '?' + 'xt=user/-/' + constants.STATE_SUFFIX + 'read'
            unread_request = urllib2.Request(unread_url, None, auth_headers)
            unread_data = urllib2.urlopen(unread_request).read()

            while True:
                doc = minidom.parseString(unread_data)

                for node in doc.getElementsByTagName('entry'):
                    entry_link = node.getElementsByTagName('link')[0].attributes['href'].value
                    source = node.getElementsByTagName('source')[0]
                    if not source.getAttribute('gr:stream-id').startswith('feed/'):
                        continue

                    feed_source = source.getElementsByTagName('link')[0].attributes['href'].value
                    unread_dict.setdefault(feed_source, []).append(entry_link)

                # Ask for the next page if there is one
                nodes = doc.getElementsByTagName('gr:continuation')
                if not nodes:
                    break
                unread_request = urllib2.Request(unread_url+'&c='+nodes[0].firstChild.data, None, auth_headers)
                unread_data = urllib2.urlopen(unread_request).read()
        except Exception:
            # Network errors and unexpected responses. The callback must
            # be called anyway
            self._safe_callback(callback, False, data)
            return

        for key, unread_links in unread_dict.items():
            # This could happen if there are unread items in feeds
            # that are not synchronized to ReSiStance. If that's the
            # case just ignore them
            feed_data_list = [feed_data for feed_data in self.feed_data_list if feed_data.feed.get('link') == key]
            if not feed_data_list:
                continue

            feed_data = feed_data_list[0]

            # Sync read/unread status. Ideally this should prioritize
            # the last action. Meanwhile prioritize the read status
            for entry in feed_data.entries:
                # Sometimes entry links include queries, discard them for
                # the comparison. split() leaves links without query
                # untouched
                link_without_query = entry.link.split('?', 1)[0]
                if entry.link in unread_links or link_without_query in unread_links:
                    # If read in ReSiStance update Google Reader (most likely read while offline)
                    if entry.read:
                        self._mark_as_read_synchronize_in_thread(feed_data.feed.link, entry.link, True, None, None)
                else:
                    # If read in Google Reader update ReSiStance (most likely read in Web)
                    if not entry.read:
                        entry.read = True

        # Mark all entries as read for feeds synced with Google Reader
        # with no unread items
        sync_feeds = [feed for feed in self.feed_data_list if feed.sync and (feed.feed.get('link') not in unread_dict)]
        for feed_data in sync_feeds:
            for entry in feed_data.entries:
                if not entry.read:
                    entry.read = True

        self._safe_callback(callback, True, data)

    def _add_feed_in_thread(self, url, sync, save, callback, user_data):
        ''' Retrieves the feed at url and adds it to the list of feeds.

        'http://' is prepended to url if it has no scheme. If url does
        not end with 'xml' or 'opml' it is considered a web page and
        the feed advertised by the page is used instead.

        callback is called with the favicon, the new feed data and
        user_data, or with None, None and user_data if the feed could
        not be retrieved or parsed. The list of feeds is stored in disk
        afterwards if save is True. '''
        parsed_url = urlparse.urlsplit(url)
        if parsed_url.scheme == '':
            url = 'http://' + url

        if not url.endswith('xml') and not url.endswith('opml'):
            url = getRSSLink(url)

        # Return if we cannot get the feed URL
        if url == '':
            self._safe_callback(callback, None, None, user_data)
            return

        new_feed_data = ReSiStanceFeedDict(feedparser.parse(url), sync)

        # 200 == OK, and 3xx are redirections. There is no status at all
        # if the server could not be reached.
        # On the other hand bozo==1 if there was some problem parsing the feed
        status = new_feed_data.get('status')
        status_ok = status is not None and (status == 200 or status // 100 == 3)
        if not status_ok or new_feed_data.get('bozo'):
            self._safe_callback(callback, None, None, user_data)
            return

        self.feed_data_list.append(new_feed_data)

        if 'link' in new_feed_data.feed:
            pixbuf = self._get_favicon_sync(new_feed_data.feed.link)
        else:
            pixbuf = self._get_favicon_sync(new_feed_data.href)

        # Call user callback
        self._safe_callback(callback, pixbuf, new_feed_data, user_data)

        # Save to disk
        if save:
            try:
                self.save(None)
            except IOError:
                pass

    def _update_feeds_in_thread(self, feed_data_list, callback, user_data):
        ''' Updates from the network each one of the feeds of
        feed_data_list.

        A feed is updated only if the retrieved data has entries and is
        newer than the stored one. In that case the new entries are
        appended to the old ones (which keep their read status), the
        enclosures of the new entries are downloaded if the auto
        download setting is enabled and the rest of the feed
        information is replaced.

        callback is called once with user_data after processing all the
        feeds and then the list of feeds is stored in disk. '''
        for feed_data in feed_data_list:
            updated_feed_data = ReSiStanceFeedDict(feedparser.parse(feed_data.href))

            # In case of network failure
            if not updated_feed_data.entries:
                continue

            updated_feed_date = updated_feed_data.feed.get('updated_parsed') or \
                updated_feed_data.entries[0].get('updated_parsed')
            # The stored feed could have no entries at all
            feed_date = feed_data.feed.get('updated_parsed')
            if not feed_date and feed_data.entries:
                feed_date = feed_data.entries[0].get('updated_parsed')

            if updated_feed_date > feed_date:
                # Entries without id are identified by their link
                old_entry_ids = set([entry.get('id') or entry.get('link')
                                     for entry in feed_data.entries])
                new_entries = [entry for entry in updated_feed_data.entries
                               if (entry.get('id') or entry.get('link')) not in old_entry_ids]
                del old_entry_ids

                # Autodownload enclosures
                if self.settings.auto_download:
                    to_download_urls = [entry.enclosures[0].href for entry in new_entries if entry.get('enclosures')]
                    to_download_paths = [self.settings.auto_download_folder+os.path.basename(urllib.url2pathname(url)) for url in to_download_urls]
                    self._download_items_in_thread(to_download_urls, to_download_paths, None, None)

                # In order to keep read/unread status, just add
                # new_entries to the old ones while updating the rest
                # of the information. The retrieved data comes with
                # default values for the ReSiStance keys, do not let
                # them overwrite the ones of the stored feed
                old_entries = feed_data.entries
                preserved = dict([(key, feed_data[key]) for key in ('visits', 'sync')
                                  if key in feed_data])
                feed_data.update(updated_feed_data)
                feed_data.update(preserved)
                feed_data.entries = old_entries + new_entries

        # Call user callback. Call it just once, if callers want a different behaviour
        # they can always call update_feed() for each one
        self._safe_callback(callback, user_data)

        # Save to disk
        try:
            self.save(None)
        except IOError:
            pass
