from lxml import etree
import os

class KRecipes:
    """Import recipes from a KRecipes XML export or a gzip-compressed ``.kre`` file.

    ``initiation`` is the entry point; the other public methods are the
    steps it is built from and can be called on their own.
    """

    def initiation(self, filename):
        """Parse *filename* and return the recipe data as a list of columns.

        Files whose extension is ``.kre`` are gunzipped and split on NUL
        bytes; the last chunk starting with ``<?xml`` is parsed.  Any other
        file is parsed directly as XML.

        Returns a 9-item list.  Every item is a list of ``[recipe_id, value]``
        pairs, in this order:

        0. titles
        1. categories
        2. preparation times
        3. the constant ``'0 minutes'`` for every recipe
        4. servings
        5. ingredients (exactly as produced by ``xml_load``: not padded)
        6. instructions
        7. the constant ``''`` for every recipe
        8. pictures

        All columns except the ingredients hold one entry per title, sorted
        by recipe id; recipes lacking a value get ``''``.

        Raises ``ValueError`` when a ``.kre`` file holds no XML chunk.
        """
        # Read bytes in both branches: lxml refuses unicode strings that
        # carry an XML encoding declaration, and bytes behave the same on
        # Python 2 and Python 3.
        if os.path.splitext(filename)[1] == '.kre':
            import gzip
            with gzip.open(filename, 'rb') as f:
                chunks = f.read().split(b'\x00')
            xml_chunks = [chunk for chunk in chunks
                          if chunk.startswith(b'<?xml')]
            if not xml_chunks:
                raise ValueError(
                    'no XML document found in %r' % (filename,))
            # When several chunks qualify, the last one wins.
            xml_document = etree.XML(xml_chunks[-1])
        else:
            with open(filename, 'rb') as f:
                xml_document = etree.XML(f.read())

        (title_list, category_list, preptime_list, servings_list,
         ing_list, instructions_list, picture_list) = \
            self.xml_load(xml_document)

        n_items = len(title_list)

        # Single-line columns: tabs and newlines are stripped.
        for column in (title_list, category_list, preptime_list,
                       servings_list, picture_list):
            self.do_treatment_to_lists(column, n_items, False, False)
        # Instructions are free text: keep their newlines.
        self.do_treatment_to_lists(instructions_list, n_items, True, False)

        zero_minutes_column = [[i + 1, '0 minutes'] for i in range(n_items)]
        blank_column = [[i + 1, ''] for i in range(n_items)]

        return [title_list, category_list, preptime_list,
                zero_minutes_column, servings_list, ing_list,
                instructions_list, blank_column, picture_list]

    def xml_load(self, xml_document):
        """Collect the recipe fields found under *xml_document*.

        *xml_document* is the root element; its children whose tag contains
        ``krecipes-recipe`` are read.  Tags are matched by substring.

        Returns a 7-tuple of lists ``(titles, categories, preparation times,
        servings, ingredients, instructions, pictures)``.  Each list holds
        ``[recipe_id, value]`` pairs.  The recipe id starts at 1 and is
        incremented every time a title element is met, so every field is
        attributed to the most recently seen title.

        Only the first non-empty category and picture of a recipe are kept.
        All ingredients of a recipe are concatenated into one string, one
        ``amount||unit||name`` line per ingredient.
        """
        title_list = []
        category_list = []
        preptime_list = []
        servings_list = []
        ing_list = []
        instructions_list = []
        picture_list = []
        idd = 0

        for recipe in self._elements(xml_document):
            if 'krecipes-recipe' not in recipe.tag:
                continue
            for section in self._elements(recipe):
                if 'krecipes-description' in section.tag:
                    for field in self._elements(section):
                        tag = field.tag
                        if 'title' in tag:
                            idd += 1
                            title_list.append([idd, field.text])
                        elif 'category' in tag:
                            text = self._first_text(field, 'cat')
                            if text is not None:
                                category_list.append([idd, text])
                        elif 'preparation-time' in tag:
                            if field.text is not None:
                                preptime_list.append([idd, field.text])
                        elif 'yield' in tag:
                            # Start from None for every yield so an amount
                            # never leaks over from a previous recipe.
                            amserv = None
                            for part in self._elements(field):
                                if 'amount' in part.tag:
                                    amserv = part.text
                                # NOTE(review): an entry is only recorded
                                # when a child *tag* contains "Servings";
                                # confirm this matches the imported files.
                                if 'Servings' in part.tag:
                                    servings_list.append([idd, amserv])
                        elif 'pictures' in tag:
                            text = self._first_text(field, 'pic')
                            if text is not None:
                                picture_list.append([idd, text])
                elif 'krecipes-ingredients' in section.tag:
                    lines = [self._ingredient_line(ingredient)
                             for ingredient in self._elements(section)
                             if 'ingredient' in ingredient.tag]
                    ing_list.append([idd, ''.join(lines)])
                elif 'krecipes-instructions' in section.tag:
                    if section.text is not None:
                        instructions_list.append([idd, section.text])

        return title_list, category_list, preptime_list, \
            servings_list, ing_list, instructions_list, picture_list

    def _elements(self, parent):
        """Return the children of *parent* that are real elements.

        lxml also yields comments and processing instructions as children;
        their ``tag`` is a function rather than a string, which would break
        the substring tests on tag names.
        """
        return [child for child in parent if not callable(child.tag)]

    def _first_text(self, parent, tag_fragment):
        """Return the text of the first child of *parent* whose tag contains
        *tag_fragment* and whose text is not None, or None if there is none.
        """
        for child in self._elements(parent):
            if tag_fragment in child.tag and child.text is not None:
                return child.text
        return None

    def _ingredient_line(self, ingredient):
        """Build the ``amount||unit||name`` line for one ingredient element."""
        one_item = ''
        for part in self._elements(ingredient):
            if part.text is None:
                continue
            text = part.text.replace('\t', '').replace('\n', '')
            if 'name' in part.tag:
                one_item = one_item + text + '\n'
            elif 'amount' in part.tag:
                # The unit is not known yet: leave a placeholder for it.
                one_item = text + '||<unit>||' + one_item
            elif 'unit' in part.tag:
                one_item = one_item.replace('<unit>', text)
        return self.tratar_one_ing(one_item)

    def tratar_one_ing(self, one_item):
        """Normalise one ingredient line to the ``amount||unit||name`` layout.

        Any unfilled ``<unit>`` placeholder is removed, then, depending on
        the number of ``|`` characters in the line:

        * 4: the line is already complete and is left alone;
        * 2: every ``||`` is widened to ``||||``;
        * 0: ``||||`` is prepended.

        Lines with any other number of ``|`` are returned unchanged.
        """
        one_item = one_item.replace('<unit>', '')
        pipes = one_item.count('|')
        if pipes == 2:
            one_item = one_item.replace('||', '||||')
        elif pipes == 0:
            one_item = '||||' + one_item
        return one_item

    def do_treatment_to_lists(self, lista, n_items, text_list, categlist):
        """Clean, pad and sort a list of ``[recipe_id, text]`` pairs in place.

        lista     -- list of two-item lists ``[recipe_id, text]``; modified
                     in place.
        n_items   -- number of recipes; ids ``1..n_items`` missing from
                     *lista* are added with an empty text.
        text_list -- when true only tabs are removed from the text,
                     otherwise newlines are removed as well.
        categlist -- when true *lista* is returned untouched.

        A text of None is replaced by ``''``.  Returns *lista*.
        """
        if categlist:
            return lista

        for entry in lista:
            text = entry[1]
            if text is None:
                text = ''
            text = text.replace('\t', '')
            if not text_list:
                text = text.replace('\n', '')
            entry[1] = text

        present = set(entry[0] for entry in lista)
        lista.extend([i, ''] for i in range(1, n_items + 1)
                     if i not in present)

        # list.sort() sorts in place and returns None, so its result must
        # not be what is handed back to the caller.
        lista.sort()
        return lista
