from lxml import etree

class GourmetXML:
    """Load recipes from an XML document into parallel ``[id, text]`` lists.

    The document root is expected to contain ``recipe`` elements whose
    children (``title``, ``category``, ``preptime``, ``cooktime``,
    ``servings``, ``image``, ``ingredient-list``, ``instructions`` and
    ``modifications``) are collected into one list per field.  Tag names
    are matched by substring, exactly as the element loop in ``xml_load``
    does it.
    """

    def initiation(self, filename):
        """Parse *filename* and return the cleaned recipe data.

        Returns a list of nine lists, in this order: titles, categories,
        prep times, cook times, servings, ingredients, instructions, notes
        and pictures.  Each inner list holds ``[recipe_id, text]`` pairs.
        Every list except the ingredient list is cleaned, padded with
        ``[recipe_id, '']`` for recipes that lack the field, and sorted by
        recipe id.  The ingredient list is returned exactly as produced by
        ``xml_load``.
        """
        # Read raw bytes: the XML parser then honours the encoding declared
        # in the document itself, and lxml refuses already-decoded text
        # that still carries an encoding declaration.
        with open(filename, 'rb') as f:
            doc = f.read()
        xmldocument = etree.XML(doc)

        (title_list, category_list, preptime_list, cooktime_list,
         servings_list, ing_list, instructions_list, notes_list,
         pic_list) = self.xml_load(xmldocument)

        recipes_info = [title_list, category_list, preptime_list,
                        cooktime_list, servings_list, ing_list,
                        instructions_list, notes_list, pic_list]

        # One title is recorded per recipe, so this is the recipe count.
        n_items = len(title_list)
        for lista in recipes_info:
            # Compare by identity: two *different* fields that both happen
            # to be empty are equal under ``==`` and would be confused.
            if lista is ing_list:
                continue
            keep_newlines = lista is instructions_list or lista is notes_list
            self.do_treatment_to_lists(lista, n_items, keep_newlines, False)

        return recipes_info

    @staticmethod
    def _children(element):
        """Yield the child elements of *element* that have a string tag.

        Comment and processing-instruction nodes carry a callable as their
        ``tag``; they are skipped so substring tests on the tag cannot fail.
        """
        for child in element:
            if not callable(child.tag):
                yield child

    @staticmethod
    def _append_text(target, idd, element):
        """Append ``[idd, element.text]`` to *target* unless the text is None."""
        if element.text is not None:
            target.append([idd, element.text])

    def _ingredient_line(self, ingredient):
        """Build the ``amount||unit||item\\n`` line for one ingredient element.

        Fields are concatenated in document order; tabs and newlines are
        removed from each field's text, and fields without text are
        skipped.  The result is passed through ``tratar_one_ing`` so that
        missing amount/unit slots are filled in.
        """
        parts = []
        for field in self._children(ingredient):
            text = field.text
            if text is None:
                continue
            tag = field.tag
            if 'amount' in tag or 'unit' in tag:
                terminator = '||'
            elif 'item' in tag:
                terminator = '\n'
            else:
                continue
            parts.append(text.replace('\t', '').replace('\n', '') + terminator)
        return self.tratar_one_ing(''.join(parts))

    def xml_load(self, xmldocument):
        """Collect the recipe fields found under *xmldocument*.

        Returns a 9-tuple of lists ``(title_list, category_list,
        preptime_list, cooktime_list, servings_list, ing_list,
        instructions_list, notes_list, pic_list)``.  Each entry is a
        two-item list ``[recipe_id, text]``; recipe ids start at 1 and are
        incremented every time a ``title`` element is seen.  Fields other
        than the title are only recorded when they have text.  All the
        ingredients of a recipe are joined into a single string, one
        ingredient per line.
        """
        title_list = []
        category_list = []
        preptime_list = []
        cooktime_list = []
        servings_list = []
        ing_list = []
        instructions_list = []
        notes_list = []
        pic_list = []
        idd = 0

        for recipe in self._children(xmldocument):
            if 'recipe' not in recipe.tag:
                continue
            for field in self._children(recipe):
                tag = field.tag
                if 'title' in tag:
                    # The title opens a new recipe; it is stored even when
                    # it has no text so that the id sequence stays dense.
                    idd += 1
                    title_list.append([idd, field.text])
                elif 'category' in tag:
                    self._append_text(category_list, idd, field)
                elif 'preptime' in tag:
                    self._append_text(preptime_list, idd, field)
                elif 'cooktime' in tag:
                    self._append_text(cooktime_list, idd, field)
                elif 'servings' in tag:
                    self._append_text(servings_list, idd, field)
                elif 'image' in tag:
                    self._append_text(pic_list, idd, field)
                elif 'ingredient-list' in tag:
                    lines = []
                    for entry in self._children(field):
                        if 'ingredient' in entry.tag:
                            lines.append(self._ingredient_line(entry))
                        elif 'inggroup' in entry.tag:
                            # A group wraps further ingredient elements;
                            # they are flattened into the same list.
                            for grouped in self._children(entry):
                                if 'ingredient' in grouped.tag:
                                    lines.append(
                                        self._ingredient_line(grouped))
                    ing_list.append([idd, ''.join(lines)])
                elif 'instructions' in tag:
                    self._append_text(instructions_list, idd, field)
                elif 'modifications' in tag:
                    self._append_text(notes_list, idd, field)

        return title_list, category_list, preptime_list, cooktime_list, \
                servings_list, ing_list, instructions_list, notes_list, pic_list

    def tratar_one_ing(self, one_item):
        """Pad an ingredient line so it has both ``||`` separators.

        * four ``|`` characters: the line is returned unchanged;
        * two ``|`` characters: the ``||`` separator is doubled to ``||||``;
        * no ``|`` at all: ``||||`` is prepended.

        Any other count leaves the line untouched.
        """
        pipes = one_item.count('|')
        if pipes == 2:
            # Only one separator present: double it.
            one_item = one_item.replace('||', '||||')
        elif pipes == 0:
            # Neither amount nor unit present: add both separators in front.
            one_item = '||||' + one_item
        return one_item

    def do_treatment_to_lists(self, lista, n_items, text_list, inglist):
        """Clean, pad and sort *lista* in place, then return it.

        lista     -- list of ``[recipe_id, text]`` pairs (modified in place).
        n_items   -- number of recipes; ids ``1..n_items`` missing from
                     *lista* are added as ``[recipe_id, '']``.
        text_list -- when true, tabs are removed and one leading newline is
                     dropped, but inner newlines are kept; otherwise tabs
                     and all newlines are removed.
        inglist   -- when true, *lista* is returned untouched.

        A ``None`` text is treated as an empty string.
        """
        if inglist:
            return lista

        for entry in lista:
            text = (entry[1] or '').replace('\t', '')
            if text_list:
                if text[:1] == '\n':
                    text = text[1:]
            else:
                text = text.replace('\n', '')
            entry[1] = text

        present = {entry[0] for entry in lista}
        lista.extend([recipe_id, '']
                     for recipe_id in range(1, n_items + 1)
                     if recipe_id not in present)

        # list.sort() sorts in place and returns None, so its result must
        # not be assigned back to the list that is returned.
        lista.sort()
        return lista
