## This file is part of MLPY.
## Borda count.
   
## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.


from numpy import *

__all__ = ['borda']


def mod(lists, modules):
    """
    Return a copy of 'lists' with the columns of each module sorted.

    For every entry of 'modules' (expected to be a group of column
    indices), the values found in those columns are sorted in increasing
    order along the last axis (i.e. within each row when the entry is a
    list of indices) and stored back in the same columns of the copy.

    Input

      * *lists*   - [2D numpy array] array to be arranged (left untouched)
      * *modules* - [list] modules (list of groups of column indices)

    Output

      * *arranged* - [2D numpy array] arranged copy of 'lists'
    """

    arranged = lists.copy()

    for group in modules:
        # Values are always read from the original 'lists', never from
        # the partially arranged copy.
        arranged[:, group] = sort(lists[:, group], axis=-1)

    return arranged


def borda(lists, k, modules=None):
    """
    Compute the number of extractions on top-k sublists and
    the mean position inside the top-k sublists for each element.
    Sort the element ids with decreasing number of extractions,
    AND element ids with equal number of extractions are
    sorted with increasing mean positions.

    Positions are 0-based column indices. An element that never appears
    in any top-k sublist has 0 extractions and mean position inf.
    Elements that tie on both criteria are returned in decreasing
    element-id order.

    Input
    
      * *lists* - [2D numpy array integer] ranked feature-id lists
        (one list per row). Feature-id must be in [0, #elems-1].
        Anything accepted by numpy.asarray (e.g. nested lists) is allowed.
      * *k*     - [integer] on top-k sublists
      * *modules* - [list] modules (list of groups of indices).
        If given, the lists are first rearranged with mod().

    Output
    
      * *borda* - (feature-id, number of extractions,  mean positions)

    Example:
    
    >>> from numpy import *
    >>> from mlpy import *
    >>> lists = array([[2,4,1,3,0],  # first ranked feature-id list
    ...                [3,4,1,2,0],  # second ranked feature-id list
    ...                [2,4,3,0,1],  # third ranked feature-id list
    ...                [0,1,4,2,3]]) # fourth ranked feature-id list
    >>> borda(lists, 3)
    (array([4, 1, 2, 3, 0]), array([4, 3, 2, 2, 1]), array([ 1.25      ,  1.66666667,  0.        ,  1.        ,  0.        ]))

      * Element 4 is in the first position with 4 extractions and mean position 1.25.
      * Element 1 is in the second position with 3 extractions and mean position 1.67.
      * Element 2 is in the third position with 2 extractions and mean position 0.00.
      * Element 3 is in the fourth position with 2 extractions and mean position 1.00.
      * Element 0 is in the fifth position with 1 extraction and mean position 0.00.
    """

    # Accept nested sequences as well as arrays (no copy for ndarrays).
    lists = asarray(lists)

    # Identity test: '!= None' is evaluated elementwise when 'modules'
    # is a numpy array and its truth value is then ambiguous.
    if modules is not None:
        poslists = argsort(lists)
        newposlists = mod(poslists, modules)
        newlists = argsort(newposlists)
    else:
        newlists = lists

    nelems = newlists.shape[1]
    ext = empty(nelems, dtype=int)
    pos = empty(nelems, dtype=float)

    lk = newlists[:, :k]
    for e in range(nelems):
        # Column indices (positions) at which 'e' occurs in the top-k
        # sublists; their count is the number of extractions.
        hits = where(lk == e)[1]
        ext[e] = hits.shape[0]

        if hits.shape[0] != 0:
            pos[e] = hits.mean()
        else:
            # Never extracted: no position to average.
            pos[e] = inf

    # Sort the element ids with decreasing ext, _AND_
    # element ids with equal ext should be sorted with increasing pos.
    # lexsort sorts in increasing order (last key is the primary one),
    # so the inverse position is used and the result is reversed.
    invpos = 1 / (pos + 1)  # pos + 1 to avoid zero division
    indices = lexsort(keys=(invpos, ext))[::-1]

    return indices, ext[indices], pos[indices]
