Mapytex/pymath/stat/weightedDataset.py

#!/usr/bin/env python
# -*- coding:Utf-8 -*-

#
#
# Set of functions that make solving and writing high-school statistics exercises much more convenient
#
#

from math import sqrt, ceil
from collections import Counter
from .dataset import Dataset
from ..calculus.generic import flatten_list


class WeightedDataset(dict):
    """ A weighted dataset with statistics and latex rendering methods

    The dataset is a dict mapping each value (key) to its weight (value).

    >>> w = WeightedDataset([1, 2, 3, 4], "Enfants", [10, 11, 12, 13])
    >>> print(w)
    {1: 10, 2: 11, 3: 12, 4: 13}
    >>> w.effectif_total()
    46
    >>> w.sum()
    120
    >>> w.mean()
    2.608695652173913
    >>> w.deviation()
    56.95652173913044
    >>> w.variance()
    1.2381852551984878
    >>> w.sd()
    1.1127377297451937

    An empty dataset can be created and filled afterwards:

    >>> e = WeightedDataset()
    >>> e.add_data(3)
    >>> e.add_data(3, 2)
    >>> print(e)
    {3: 3}

    """

    def __init__(self, datas=None, data_name="Valeurs", weights=None, weight_name="Effectifs"):
        """
        Initiate the WeightedDataset

        :param datas: the values; when no weights are given, each value is
            weighted by its number of occurrences (as counted by Counter)
        :param data_name: label used for the values (first row of the latex table)
        :param weights: weight of each value, in the same order as datas
        :param weight_name: label used for the weights (second row of the latex table)

        :raises ValueError: if weights are given and their number differs
            from the number of datas
        """
        # None instead of [] as default: avoids sharing mutable default arguments
        if datas is None:
            datas = []
        if weights is None:
            weights = []

        if not weights:
            # Also covers the "no data at all" case, which gives an empty dataset
            weighted_datas = Counter(datas)
        elif len(datas) != len(weights):
            raise ValueError("Datas and weights should have same length")
        else:
            # If a value appears several times, its last weight wins
            weighted_datas = dict(zip(datas, weights))

        super().__init__(weighted_datas)

        self.data_name = data_name
        self.weight_name = weight_name

    def add_data(self, data, weight=1):
        """ Add weight to data, creating the entry if it does not exist yet """
        self[data] = self.get(data, 0) + weight

    def total_weight(self):
        """ Sum of all the weights """
        return sum(self.values())

    def effectif_total(self):
        """ Alias of total_weight """
        return self.total_weight()

    def sum(self):
        """ Not really a sum but the sum of the product of key and values """
        return sum(k * v for (k, v) in self.items())

    def mean(self):
        """ Weighted mean (raises ZeroDivisionError when the total weight is 0) """
        return self.sum() / self.effectif_total()

    def deviation(self):
        """ Compute the deviation (not normalized) """
        mean = self.mean()
        return sum(v * (k - mean)**2 for (k, v) in self.items())

    def variance(self):
        """ Deviation divided by the total weight """
        return self.deviation() / self.effectif_total()

    def sd(self):
        """ Compute the standard deviation """
        return sqrt(self.variance())

    def quartiles(self):
        """
        Compute the quartiles of the dataset.

        :return: a tuple (min, Q1, Me, Q3, Max)

        :Example:

        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
        >>> w.quartiles()
        (1, 2, 3.0, 4, 4)
        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
        >>> w.quartiles()
        (1, 3, 4, 5, 5)

        """
        return (min(self.keys()), self.quartile(1), self.quartile(2), self.quartile(3), max(self.keys()))

    def quartile(self, quartile=1):
        """
        Compute one quartile of the dataset.

        Weights have to be integers: each value is repeated "weight" times.

        :param quartile: quartile to compute (default 1 -> Q1)

        :return: the requested quartile

        :Example:

        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
        >>> w.quartile(1)
        2
        >>> w.quartile(2)
        3.0
        >>> w.quartile(3)
        4
        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
        >>> w.quartile(1)
        3
        >>> w.quartile(2)
        4
        >>> w.quartile(3)
        5

        The order in which the values were given does not matter:

        >>> WeightedDataset([3, 1, 2, 2]).quartile(2)
        2.0

        """
        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1
        # Values have to be walked in ascending order; the dict itself only
        # remembers insertion order, so sort explicitly.
        expanded_values = [
            value
            for value in sorted(self)
            for _ in range(self[value])
        ]
        if position.is_integer():
            index = int(position)
            return (expanded_values[index] + expanded_values[index + 1]) / 2
        return expanded_values[ceil(position)]

    def posi_quartile(self, quartile=1):
        """
        Compute the position of a quartile

        :param quartile: the quartile concerned

        :return: the position of the quartile (1-based, not rounded)
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # Latex rendering

    def tabular_latex(self):
        """ Build the latex code which displays the dataset as a table

        The first row holds the values (labelled with data_name), the second
        one the weights (labelled with weight_name). Both are formatted with
        two decimals.

        :return: the latex code of the table
        """
        value_cells = "".join("& {val:.2f} ".format(val=v) for v in self.keys())
        weight_cells = "".join("& {eff:.2f} ".format(eff=e) for e in self.values())

        latex = "\\begin{{center}} \n \t \\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(nbr_col=len(self))
        latex += "\t\t \\hline \n"
        latex += "\t\t {val_name} ".format(val_name=self.data_name)
        latex += value_cells
        latex += "\\\\ \\hline \n"
        latex += "\t\t {eff_name} ".format(eff_name=self.weight_name)
        latex += weight_cells + " \\\\ \\hline \n"
        latex += "\t \\end{tabular} \n \\end{center}"

        return latex

if __name__ == '__main__':
    # Small demo of the WeightedDataset API.
    # Because of the relative imports above, run it as a module
    # (python -m <package path>.weightedDataset) rather than as a script.
    valeurs = [65, 75, 85, 95, 105, 115, 125, 135]
    valeurs.sort()
    print(valeurs)
    effectifs = [15, 2, 21, 24, 12, 9, 5, 2]

    s = WeightedDataset(valeurs, weights=effectifs)
    print(s.effectif_total())

    print(list(s.keys()))
    print(list(s.values()))
    print(s)

    print("\n-----------------------\n")
    print("Moyenne ", s.mean())
    print("\n-----------------------\n")
    print(s.variance())
    print("\n-----------------------\n")
    print(s.sd())

    print("\n-----------------------\n")
    print(s.quartiles())
    print(s.quartile(1))
    print(s.quartile(3))


# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del