Mapytex/pymath/stat/dataset.py

#!/usr/bin/env python
# -*- coding:Utf-8 -*-

#
#
# A set of functions that make solving and writing high-school statistics exercises much more convenient
#
#

# TODO: Rendre toutes les réponses Explicable!! |mar. janv. 12 09:41:00 EAT 2016

from math import sqrt, ceil
from .number_tools import number_factory
from .random_generator import random_generator

class Dataset(list):
    """A dataset (a list) with statistics and LaTeX rendering methods.

    The values are stored in the list itself; the name of the series is kept
    in the ``data_name`` attribute.

    Most statistics are wrapped with ``number_factory`` (imported from
    ``.number_tools``), which post-processes the returned number.
    NOTE(review): presumably it rounds/normalises the value (see the
    ``sd`` example below) -- confirm against ``number_tools``.

    >>> s = Dataset(range(100))
    >>> s.sum()
    4950
    >>> s.mean()
    49.5
    >>> s.deviation()
    83325
    >>> s.variance()
    833.25
    >>> s.sd()
    28.87
    """

    @classmethod
    def random(cls, length, data_name="Valeurs",
               distrib="gauss", rd_args=(0, 1),
               nbr_format=lambda x: round(x, 2),
               v_min=None, v_max=None,
               exact_mean=None):
        """Generate a dataset filled with random values.

        All the generation arguments are forwarded, in order, to
        ``random_generator``.

        :param length: length of the dataset
        :param data_name: name of the dataset
        :param distrib: distribution of the dataset. It can be a function or a string from ["randint", "uniform", "gauss", "choice"]
        :param rd_args: arguments to pass to distrib
        :param nbr_format: function which formats each value
        :param v_min: minimum accepted value
        :param v_max: maximum accepted value
        :param exact_mean: if set, the last generated number is created so that the computed mean is exactly equal to "exact_mean"
        :return: a new instance of ``cls`` holding the generated values
        """
        data = random_generator(length,
                                distrib, rd_args,
                                nbr_format,
                                v_min, v_max,
                                exact_mean)

        return cls(data, data_name=data_name)

    def __init__(self, data=(), data_name="Valeurs"):
        """Create a numeric dataset.

        :param data: iterable of values of the dataset (empty by default)
        :param data_name: name of the dataset, stored as ``self.data_name``
        """
        list.__init__(self, data)

        # Store the name on the instance (the previous code assigned it to a
        # throw-away local variable, so the name was silently lost).
        self.data_name = data_name

    def add_data(self, data):
        """Add data to the dataset.

        An iterable is appended element by element; anything that is not
        iterable is appended as a single value. Note that a string is
        iterable, so it is added character by character.

        :param data: a single value or an iterable of values
        """
        try:
            self.extend(data)
        except TypeError:
            # ``data`` is not iterable: treat it as one single value.
            self.append(data)

    # --------------------------
    # Stat tools

    def effectif_total(self):
        """Return the total number of values in the dataset."""
        return len(self)

    @number_factory
    def sum(self):
        """Return the sum of the values."""
        # Inside a method body ``sum`` resolves to the builtin, not to this
        # method (class scope is not an enclosing scope for functions).
        return sum(self)

    @number_factory
    def mean(self):
        """Return the arithmetic mean of the values.

        :raises ZeroDivisionError: if the dataset is empty
        """
        return self.sum() / self.effectif_total()

    @number_factory
    def deviation(self):
        """Return the sum of squared differences to the mean (not normalized)."""
        mean = self.mean()
        return sum((x - mean) ** 2 for x in self)

    @number_factory
    def variance(self):
        """Return the variance: the deviation divided by the number of values.

        :raises ZeroDivisionError: if the dataset is empty
        """
        return self.deviation() / self.effectif_total()

    @number_factory
    def sd(self):
        """Return the standard deviation (square root of the variance)."""
        return sqrt(self.variance())

    def quartiles(self):
        """Compute the five-number summary of the series.

        :return: a tuple (min, Q1, Me, Q3, Max)

        >>> w = Dataset(range(12))
        >>> w.quartiles()
        (0, 2.5, 5.5, 8.5, 11)
        """
        return (min(self),
                self.quartile(1),
                self.quartile(2),
                self.quartile(3),
                max(self))

    @number_factory
    def quartile(self, quartile=1):
        """Compute one quartile of the series.

        The values are sorted before the quartile is looked up, so the
        result does not depend on the order in which they were inserted.

        :param quartile: quartile to compute (default 1 -> Q1)

        :return: the requested quartile

        :raises IndexError: if the dataset is empty

        :Example:

        >>> w = Dataset(range(12))
        >>> w.quartile(1)
        2.5
        >>> w.quartile(2)
        5.5
        >>> w.quartile(3)
        8.5
        >>> w = Dataset(range(14))
        >>> w.quartile(1)
        3
        >>> w.quartile(2)
        6.5
        >>> w.quartile(3)
        10

        """
        # Quartiles are defined on the ordered series.
        ordered = sorted(self)
        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1
        if position.is_integer():
            # The position falls exactly on a value: average it with the
            # following one.
            lower = int(position)
            return (ordered[lower] + ordered[lower + 1]) / 2
        # Otherwise take the first value located after the position.
        return ordered[ceil(position)]

    def posi_quartile(self, quartile=1):
        """Compute the position of a quartile in the ordered series.

        :param quartile: the quartile concerned

        :return: the 1-based position ``quartile * N / 4`` of the quartile, not rounded
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # LaTeX rendering

    def tabular_latex(self, nbr_lines=1):
        """Build the LaTeX code displaying the dataset as a tabular.

        The values are spread over at most ``nbr_lines`` rows of equal
        width; the last row is padded with blank cells when needed.

        :param nbr_lines: maximum number of rows of the tabular
        :return: the LaTeX source of the tabular, as a string
        :raises ValueError: if ``nbr_lines`` is lower than 1
        """
        if nbr_lines < 1:
            raise ValueError("nbr_lines must be a positive integer")

        total = self.effectif_total()
        # Ceiling division, so that no more than ``nbr_lines`` rows are
        # produced; at least one cell per row so that an empty dataset or
        # ``nbr_lines > total`` does not lead to a division by zero.
        d_per_line = max(1, -(-total // nbr_lines))
        splited_data = [list(self[x:x + d_per_line])
                        for x in range(0, total, d_per_line)]
        # Add the missing cells to the last row
        if splited_data:
            splited_data[-1] += [' '] * (d_per_line - len(splited_data[-1]))

        # Build the table
        # NOTE(review): the column spec declares ``nbr_col + 1`` columns
        # ("|c|" plus nbr_col times "c|") while each row only holds
        # ``nbr_col`` cells -- confirm whether a leading header column was
        # intended.
        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(nbr_col=d_per_line)
        latex += "\t\t \\hline \n"

        d_lines = [' & '.join(map(str, line)) for line in splited_data]
        latex += " \\\\ \n \\hline \n".join(d_lines)

        # Close the last row only when there is one.
        if d_lines:
            latex += " \\\\ \n \\hline \n"
        latex += "\\end{tabular}"

        return latex


# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`#/usr/bin/env python`
			`# -- coding:Utf-8 --`

			`#`
			`#`
			`# Ensemble de fonction rendant beaucoup plus pratique la résolution et l'élaboration des exercices de stat au lycée`
			`#`
			`#`

allow import of txt and add todo on database 2016-01-12 06:51:31 +00:00			`# TODO: Rendre toutes les réponses Explicable!! \|mar. janv. 12 09:41:00 EAT 2016`

Dataset heritate from list 2016-01-08 14:01:39 +00:00			`from math import sqrt, ceil`
decorator to control returned values 2016-01-09 15:40:02 +00:00			`from .number_tools import number_factory`
move random_generator for his own file 2016-01-09 15:51:20 +00:00			`from .random_generator import random_generator`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`class Dataset(list):`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`""" A dataset (a list) with statistics and latex rendering methods`

			`>>> s = Dataset(range(100))`
			`>>> s.sum()`
			`4950`
			`>>> s.mean()`
			`49.5`
			`>>> s.deviation()`
decorator to control returned values 2016-01-09 15:40:02 +00:00			`83325`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`>>> s.variance()`
			`833.25`
			`>>> s.sd()`
decorator to control returned values 2016-01-09 15:40:02 +00:00			`28.87`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`"""`
random generation for Dataset 2016-01-09 15:14:18 +00:00
			`@classmethod`
			`def random(cls, length, data_name = "Valeurs", \`
move random_generator for his own file 2016-01-09 15:51:20 +00:00			`distrib = "gauss", rd_args = (0,1), \`
random generation for Dataset 2016-01-09 15:14:18 +00:00			`nbr_format = lambda x:round(x,2), \`
			`v_min = None, v_max = None, \`
			`exact_mean = None):`
move random_generator for his own file 2016-01-09 15:51:20 +00:00			`""" Generate a random list of value`
random generation for Dataset 2016-01-09 15:14:18 +00:00
			`:param length: length of the dataset`
Add choice in allowed distrib 2016-01-12 08:14:44 +00:00			`:param distrib: Distribution of the data set. It can be a function or string from ["randint", "uniform", "gauss", "choice"]`
random generation for Dataset 2016-01-09 15:14:18 +00:00			`:param rd_args: arguments to pass to distrib`
			`:param nbr_format: function which format value`
			`:param v_min: minimum accepted value`
			`:param v_max: maximum accepted value`
			`:param exact_mean: if set, the last generated number will be create in order that the computed mean is exacly equal to "exact_mean"`
			`"""`
move random_generator for his own file 2016-01-09 15:51:20 +00:00			`data = random_generator(length,\`
			`distrib, rd_args, \`
			`nbr_format, \`
			`v_min, v_max, \`
			`exact_mean)`
random generation for Dataset 2016-01-09 15:14:18 +00:00
			`return cls(data, data_name = data_name)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
			`def __init__(self, data = [], data_name = "Valeurs"):`
			`"""`
			`Create a numeric data set`

			`:param data: values of the data set`
			`:param data_name: name of the data set`
			`"""`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`list.__init__(self, data)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`self_name = data_name`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
			`def add_data(self, data):`
			`"""Add datas to the data set`

			`:param data: datas`
			`"""`
			`try:`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`self += data`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`except TypeError:`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`self += [data]`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
random generation for Dataset 2016-01-09 15:14:18 +00:00			`# --------------------------`
			`# Stat tools`

Corr quartiles computation 2016-01-09 09:22:32 +00:00			`def effectif_total(self):`
			`return len(self)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def sum(self):`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`return sum(self)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def mean(self):`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`return self.sum()/self.effectif_total()`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def deviation(self):`
			`""" Compute the deviation (not normalized) """`
			`mean = self.mean()`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`return sum([(x - mean)**2 for x in self])`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def variance(self):`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`return self.deviation()/self.effectif_total()`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def sd(self):`
			`""" Compute the standard deviation """`
			`return sqrt(self.variance())`

			`def quartiles(self):`
			`"""`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`Calcul les quartiles de la série.`

			`:return: un tuple avec (min, Q1, Me, Q3, Max)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`>>> w = Dataset(range(12))`
			`>>> w.quartiles()`
			`(0, 2.5, 5.5, 8.5, 11)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`"""`
Dataset heritate from list 2016-01-08 14:01:39 +00:00			`return (min(self) , self.quartile(1) , self.quartile(2) , self.quartile(3), max(self))`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
decorator to control returned values 2016-01-09 15:40:02 +00:00			`@number_factory`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`def quartile(self, quartile = 1):`
			`"""`
			`Calcul un quartile de la série.`

			`:param quartile: quartile à calculer (par defaut 1 -> Q1)`

			`:return: le quartile demandé`

			`: Example:`

Corr quartiles computation 2016-01-09 09:22:32 +00:00			`>>> w = Dataset(range(12))`
			`>>> w.quartile(1)`
			`2.5`
			`>>> w.quartile(2)`
			`5.5`
			`>>> w.quartile(3)`
			`8.5`
			`>>> w = Dataset(range(14))`
			`>>> w.quartile(1)`
			`3`
			`>>> w.quartile(2)`
			`6.5`
			`>>> w.quartile(3)`
			`10`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
			`"""`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`# -1 to match with list indexing`
			`position = self.posi_quartile(quartile) - 1`
			`if position.is_integer():`
			`return (self[int(position)] + self[int(position)+1])/2`
			`else:`
			`return self[ceil(position)]`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
			`def posi_quartile(self, quartile = 1):`
			`"""`
			`Calcul la position du quartile`

			`:param quartile: le quartile concerné`

			`:return : la position du quartile (arondis à l'entier suppérieur, non arrondis)`
			`"""`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`return quartile * self.effectif_total() / 4`

Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`# --------------------------`
			`# Rendu latex`

			`def tabular_latex(self, nbr_lines = 1):`
remove center and clean tabular_latex for Dataset 2016-01-09 10:20:20 +00:00			`""" Latex code to display dataset as a tabular """`
Corr quartiles computation 2016-01-09 09:22:32 +00:00			`d_per_line = self.effectif_total() // nbr_lines`
			`d_last_line = self.effectif_total() % d_per_line`
			`splited_data = [self[x:x+d_per_line] for x in range(0, self.effectif_total(), d_per_line)]`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`# On ajoute les éléments manquant pour la dernière line`
			`if d_last_line:`
			`splited_data[-1] += [' ']*(d_per_line - d_last_line)`

			`# Construction du tableau`
remove center and clean tabular_latex for Dataset 2016-01-09 10:20:20 +00:00			`latex = "\\begin{{tabular}}{{\|c\|*{{{nbr_col}}}{{c\|}}}} \n".format(nbr_col = d_per_line)`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00			`latex += "\t\t \hline \n"`

			`d_lines = [' & '.join(map(str,l)) for l in splited_data]`
			`latex += " \\\\ \n \\hline \n".join(d_lines)`

			`latex += " \\\\ \n \\hline \n"`
remove center and clean tabular_latex for Dataset 2016-01-09 10:20:20 +00:00			`latex += "\\end{tabular}"`
Start working on dataset for Stat 2016-01-08 13:58:21 +00:00
			`return latex`



			`# -----------------------------`
			`# Reglages pour 'vim'`
			`# vim:set autoindent expandtab tabstop=4 shiftwidth=4:`
			`# cursor: 16 del`