Change names and pass tests

2017-04-17 16:48:52 +03:00
parent 30a6402e40
commit 500426bf82
54 changed files with 103 additions and 87 deletions
--- a/mapytex/stat/init.py
+++ b/mapytex/stat/init.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from .dataset import Dataset
+from .weightedDataset import WeightedDataset
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/mapytex/stat/dataset.py
+++ b/mapytex/stat/dataset.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+# -*- coding:Utf-8 -*-
+
+#
+#
+# Ensemble de fonction rendant beaucoup plus pratique la résolution et l'élaboration des exercices de stat au lycée
+#
+#
+
+# TODO: Rendre toutes les réponses Explicable!! |mar. janv. 12 09:41:00
+# EAT 2016
+
+from math import sqrt, ceil
+from .number_tools import number_factory
+from .random_generator import random_generator
+
+
+class Dataset(list):
+    """ A dataset (a list) with statistics and latex rendering methods
+
+    >>> s = Dataset(range(100))
+    >>> s.sum()
+    4950
+    >>> s.mean()
+    49.5
+    >>> s.deviation()
+    83325
+    >>> s.variance()
+    833.25
+    >>> s.sd()
+    28.87
+    """
+
+    @classmethod
+    def random(cls, length, data_name="Valeurs",
+               distrib="gauss", rd_args=(0, 1),
+               nbr_format=lambda x: round(x, 2),
+               v_min=None, v_max=None,
+               exact_mean=None):
+        """ Generate a random list of value
+
+        :param length: length of the dataset
+        :param distrib: Distribution of the data set. It can be a function or string from ["randint", "uniform", "gauss", "choice"]
+        :param rd_args: arguments to pass to distrib
+        :param nbr_format: function which format value
+        :param v_min: minimum accepted value
+        :param v_max: maximum accepted value
+        :param exact_mean: if set, the last generated number will be create in order that the computed mean is exacly equal to "exact_mean"
+        """
+        data = random_generator(length,
+                                distrib, rd_args,
+                                nbr_format,
+                                v_min, v_max,
+                                exact_mean)
+
+        return cls(data, data_name=data_name)
+
+    def __init__(self, data=[], data_name="Valeurs"):
+        """
+        Create a numeric data set
+
+        :param data: values of the data set
+        :param data_name: name of the data set
+        """
+        list.__init__(self, data)
+
+        self_name = data_name
+
+    def add_data(self, data):
+        """Add datas to the data set
+
+        :param data: datas
+        """
+        try:
+            self += data
+        except TypeError:
+            self += [data]
+
+    # --------------------------
+    # Stat tools
+
+    def effectif_total(self):
+        return len(self)
+
+    @number_factory
+    def sum(self):
+        return sum(self)
+
+    @number_factory
+    def mean(self):
+        return self.sum() / self.effectif_total()
+
+    @number_factory
+    def deviation(self):
+        """ Compute the deviation (not normalized) """
+        mean = self.mean()
+        return sum([(x - mean)**2 for x in self])
+
+    @number_factory
+    def variance(self):
+        return self.deviation() / self.effectif_total()
+
+    @number_factory
+    def sd(self):
+        """ Compute the standard deviation """
+        return sqrt(self.variance())
+
+    def quartiles(self):
+        """
+        Calcul les quartiles de la série.
+
+        :return: un tuple avec (min, Q1, Me, Q3, Max)
+
+        >>> w = Dataset(range(12))
+        >>> w.quartiles()
+        (0, 2.5, 5.5, 8.5, 11)
+        """
+        return (
+            min(self),
+            self.quartile(1),
+            self.quartile(2),
+            self.quartile(3),
+            max(self))
+
+    @number_factory
+    def quartile(self, quartile=1):
+        """
+        Calcul un quartile de la série.
+
+        :param quartile: quartile à calculer (par defaut 1 -> Q1)
+
+        :return: le quartile demandé
+
+        : Example:
+
+        >>> w = Dataset(range(12))
+        >>> w.quartile(1)
+        2.5
+        >>> w.quartile(2)
+        5.5
+        >>> w.quartile(3)
+        8.5
+        >>> w = Dataset(range(14))
+        >>> w.quartile(1)
+        3
+        >>> w.quartile(2)
+        6.5
+        >>> w.quartile(3)
+        10
+
+        """
+        # -1 to match with list indexing
+        position = self.posi_quartile(quartile) - 1
+        if position.is_integer():
+            return (self[int(position)] + self[int(position) + 1]) / 2
+        else:
+            return self[ceil(position)]
+
+    def posi_quartile(self, quartile=1):
+        """
+        Calcul la position du quartile
+
+        :param quartile: le quartile concerné
+
+        :return : la position du quartile (arondis à l'entier suppérieur, non arrondis)
+        """
+        return quartile * self.effectif_total() / 4
+
+    # --------------------------
+    # Rendu latex
+
+    def tabular_latex(self, nbr_lines=1):
+        """ Latex code to display dataset as a tabular """
+        d_per_line = self.effectif_total() // nbr_lines
+        d_last_line = self.effectif_total() % d_per_line
+        splited_data = [self[x:x + d_per_line]
+                        for x in range(0, self.effectif_total(), d_per_line)]
+        # On ajoute les éléments manquant pour la dernière line
+        if d_last_line:
+            splited_data[-1] += [' '] * (d_per_line - d_last_line)
+
+        # Construction du tableau
+        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(
+            nbr_col=d_per_line)
+        latex += "\t\t \hline \n"
+
+        d_lines = [' & '.join(map(str, l)) for l in splited_data]
+        latex += " \\\\ \n \\hline \n".join(d_lines)
+
+        latex += " \\\\ \n \\hline \n"
+        latex += "\\end{tabular}"
+
+        return latex
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/mapytex/stat/number_tools.py
+++ b/mapytex/stat/number_tools.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding:Utf-8 -*-
+
+from functools import wraps
+
+
+def number_factory(fun):
+    """ Decorator which format returned value """
+    @wraps(fun)
+    def wrapper(*args, **kwargs):
+        ans = fun(*args, **kwargs)
+        try:
+            if ans.is_integer():
+                return int(ans)
+            else:
+                return round(ans, 2)
+        except AttributeError:
+            return ans
+    return wrapper
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/mapytex/stat/random_generator.py
+++ b/mapytex/stat/random_generator.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding:Utf-8 -*-
+
+from random import randint, uniform, gauss, choice
+
+
+def random_generator(length,
+                     distrib=gauss, rd_args=(0, 1),
+                     nbr_format=lambda x: round(x, 2),
+                     v_min=None, v_max=None,
+                     exact_mean=None):
+    """ Generate a random list of value
+
+    :param length: length of the dataset
+    :param distrib: Distribution of the data set. It can be a function or string from ["randint", "uniform", "gauss", "choice"]
+    :param rd_args: arguments to pass to distrib
+    :param nbr_format: function which format value
+    :param v_min: minimum accepted value
+    :param v_max: maximum accepted value
+    :param exact_mean: if set, the last generated number will be create in order that the computed mean is exacly equal to "exact_mean"
+
+    >>> random_generator(10)
+    >>> random_generator(10, distrib = uniform, rd_args = (5, 10))
+    >>> random_generator(10, distrib = "uniform", rd_args = (5, 10))
+    >>> random_generator(10, v_min = 0)
+    >>> random_generator(10, exact_mean = 0)
+    >>> random_generator(10, distrib = gauss, rd_args = (50,20), nbr_format = int)
+
+    """
+    # if exact_mean is set, we create automaticaly only length-1 value
+    if exact_mean is not None:
+        length = length - 1
+
+    # build function to test created values
+    if v_min is None:
+        v1 = lambda x: True
+    else:
+        v1 = lambda x: x >= v_min
+    if v_max is None:
+        v2 = lambda x: True
+    else:
+        v2 = lambda x: x <= v_max
+    validate = lambda x: v1(x) and v2(x)
+
+    # get distrib function
+    distribs = {
+        "gauss": gauss,
+        "uniform": uniform,
+        "randint": randint,
+        "choice": choice}
+    try:
+        distrib(*rd_args)
+    except TypeError:
+        distrib = distribs[distrib]
+
+    # building values
+    data = []
+    for _ in range(length):
+        valid = False
+        while not valid:
+            v = nbr_format(distrib(*rd_args))
+            valid = validate(v)
+        data.append(v)
+
+    # Build last value
+    if exact_mean is not None:
+        last_v = nbr_format((length + 1) * exact_mean - sum(data))
+        if not validate(last_v):
+            raise ValueError(
+                "Can't build the last value. Conflict between v_min/v_max and exact_mean")
+        data.append(last_v)
+
+    return data
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/mapytex/stat/weightedDataset.py
+++ b/mapytex/stat/weightedDataset.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+# -*- coding:Utf-8 -*-
+
+#
+#
+# Ensemble de fonction rendant beaucoup plus pratique la résolution et l'élaboration des exercices de stat au lycée
+#
+#
+
+from math import sqrt, ceil
+from collections import Counter
+from .dataset import Dataset
+from ..calculus.generic import flatten_list
+from .number_tools import number_factory
+
+
+class WeightedDataset(dict):
+    """ A weighted dataset with statistics and latex rendering methods
+
+    >>> w = WeightedDataset([1, 2, 3, 4], "Enfants", [10, 11, 12, 13])
+    >>> print(w)
+    {1: 10, 2: 11, 3: 12, 4: 13}
+    >>> w.effectif_total()
+    46
+    >>> w.sum()
+    120
+    >>> w.mean()
+    2.61
+    >>> w.deviation()
+    56.96
+    >>> w.variance()
+    1.24
+    >>> w.sd()
+    1.11
+
+    """
+
+    def __init__(
+            self,
+            datas=[],
+            data_name="Valeurs",
+            weights=[],
+            weight_name="Effectifs"):
+        """
+        Initiate the WeightedDataset
+        """
+        if datas and not weights:
+            weightedDatas = Counter(datas)
+        elif datas and weights:
+            if len(datas) != len(weights):
+                raise ValueError("Datas and weights should have same length")
+            else:
+                weightedDatas = {i[0]: i[1] for i in zip(datas, weights)}
+
+        dict.__init__(self, weightedDatas)
+
+        self.data_name = data_name
+        self.weight_name = weight_name
+
+    def add_data(self, data, weight=1):
+        try:
+            self[data] += weight
+        except KeyError:
+            self[data] = weight
+
+    @number_factory
+    def total_weight(self):
+        return sum(self.values())
+
+    def effectif_total(self):
+        return self.total_weight()
+
+    @number_factory
+    def sum(self):
+        """ Not really a sum but the sum of the product of key and values """
+        return sum([k * v for (k, v) in self.items()])
+
+    @number_factory
+    def mean(self):
+        return self.sum() / self.effectif_total()
+
+    @number_factory
+    def deviation(self):
+        """ Compute the deviation (not normalized) """
+        mean = self.mean()
+        return sum([v * (k - mean)**2 for (k, v) in self.items()])
+
+    @number_factory
+    def variance(self):
+        return self.deviation() / self.effectif_total()
+
+    @number_factory
+    def sd(self):
+        """ Compute the standard deviation """
+        return sqrt(self.variance())
+
+    def quartiles(self):
+        """
+        Calcul les quartiles de la série.
+
+        :return: un tuple avec (min, Q1, Me, Q3, Max)
+
+        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
+        >>> w.quartiles()
+        (1, 2, 3, 4, 4)
+        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
+        >>> w.quartiles()
+        (1, 3, 4, 5, 5)
+
+        """
+        return (min(self.keys()),
+                self.quartile(1),
+                self.quartile(2),
+                self.quartile(3),
+                max(self.keys()))
+
+    @number_factory
+    def quartile(self, quartile=1):
+        """
+        Calcul un quartile de la série.
+
+        :param quartile: quartile à calculer (par defaut 1 -> Q1)
+
+        :return: le quartile demandé
+
+        : Example:
+
+        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
+        >>> w.quartile(1)
+        2
+        >>> w.quartile(2)
+        3
+        >>> w.quartile(3)
+        4
+        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
+        >>> w.quartile(1)
+        3
+        >>> w.quartile(2)
+        4
+        >>> w.quartile(3)
+        5
+
+        """
+        # -1 to match with list indexing
+        position = self.posi_quartile(quartile) - 1
+        expanded_values = flatten_list([v * [k] for (k, v) in self.items()])
+        if position.is_integer():
+            return (expanded_values[int(position)] +
+                    expanded_values[int(position) + 1]) / 2
+        else:
+            return expanded_values[ceil(position)]
+
+    def posi_quartile(self, quartile=1):
+        """
+        Calcul la position du quartile
+
+        :param quartile: le quartile concerné
+
+        :return : la position du quartile (arondis à l'entier suppérieur, non arrondis)
+        """
+        return quartile * self.effectif_total() / 4
+
+    # --------------------------
+    # Rendu latex
+
+    def tabular_latex(self):
+        """ Latex code to display dataset as a tabular """
+        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(
+            nbr_col=len(self.keys()))
+        latex += "\t \hline \n"
+        data_line = "\t {data_name} ".format(data_name=self.data_name)
+        weight_line = "\t {weight_name} ".format(weight_name=self.weight_name)
+
+        # TODO: Il faudra trouver une solution pour le formatage des données
+        # |sam. janv.  9 13:14:26 EAT 2016
+        for (v, e) in self.items():
+            data_line += "& {val} ".format(val=v)
+            weight_line += "& {eff} ".format(eff=e)
+
+        latex += data_line + "\\\\ \n \t \\hline \n"
+        latex += weight_line + "\\\\ \n \t \\hline \n"
+        latex += "\\end{tabular}"
+
+        return latex
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del