Change names and pass tests
This commit is contained in:
10
mapytex/stat/__init__.py
Normal file
10
mapytex/stat/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
from .dataset import Dataset
|
||||
from .weightedDataset import WeightedDataset
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
199
mapytex/stat/dataset.py
Normal file
199
mapytex/stat/dataset.py
Normal file
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding:Utf-8 -*-
|
||||
|
||||
#
|
||||
#
|
||||
# Ensemble de fonction rendant beaucoup plus pratique la résolution et l'élaboration des exercices de stat au lycée
|
||||
#
|
||||
#
|
||||
|
||||
# TODO: Rendre toutes les réponses Explicable!! |mar. janv. 12 09:41:00
|
||||
# EAT 2016
|
||||
|
||||
from math import sqrt, ceil
|
||||
from .number_tools import number_factory
|
||||
from .random_generator import random_generator
|
||||
|
||||
|
||||
class Dataset(list):
    """A dataset (a list) with statistics and LaTeX rendering methods.

    >>> s = Dataset(range(100))
    >>> s.sum()
    4950
    >>> s.mean()
    49.5
    >>> s.deviation()
    83325
    >>> s.variance()
    833.25
    >>> s.sd()
    28.87
    """

    @classmethod
    def random(cls, length, data_name="Valeurs",
               distrib="gauss", rd_args=(0, 1),
               nbr_format=lambda x: round(x, 2),
               v_min=None, v_max=None,
               exact_mean=None):
        """Generate a dataset filled with random values.

        All generation arguments are forwarded unchanged to
        ``random_generator``.

        :param length: length of the dataset
        :param data_name: name of the data set
        :param distrib: distribution of the data set. It can be a function
            or a string from ["randint", "uniform", "gauss", "choice"]
        :param rd_args: arguments to pass to distrib
        :param nbr_format: function which formats each value
        :param v_min: minimum accepted value
        :param v_max: maximum accepted value
        :param exact_mean: if set, the last generated number is built so
            that the computed mean is equal to "exact_mean"
        :return: a new instance of ``cls`` holding the generated values
        """
        data = random_generator(length,
                                distrib, rd_args,
                                nbr_format,
                                v_min, v_max,
                                exact_mean)

        return cls(data, data_name=data_name)

    def __init__(self, data=(), data_name="Valeurs"):
        """Create a numeric data set.

        :param data: values of the data set
        :param data_name: name of the data set
        """
        list.__init__(self, data)

        # The name used to be bound to a throw-away local (``self_name``)
        # and was therefore lost; keep it on the instance.
        self.data_name = data_name

    def add_data(self, data):
        """Add data to the data set.

        :param data: an iterable of values, or a single non-iterable value
        """
        try:
            self.extend(data)
        except TypeError:
            # ``data`` is not iterable: treat it as a single value.
            self.append(data)

    # --------------------------
    # Stat tools

    def effectif_total(self):
        """Return the number of values in the dataset."""
        return len(self)

    @number_factory
    def sum(self):
        """Return the sum of the values."""
        return sum(self)

    @number_factory
    def mean(self):
        """Return the mean of the values."""
        return self.sum() / self.effectif_total()

    @number_factory
    def deviation(self):
        """Compute the deviation (not normalized).

        This is the sum of the squared differences to ``self.mean()``.
        """
        mean = self.mean()
        return sum((x - mean) ** 2 for x in self)

    @number_factory
    def variance(self):
        """Return the deviation divided by the number of values."""
        return self.deviation() / self.effectif_total()

    @number_factory
    def sd(self):
        """Compute the standard deviation."""
        return sqrt(self.variance())

    def quartiles(self):
        """Compute the quartiles of the series.

        :return: a tuple (min, Q1, Me, Q3, Max)

        >>> w = Dataset(range(12))
        >>> w.quartiles()
        (0, 2.5, 5.5, 8.5, 11)
        """
        return (
            min(self),
            self.quartile(1),
            self.quartile(2),
            self.quartile(3),
            max(self))

    @number_factory
    def quartile(self, quartile=1):
        """Compute one quartile of the series.

        :param quartile: quartile to compute (default 1 -> Q1)

        :return: the requested quartile

        :Example:

        >>> w = Dataset(range(12))
        >>> w.quartile(1)
        2.5
        >>> w.quartile(2)
        5.5
        >>> w.quartile(3)
        8.5
        >>> w = Dataset(range(14))
        >>> w.quartile(1)
        3
        >>> w.quartile(2)
        6.5
        >>> w.quartile(3)
        10
        >>> Dataset([3, 1, 2, 0]).quartile(2)
        1.5

        """
        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1
        # Quartiles are order statistics: work on a sorted copy so that the
        # result does not depend on the insertion order of the values.
        ordered = sorted(self)
        if position.is_integer():
            # The position falls exactly between two values: average them.
            index = int(position)
            return (ordered[index] + ordered[index + 1]) / 2
        return ordered[ceil(position)]

    def posi_quartile(self, quartile=1):
        """Compute the position of a quartile.

        :param quartile: the quartile concerned

        :return: the (1-based, not rounded) position of the quartile
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # LaTeX rendering

    def tabular_latex(self, nbr_lines=1):
        """Build the LaTeX code displaying the dataset as a tabular.

        Each line holds ``effectif_total() // nbr_lines`` values; leftover
        values go on an additional line padded with blank cells.

        :param nbr_lines: number of lines used to split the values
        :return: the LaTeX source of the tabular
        """
        total = self.effectif_total()
        # At least one value per line: an empty dataset, or fewer values
        # than requested lines, would otherwise divide by zero.
        d_per_line = max(1, total // nbr_lines)
        d_last_line = total % d_per_line
        splited_data = [self[x:x + d_per_line]
                        for x in range(0, total, d_per_line)]
        # Pad the last line with blank cells when it is incomplete
        if d_last_line:
            splited_data[-1] += [' '] * (d_per_line - d_last_line)

        # Build the tabular
        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(
            nbr_col=d_per_line)
        latex += "\t\t \\hline \n"

        d_lines = [' & '.join(map(str, l)) for l in splited_data]
        latex += " \\\\ \n \\hline \n".join(d_lines)

        latex += " \\\\ \n \\hline \n"
        latex += "\\end{tabular}"

        return latex
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
25
mapytex/stat/number_tools.py
Normal file
25
mapytex/stat/number_tools.py
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding:Utf-8 -*-
|
||||
|
||||
from functools import wraps
|
||||
|
||||
|
||||
def number_factory(fun):
    """Decorator which formats the value returned by ``fun``.

    A result whose ``is_integer()`` method returns true is converted with
    ``int``; any other result exposing ``is_integer`` is rounded to two
    decimals. A result that raises ``AttributeError`` along the way is
    returned untouched.
    """
    @wraps(fun)
    def wrapper(*args, **kwargs):
        result = fun(*args, **kwargs)
        try:
            formatted = int(result) if result.is_integer() else round(result, 2)
        except AttributeError:
            # Not a number-like object: hand it back as is.
            formatted = result
        return formatted
    return wrapper
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
78
mapytex/stat/random_generator.py
Normal file
78
mapytex/stat/random_generator.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding:Utf-8 -*-
|
||||
|
||||
from random import randint, uniform, gauss, choice
|
||||
|
||||
|
||||
def random_generator(length,
                     distrib=gauss, rd_args=(0, 1),
                     nbr_format=lambda x: round(x, 2),
                     v_min=None, v_max=None,
                     exact_mean=None):
    """Generate a random list of values.

    :param length: length of the dataset
    :param distrib: distribution of the data set. It can be a function or
        a string from ["randint", "uniform", "gauss", "choice"]
    :param rd_args: arguments to pass to distrib
    :param nbr_format: function which formats each value
    :param v_min: minimum accepted value
    :param v_max: maximum accepted value
    :param exact_mean: if set, the last generated number is built so that
        the computed mean is equal to "exact_mean" (before formatting by
        ``nbr_format``)
    :return: the list of generated values
    :raises KeyError: if ``distrib`` is a string that is not a known
        distribution name
    :raises ValueError: if the last value required by ``exact_mean`` is
        outside of [v_min, v_max]

    >>> random_generator(10)  # doctest: +SKIP
    >>> random_generator(10, distrib = uniform, rd_args = (5, 10))  # doctest: +SKIP
    >>> random_generator(10, distrib = "uniform", rd_args = (5, 10))  # doctest: +SKIP
    >>> random_generator(10, v_min = 0)  # doctest: +SKIP
    >>> random_generator(10, exact_mean = 0)  # doctest: +SKIP
    >>> random_generator(10, distrib = gauss, rd_args = (50,20), nbr_format = int)  # doctest: +SKIP

    """
    # If exact_mean is set, only length - 1 values are drawn at random;
    # the last one is computed afterwards.
    if exact_mean is not None:
        length = length - 1

    def validate(value):
        """Tell whether ``value`` respects the v_min / v_max bounds."""
        return ((v_min is None or value >= v_min)
                and (v_max is None or value <= v_max))

    # Resolve the distribution function. A name is looked up directly
    # instead of probing ``distrib(*rd_args)``: the probe consumed one
    # random draw and hid TypeErrors raised by genuine callables.
    distribs = {
        "gauss": gauss,
        "uniform": uniform,
        "randint": randint,
        "choice": choice}
    if not callable(distrib):
        distrib = distribs[distrib]

    # Draw values until each one satisfies the bounds
    data = []
    for _ in range(length):
        v = nbr_format(distrib(*rd_args))
        while not validate(v):
            v = nbr_format(distrib(*rd_args))
        data.append(v)

    # Build the last value so that the mean matches exact_mean
    if exact_mean is not None:
        last_v = nbr_format((length + 1) * exact_mean - sum(data))
        if not validate(last_v):
            raise ValueError(
                "Can't build the last value. Conflict between v_min/v_max and exact_mean")
        data.append(last_v)

    return data
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
190
mapytex/stat/weightedDataset.py
Normal file
190
mapytex/stat/weightedDataset.py
Normal file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding:Utf-8 -*-
|
||||
|
||||
#
|
||||
#
|
||||
# Ensemble de fonction rendant beaucoup plus pratique la résolution et l'élaboration des exercices de stat au lycée
|
||||
#
|
||||
#
|
||||
|
||||
from math import sqrt, ceil
|
||||
from collections import Counter
|
||||
from .dataset import Dataset
|
||||
from ..calculus.generic import flatten_list
|
||||
from .number_tools import number_factory
|
||||
|
||||
|
||||
class WeightedDataset(dict):
    """A weighted dataset with statistics and LaTeX rendering methods.

    Keys are the values of the series, dict values are their weights.

    >>> w = WeightedDataset([1, 2, 3, 4], "Enfants", [10, 11, 12, 13])
    >>> print(w)
    {1: 10, 2: 11, 3: 12, 4: 13}
    >>> w.effectif_total()
    46
    >>> w.sum()
    120
    >>> w.mean()
    2.61
    >>> w.deviation()
    56.96
    >>> w.variance()
    1.24
    >>> w.sd()
    1.11

    """

    def __init__(
            self,
            datas=(),
            data_name="Valeurs",
            weights=(),
            weight_name="Effectifs"):
        """Initiate the WeightedDataset.

        :param datas: values of the series; when no weights are given,
            each value is weighted by its number of occurrences
        :param data_name: name of the values
        :param weights: weights matching ``datas`` position by position
        :param weight_name: name of the weights
        :raises ValueError: if weights are given and their length differs
            from the length of ``datas``
        """
        if weights:
            if len(datas) != len(weights):
                raise ValueError("Datas and weights should have same length")
            weighted_datas = dict(zip(datas, weights))
        elif datas:
            weighted_datas = Counter(datas)
        else:
            # No data at all: start from an empty dataset (this case used
            # to crash on an unbound local variable).
            weighted_datas = {}

        dict.__init__(self, weighted_datas)

        self.data_name = data_name
        self.weight_name = weight_name

    def add_data(self, data, weight=1):
        """Add ``weight`` to the weight of ``data``, creating it if needed.

        :param data: the value to add
        :param weight: the weight to add for this value
        """
        try:
            self[data] += weight
        except KeyError:
            self[data] = weight

    @number_factory
    def total_weight(self):
        """Return the sum of all weights."""
        return sum(self.values())

    def effectif_total(self):
        """Return the total weight of the dataset."""
        return self.total_weight()

    @number_factory
    def sum(self):
        """Not really a sum but the sum of the products of keys and weights."""
        return sum(k * v for (k, v) in self.items())

    @number_factory
    def mean(self):
        """Return the weighted mean of the values."""
        return self.sum() / self.effectif_total()

    @number_factory
    def deviation(self):
        """Compute the deviation (not normalized).

        This is the weighted sum of the squared differences to
        ``self.mean()``.
        """
        mean = self.mean()
        return sum(v * (k - mean) ** 2 for (k, v) in self.items())

    @number_factory
    def variance(self):
        """Return the deviation divided by the total weight."""
        return self.deviation() / self.effectif_total()

    @number_factory
    def sd(self):
        """Compute the standard deviation."""
        return sqrt(self.variance())

    def quartiles(self):
        """Compute the quartiles of the series.

        :return: a tuple (min, Q1, Me, Q3, Max)

        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
        >>> w.quartiles()
        (1, 2, 3, 4, 4)
        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
        >>> w.quartiles()
        (1, 3, 4, 5, 5)

        """
        return (min(self.keys()),
                self.quartile(1),
                self.quartile(2),
                self.quartile(3),
                max(self.keys()))

    @number_factory
    def quartile(self, quartile=1):
        """Compute one quartile of the series.

        Weights must be integers: each value is repeated ``weight`` times
        to locate the quartile.

        :param quartile: quartile to compute (default 1 -> Q1)

        :return: the requested quartile

        :Example:

        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(5)]))
        >>> w.quartile(1)
        2
        >>> w.quartile(2)
        3
        >>> w.quartile(3)
        4
        >>> w = WeightedDataset(flatten_list([i*[i] for i in range(6)]))
        >>> w.quartile(1)
        3
        >>> w.quartile(2)
        4
        >>> w.quartile(3)
        5
        >>> WeightedDataset([3, 1, 2, 0]).quartile(2)
        1.5

        """
        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1
        # Quartiles are order statistics: expand the values in key order,
        # not in dict insertion order, which may be unsorted.
        expanded_values = [k
                           for (k, v) in sorted(self.items())
                           for _ in range(v)]
        if position.is_integer():
            # The position falls exactly between two values: average them.
            index = int(position)
            return (expanded_values[index] + expanded_values[index + 1]) / 2
        return expanded_values[ceil(position)]

    def posi_quartile(self, quartile=1):
        """Compute the position of a quartile.

        :param quartile: the quartile concerned

        :return: the (1-based, not rounded) position of the quartile
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # LaTeX rendering

    def tabular_latex(self):
        """Build the LaTeX code displaying the dataset as a tabular.

        The tabular has one line for the values and one line for the
        weights, each starting with its name.

        :return: the LaTeX source of the tabular
        """
        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(
            nbr_col=len(self.keys()))
        latex += "\t \\hline \n"
        data_line = "\t {data_name} ".format(data_name=self.data_name)
        weight_line = "\t {weight_name} ".format(weight_name=self.weight_name)

        # TODO: a solution is still needed for formatting the data
        # |sam. janv. 9 13:14:26 EAT 2016
        for (v, e) in self.items():
            data_line += "& {val} ".format(val=v)
            weight_line += "& {eff} ".format(eff=e)

        latex += data_line + "\\\\ \n \t \\hline \n"
        latex += weight_line + "\\\\ \n \t \\hline \n"
        latex += "\\end{tabular}"

        return latex
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
Reference in New Issue
Block a user