2016-01-08 13:58:21 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding:Utf-8 -*-
|
|
|
|
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# Set of functions that make solving and writing high-school statistics exercises much more convenient
|
|
|
|
#
|
|
|
|
#
|
|
|
|
|
2016-01-12 06:51:31 +00:00
|
|
|
# TODO: Rendre toutes les réponses Explicable!! |mar. janv. 12 09:41:00 EAT 2016
|
|
|
|
|
2016-01-08 14:01:39 +00:00
|
|
|
from math import sqrt, ceil
|
2016-01-09 15:40:02 +00:00
|
|
|
from .number_tools import number_factory
|
2016-01-09 15:51:20 +00:00
|
|
|
from .random_generator import random_generator
|
2016-01-08 13:58:21 +00:00
|
|
|
|
2016-01-08 14:01:39 +00:00
|
|
|
class Dataset(list):
    """A dataset (a list) with statistics and LaTeX rendering methods.

    Most statistics are decorated with ``number_factory`` (imported from
    ``.number_tools``, not visible in this file); the expected outputs below
    suggest it normalises/rounds the returned numbers -- TODO confirm.

    >>> s = Dataset(range(100))
    >>> s.sum()
    4950
    >>> s.mean()
    49.5
    >>> s.deviation()
    83325
    >>> s.variance()
    833.25
    >>> s.sd()
    28.87
    """

    @classmethod
    def random(cls, length, data_name="Valeurs",
               distrib="gauss", rd_args=(0, 1),
               nbr_format=lambda x: round(x, 2),
               v_min=None, v_max=None,
               exact_mean=None):
        """Build a dataset filled with randomly generated values.

        All generation parameters are forwarded, positionally and unchanged,
        to ``random_generator`` (imported from ``.random_generator``).

        :param length: length of the dataset
        :param data_name: name given to the created dataset
        :param distrib: distribution of the data set. It can be a function
            or a string from ["randint", "uniform", "gauss"]
        :param rd_args: arguments to pass to distrib
        :param nbr_format: function which formats each value
        :param v_min: minimum accepted value
        :param v_max: maximum accepted value
        :param exact_mean: if set, the last generated number is chosen so
            that the computed mean is exactly equal to "exact_mean"
        :return: a new instance of ``cls`` holding the generated values
        """
        data = random_generator(length,
                                distrib, rd_args,
                                nbr_format,
                                v_min, v_max,
                                exact_mean)

        return cls(data, data_name=data_name)

    def __init__(self, data=(), data_name="Valeurs"):
        """Create a numeric data set.

        :param data: values of the data set (any iterable; copied into the list)
        :param data_name: name of the data set, stored as ``self.data_name``
        """
        list.__init__(self, data)
        # The original assigned to a local variable (``self_name``), so the
        # name was silently dropped; store it on the instance instead.
        self.data_name = data_name

    def add_data(self, data):
        """Add data to the data set.

        :param data: either an iterable of values (all of them are appended)
            or a single non-iterable value (appended as one element)
        """
        try:
            self.extend(data)
        except TypeError:
            # ``data`` is not iterable: treat it as a single value.
            self.append(data)

    # --------------------------
    # Stat tools

    def effectif_total(self):
        """Return the total number of values in the dataset."""
        return len(self)

    @number_factory
    def sum(self):
        """Return the sum of all values."""
        # Inside the method body ``sum`` resolves to the builtin, not to
        # this method (class attributes are not in the function's scope).
        return sum(self)

    @number_factory
    def mean(self):
        """Return the arithmetic mean.

        Divides by the number of values, so an empty dataset raises
        ``ZeroDivisionError``.
        """
        return self.sum() / self.effectif_total()

    @number_factory
    def deviation(self):
        """Compute the deviation (not normalized).

        :return: the sum of squared differences between each value and the mean
        """
        mean = self.mean()
        return sum((x - mean) ** 2 for x in self)

    @number_factory
    def variance(self):
        """Return the population variance (deviation divided by the count)."""
        return self.deviation() / self.effectif_total()

    @number_factory
    def sd(self):
        """Compute the standard deviation (square root of the variance)."""
        return sqrt(self.variance())

    def quartiles(self):
        """Compute the five-number summary of the series.

        :return: a tuple (min, Q1, Me, Q3, Max)

        >>> w = Dataset(range(12))
        >>> w.quartiles()
        (0, 2.5, 5.5, 8.5, 11)
        """
        return (min(self),
                self.quartile(1),
                self.quartile(2),
                self.quartile(3),
                max(self))

    @number_factory
    def quartile(self, quartile=1):
        """Compute one quartile of the series.

        The values are sorted before the quartile is looked up, so the
        result does not depend on insertion order.

        :param quartile: quartile to compute (default 1 -> Q1)
        :return: the requested quartile

        :Example:

        >>> w = Dataset(range(12))
        >>> w.quartile(1)
        2.5
        >>> w.quartile(2)
        5.5
        >>> w.quartile(3)
        8.5
        >>> w = Dataset(range(14))
        >>> w.quartile(1)
        3
        >>> w.quartile(2)
        6.5
        >>> w.quartile(3)
        10
        >>> Dataset([11, 3, 7, 0, 9, 1, 5, 10, 2, 8, 4, 6]).quartile(1)
        2.5
        """
        # Quartiles are only meaningful on ordered data; the dataset itself
        # keeps insertion order, so work on a sorted copy.
        ordered = sorted(self)

        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1

        if position.is_integer():
            # The position falls exactly between two values: average them.
            lower = int(position)
            return (ordered[lower] + ordered[lower + 1]) / 2

        return ordered[ceil(position)]

    def posi_quartile(self, quartile=1):
        """Compute the position of a quartile.

        :param quartile: the quartile concerned
        :return: the unrounded (possibly fractional) position,
            ``quartile * effectif_total / 4``
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # LaTeX rendering

    def tabular_latex(self, nbr_lines=1):
        """Return LaTeX code displaying the dataset as a tabular.

        Values are laid out row by row; the last row is padded with blank
        cells so that every row has the same number of cells.

        :param nbr_lines: maximum number of rows to spread the values over
        :return: the LaTeX source of the tabular, as a string
        :raises ValueError: if ``nbr_lines`` is lower than 1
        """
        if nbr_lines < 1:
            raise ValueError("nbr_lines must be at least 1")

        total = self.effectif_total()
        # Round up so that the values never overflow onto more rows than
        # requested; keep at least one cell per row so slicing stays valid
        # even for an empty dataset.
        d_per_line = max(1, ceil(total / nbr_lines))

        rows = [self[start:start + d_per_line]
                for start in range(0, total, d_per_line)]

        # Fill the last row with blank cells when it is shorter than the others.
        if rows:
            rows[-1] += [' '] * (d_per_line - len(rows[-1]))

        # Build the table.
        # NOTE(review): the column spec "|c|*{n}{c|}" appears to declare one
        # more column than the n cells emitted per row -- possibly meant for
        # a leading label column; left unchanged, confirm intent.
        latex = "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n".format(nbr_col=d_per_line)
        latex += "\t\t \\hline \n"

        d_lines = [' & '.join(map(str, row)) for row in rows]
        latex += " \\\\ \n \\hline \n".join(d_lines)

        latex += " \\\\ \n \\hline \n"
        latex += "\\end{tabular}"

        return latex
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -----------------------------
|
|
|
|
# Reglages pour 'vim'
|
|
|
|
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
|
|
|
# cursor: 16 del
|
|
|
|
|