Mapytex/pymath/stat/weightedDataset.py

213 lines
5.7 KiB
Python
Raw Normal View History

2016-01-08 13:58:21 +00:00
#!/usr/bin/env python
# -*- coding:Utf-8 -*-
#
#
# A set of functions that make solving and writing high-school statistics exercises much more convenient
#
#
2016-01-09 09:34:46 +00:00
from math import sqrt, ceil
from collections import Counter
from .dataset import Dataset
from ..calculus.generic import flatten_list
class WeightedDataset(dict):
    """A weighted dataset with statistics and LaTeX rendering methods.

    The instance is a ``dict`` mapping each data value to its weight.

    >>> w = WeightedDataset([1, 2, 3, 4], "Enfants", [10, 11, 12, 13])
    >>> print(w)
    {1: 10, 2: 11, 3: 12, 4: 13}
    >>> w.effectif_total()
    46
    >>> w.sum()
    120
    >>> w.mean()
    2.608695652173913
    >>> w.deviation()
    56.95652173913044
    >>> w.variance()
    1.2381852551984878
    >>> w.sd()
    1.1127377297451937
    """

    def __init__(self, datas=None, data_name="Valeurs", weights=None,
                 weight_name="Effectifs"):
        """Initiate the WeightedDataset.

        :param datas: the data values; without ``weights`` each value is
            weighted by its number of occurrences in ``datas``
        :param data_name: label stored for the values
        :param weights: weights paired positionally with ``datas``
        :param weight_name: label stored for the weights
        :raises ValueError: if ``datas`` and ``weights`` differ in length

        >>> WeightedDataset()
        {}
        >>> WeightedDataset([1, 1, 2]) == {1: 2, 2: 1}
        True
        """
        if not datas:
            # No data at all: start empty instead of failing on an
            # unassigned local; weights without data cannot be paired.
            if weights:
                raise ValueError("Datas and weights should have same length")
            weighted = {}
        elif not weights:
            weighted = Counter(datas)
        elif len(datas) != len(weights):
            raise ValueError("Datas and weights should have same length")
        else:
            weighted = dict(zip(datas, weights))

        dict.__init__(self, weighted)
        self.data_name = data_name
        self.weight_name = weight_name

    def add_data(self, data, weight=1):
        """Add ``weight`` to the weight of ``data``, creating it if absent."""
        try:
            self[data] += weight
        except KeyError:
            self[data] = weight

    def total_weight(self):
        """Return the sum of all the weights."""
        return sum(self.values())

    def effectif_total(self):
        """Alias of :meth:`total_weight`."""
        return self.total_weight()

    def sum(self):
        """Return the weighted sum, i.e. the sum of value * weight."""
        return sum([k * v for (k, v) in self.items()])

    def mean(self):
        """Return the weighted mean.

        Dividing by the total weight raises ``ZeroDivisionError`` when that
        total is zero (e.g. an empty dataset).
        """
        return self.sum() / self.effectif_total()

    def deviation(self):
        """Compute the deviation (not normalized).

        This is the weighted sum of the squared distances to the mean.
        """
        mean = self.mean()
        return sum([v * (k - mean) ** 2 for (k, v) in self.items()])

    def variance(self):
        """Return the deviation divided by the total weight."""
        return self.deviation() / self.effectif_total()

    def sd(self):
        """Compute the standard deviation."""
        return sqrt(self.variance())

    def quartiles(self):
        """Compute the quartiles of the dataset.

        :return: a tuple (min, Q1, Me, Q3, max)

        :Example:

        >>> w = WeightedDataset([1, 2, 3, 4], weights=[1, 2, 3, 4])
        >>> w.quartiles()
        (1, 2, 3.0, 4, 4)
        >>> w = WeightedDataset([1, 2, 3, 4, 5], weights=[1, 2, 3, 4, 5])
        >>> w.quartiles()
        (1, 3, 4, 5, 5)
        """
        return (
            min(self.keys()),
            self.quartile(1),
            self.quartile(2),
            self.quartile(3),
            max(self.keys()),
        )

    def quartile(self, quartile=1):
        """Compute one quartile of the dataset.

        Each value is repeated ``weight`` times (so weights must be
        integers) and the repeated values are sorted before indexing.

        :param quartile: which quartile to compute (default 1 -> Q1)
        :return: the requested quartile

        :Example:

        >>> w = WeightedDataset([1, 2, 3, 4], weights=[1, 2, 3, 4])
        >>> w.quartile(1)
        2
        >>> w.quartile(2)
        3.0
        >>> w.quartile(3)
        4
        >>> WeightedDataset([3, 1, 2]).quartile(2)
        2
        """
        # -1 to match with list indexing
        position = self.posi_quartile(quartile) - 1
        # Sort by value: dict order is insertion order, which is not
        # necessarily increasing, and ranks only make sense on sorted data.
        expanded_values = [
            value
            for value, weight in sorted(self.items())
            for _ in range(weight)
        ]
        if position.is_integer():
            # The position falls exactly between two ranks: average them.
            lower = int(position)
            return (expanded_values[lower] + expanded_values[lower + 1]) / 2
        return expanded_values[ceil(position)]

    def posi_quartile(self, quartile=1):
        """Compute the position of a quartile.

        :param quartile: the quartile concerned
        :return: ``quartile * total weight / 4``, not rounded
        """
        return quartile * self.effectif_total() / 4

    # --------------------------
    # LaTeX rendering

    def tabular_latex(self):
        """Return the LaTeX code displaying the dataset as a table.

        The table has one row for the values (labelled ``data_name``) and
        one row for the weights (labelled ``weight_name``); every number is
        formatted with two decimals.

        :return: the LaTeX code of the table
        """
        latex = (
            "\\begin{{center}} \n \t "
            "\\begin{{tabular}}{{|c|*{{{nbr_col}}}{{c|}}}} \n"
        ).format(nbr_col=len(self))
        latex += "\t\t \\hline \n"

        value_row = "\t\t {val_name} ".format(val_name=self.data_name)
        weight_row = "\t\t {eff_name} ".format(eff_name=self.weight_name)
        for (value, weight) in self.items():
            value_row += "& {val:.2f} ".format(val=value)
            weight_row += "& {eff:.2f} ".format(eff=weight)

        latex += value_row + "\\\\ \\hline \n"
        latex += weight_row + " \\\\ \\hline \n"
        latex += "\t \\end{tabular} \n \\end{center}"
        return latex
if __name__ == '__main__':
    # Small manual demo of the statistics computed by WeightedDataset.
    valeurs = [65, 75, 85, 95, 105, 115, 125, 135]
    valeurs.sort()
    print(valeurs)
    effectifs = [15, 2, 21, 24, 12, 9, 5, 2]

    # Pair each value with its weight (both lists have the same length).
    s = WeightedDataset(valeurs, weights=effectifs)

    print(s.effectif_total())
    print(s)

    print("\n-----------------------\n")
    print("Mean ", s.mean())
    print("\n-----------------------\n")
    print("Variance ", s.variance())
    print("\n-----------------------\n")
    print("Standard deviation ", s.sd())
    print("\n-----------------------\n")
    print("Quartiles ", s.quartiles())
    print(s.quartile(1))
    print(s.quartile(3))
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del