diff --git a/pymath/stat/dataset.py b/pymath/stat/dataset.py index b0932c8..7640381 100644 --- a/pymath/stat/dataset.py +++ b/pymath/stat/dataset.py @@ -8,8 +8,8 @@ # from math import sqrt, ceil -from random import randint, uniform, gauss from .number_tools import number_factory +from .random_generator import random_generator class Dataset(list): """ A dataset (a list) with statistics and latex rendering methods @@ -29,11 +29,11 @@ class Dataset(list): @classmethod def random(cls, length, data_name = "Valeurs", \ - distrib = gauss, rd_args = (0,1), \ + distrib = "gauss", rd_args = (0,1), \ nbr_format = lambda x:round(x,2), \ v_min = None, v_max = None, \ exact_mean = None): - """ Create a random Dataset. + """ Generate a random list of value :param length: length of the dataset :param distrib: Distribution of the data set. It can be a function or string from ["randint", "uniform", "gauss"] @@ -42,53 +42,12 @@ class Dataset(list): :param v_min: minimum accepted value :param v_max: maximum accepted value :param exact_mean: if set, the last generated number will be create in order that the computed mean is exacly equal to "exact_mean" - - : Exemple: - >>> Dataset.random(10) - >>> Dataset.random(10, distrib = uniform, rd_args = (5, 10)) - >>> Dataset.random(10, distrib = "uniform", rd_args = (5, 10)) - >>> Dataset.random(10, v_min = 0) - >>> Dataset.random(10, exact_mean = 0) - >>> Dataset.random(10, distrib = gauss, rd_args = (50,20), nbr_format = int) - """ - # if exact_mean is set, we create automaticaly only length-1 value - if exact_mean != None: - length = length - 1 - - # build function to test created values - if v_min == None: - v1 = lambda x: True - else: - v1 = lambda x: x >= v_min - if v_max == None: - v2 = lambda x: True - else: - v2 = lambda x: x <= v_max - validate = lambda x : v1(x) and v2(x) - - # get distrib function - distribs = {"gauss": gauss, "uniform": uniform, "randint":randint} - try: - distrib(*rd_args) - except TypeError: - distrib = 
def random_generator(length,
                     distrib=gauss, rd_args=(0, 1),
                     nbr_format=lambda x: round(x, 2),
                     v_min=None, v_max=None,
                     exact_mean=None):
    """Generate a random list of values.

    :param length: number of values to generate; a value <= 0 yields an
        empty list
    :param distrib: distribution of the data set. It can be a callable or
        a string from ["randint", "uniform", "gauss"]
    :param rd_args: positional arguments passed to distrib on every draw
    :param nbr_format: function applied to each drawn value before it is
        validated and stored
    :param v_min: minimum accepted value (None disables the lower bound)
    :param v_max: maximum accepted value (None disables the upper bound)
    :param exact_mean: if set, only length - 1 values are drawn and the
        last one is computed so that the mean of the list equals
        "exact_mean" (up to the rounding done by nbr_format)
    :returns: a list of formatted values
    :raises ValueError: if v_min > v_max, or if the value needed to reach
        exact_mean falls outside [v_min, v_max]
    :raises KeyError: if distrib is a name that is not a known distribution

    Drawn values are rejected and redrawn until they fit the bounds, so a
    distribution that can never produce a value in [v_min, v_max] will not
    terminate.

    :Example:

    >>> random_generator(10)  # doctest: +SKIP
    >>> random_generator(10, distrib=uniform, rd_args=(5, 10))  # doctest: +SKIP
    >>> random_generator(10, distrib="uniform", rd_args=(5, 10))  # doctest: +SKIP
    >>> random_generator(10, v_min=0)  # doctest: +SKIP
    >>> random_generator(10, exact_mean=0)  # doctest: +SKIP
    >>> random_generator(10, distrib=gauss, rd_args=(50, 20), nbr_format=int)  # doctest: +SKIP

    """
    # Contradictory bounds would make the rejection loop below spin forever.
    if v_min is not None and v_max is not None and v_min > v_max:
        raise ValueError(
            "v_min ({!r}) is greater than v_max ({!r})".format(v_min, v_max))

    # Resolve the distribution without calling it: a probe call would waste
    # one random draw and hide genuine TypeErrors raised by bad rd_args.
    if not callable(distrib):
        distribs = {"gauss": gauss, "uniform": uniform, "randint": randint}
        try:
            distrib = distribs[distrib]
        except KeyError:
            raise KeyError(
                "Unknown distribution {!r}, expected a callable or one of {}"
                .format(distrib, sorted(distribs)))

    # Nothing to generate; this also keeps exact_mean from adding a value
    # to a list that was requested empty.
    if length <= 0:
        return []

    def is_valid(value):
        """Tell whether value respects the optional v_min / v_max bounds."""
        return ((v_min is None or value >= v_min)
                and (v_max is None or value <= v_max))

    # When exact_mean is set, the last value is computed, not drawn.
    nb_drawn = length if exact_mean is None else length - 1

    data = []
    for _ in range(nb_drawn):
        value = nbr_format(distrib(*rd_args))
        while not is_valid(value):
            value = nbr_format(distrib(*rd_args))
        data.append(value)

    if exact_mean is not None:
        # The sum of the whole list has to be length * exact_mean.
        last_v = nbr_format(length * exact_mean - sum(data))
        if not is_valid(last_v):
            raise ValueError("Can't build the last value. Conflict between v_min/v_max and exact_mean")
        data.append(last_v)

    return data