#!/usr/bin/env python
# encoding: utf-8
"""Score helpers reconstructed from a whitespace-collapsed git patch.

Provenance: commit 10b9954c05d0008710c61a7fde6e73bb7b8af8d7,
"Feat: import score dataframe functions" (Bertrand Benjamin, 2021-04-18).

The patch created three modules under ``recopytex/datalib``; their content
is gathered here in the original order:

* ``on_score_column.py``    -- :func:`score_to_mark`, :func:`score_to_level`
* ``on_score_dataframe.py`` -- :func:`compute_marks`, :func:`compute_level`,
  :func:`compute_normalized`
* ``on_value.py``           -- :func:`round_with_base`, :func:`round_half_point`
"""

from decimal import Decimal, InvalidOperation
from math import ceil, floor


# ---------------------------------------------------------------------------
# on_score_column: work on a single row
# ---------------------------------------------------------------------------


def score_to_mark(x, score_max, rounding=lambda x: round(x, 2)):
    """Compute the mark from the score

    If the item is leveled then the mark is ``score * score_rate / score_max``,
    otherwise the score is copied. In both cases the result goes through
    ``rounding``.

    :param x: dictionary (or row) with "is_leveled", "score" and "score_rate" keys
    :param score_max: highest level; a leveled score has to be one of the
        integers ``0, 1, ..., score_max``
    :param rounding: rounding mark function
    :return: the mark
    :raises ValueError: when the item is leveled and its score is not one of
        the integers from 0 to ``score_max``

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1.2, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.loc[0]
    Eleve          E1
    score_rate      1
    is_leveled      0
    score         1.0
    Name: 0, dtype: object
    >>> score_to_mark(df.loc[0], 3)
    1.0
    >>> df.loc[10]
    Eleve          E2
    score_rate      2
    is_leveled      1
    score         2.0
    Name: 10, dtype: object
    >>> score_to_mark(df.loc[10], 3)
    1.33
    >>> score_to_mark(df.loc[10], 3, round_half_point)
    1.5
    >>> df.loc[1]
    Eleve           E1
    score_rate       1
    is_leveled       0
    score         0.33
    Name: 1, dtype: object
    >>> score_to_mark(df.loc[1], 3)
    0.33
    """
    if not x["is_leveled"]:
        return rounding(x["score"])

    # Membership in a range compares by equality, so 2.0 is accepted like 2
    # while 1.5 or score_max + 1 are rejected.
    if x["score"] not in range(score_max + 1):
        raise ValueError(f"The evaluation is out of range: {x['score']} at {x}")

    return rounding(x["score"] * x["score_rate"] / score_max)


def score_to_level(x, level_max=3):
    """Compute the level as an integer.

    A leveled item already stores its level in "score", so it is returned
    as an int. Otherwise the level is ``score / score_rate * level_max``
    rounded up to the next integer ("score_rate" therefore has to be
    non-zero for non-leveled items).

    :param x: dictionary (or row) with "is_leveled", "score" and "score_rate" keys
    :param level_max: level given to a non-leveled item whose score equals
        its score_rate
    :return: the level
    :raises ValueError: when a non-leveled score is higher than its score_rate

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[1, 0.33, 0, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df
       Eleve  score_rate  is_leveled  score
    0     E1           1           0  1.000
    1     E1           1           0  0.330
    2     E1           2           0  0.000
    3     E1           2           0  1.500
    4     E1           2           1  1.000
    5     E1           2           1  3.000
    6     E2           1           0  0.666
    7     E2           1           0  1.000
    8     E2           2           0  1.500
    9     E2           2           0  1.000
    10    E2           2           1  2.000
    11    E2           2           1  3.000
    >>> df.apply(score_to_level, axis=1)
    0     3
    1     1
    2     0
    3     3
    4     1
    5     3
    6     2
    7     3
    8     3
    9     2
    10    2
    11    3
    dtype: int64
    >>> df.apply(lambda x: score_to_level(x, 5), axis=1)
    0     5
    1     2
    2     0
    3     4
    4     1
    5     3
    6     4
    7     5
    8     4
    9     3
    10    2
    11    3
    dtype: int64
    """
    if x["is_leveled"]:
        return int(x["score"])

    if x["score"] > x["score_rate"]:
        raise ValueError(
            f"score is higher than score_rate ({x['score']} > {x['score_rate']}) for {x}"
        )

    achieved_ratio = x["score"] / x["score_rate"]
    return int(ceil(achieved_ratio * level_max))


# ---------------------------------------------------------------------------
# on_score_dataframe: work on a whole dataframe
# ---------------------------------------------------------------------------


def compute_marks(df, score_max, rounding=lambda x: round(x, 2)):
    """Compute the mark for the dataframe

    Applies score_to_mark to each row.

    :param df: DataFrame with "score", "is_leveled" and "score_rate" columns.
    :param score_max: forwarded to score_to_mark
    :param rounding: rounding mark function, forwarded to score_to_mark
    :return: column with the marks

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df
       Eleve  score_rate  is_leveled  score
    0     E1           1           0  1.000
    1     E1           1           0  0.330
    2     E1           2           0  2.000
    3     E1           2           0  1.500
    4     E1           2           1  1.000
    5     E1           2           1  3.000
    6     E2           1           0  0.666
    7     E2           1           0  1.000
    8     E2           2           0  1.500
    9     E2           2           0  1.000
    10    E2           2           1  2.000
    11    E2           2           1  3.000
    >>> compute_marks(df, 3)
    0     1.00
    1     0.33
    2     2.00
    3     1.50
    4     0.67
    5     2.00
    6     0.67
    7     1.00
    8     1.50
    9     1.00
    10    1.33
    11    2.00
    dtype: float64
    >>> compute_marks(df, 3, round_half_point)
    0     1.0
    1     0.5
    2     2.0
    3     1.5
    4     0.5
    5     2.0
    6     0.5
    7     1.0
    8     1.5
    9     1.0
    10    1.5
    11    2.0
    dtype: float64
    """
    needed = df[["score", "is_leveled", "score_rate"]]
    return needed.apply(lambda row: score_to_mark(row, score_max, rounding), axis=1)


def compute_level(df, level_max=3):
    """Compute level for the dataframe

    Applies score_to_level to each row.

    :param df: DataFrame with "score", "is_leveled" and "score_rate" columns.
    :param level_max: forwarded to score_to_level
    :return: column with the levels

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[0, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_level(df)
    0     0
    1     1
    2     3
    3     3
    4     1
    5     3
    6     2
    7     3
    8     3
    9     2
    10    2
    11    3
    dtype: int64
    """
    needed = df[["score", "is_leveled", "score_rate"]]
    return needed.apply(lambda row: score_to_level(row, level_max), axis=1)


def compute_normalized(df, rounding=lambda x: round(x, 2)):
    """Compute the normalized mark (mark / score_rate)

    ``rounding`` is called once with the whole column (not once per value),
    so it has to accept a pandas Series; a scalar-only function such as
    round_half_point does not fit here.

    :param df: DataFrame with "mark" and "score_rate" columns
    :param rounding: rounding function applied to the resulting column
    :return: column with normalized mark

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["mark"] = compute_marks(df, 3)
    >>> compute_normalized(df)
    0     1.00
    1     0.33
    2     1.00
    3     0.75
    4     0.34
    5     1.00
    6     0.67
    7     1.00
    8     0.75
    9     0.50
    10    0.66
    11    1.00
    dtype: float64
    """
    return rounding(df["mark"] / df["score_rate"])


# ---------------------------------------------------------------------------
# on_value: work on a single value
# ---------------------------------------------------------------------------


def round_with_base(x, base=0.5):
    """Round to a multiple of base

    The result is rounded to as many decimals as ``base`` has, which hides
    floating point noise such as ``0.2 * 7 == 1.4000000000000001``.

    :param x: value to round (anything accepted by ``float``)
    :param base: step of the rounding grid, must be non-zero
    :return: the multiple of base nearest to x

    :example:
    >>> round_with_base(1.33, 0.1)
    1.3
    >>> round_with_base(1.33, 0.2)
    1.4
    >>> round_with_base(1.33, 1)
    1
    >>> round_with_base(1.33, 2)
    2
    >>> round_with_base(0.00003, 0.00001)
    3e-05
    """
    # Read the number of decimals from the decimal exponent rather than from
    # the text after the ".", so that bases printed in scientific notation
    # (str(0.00001) == "1e-05") get the right precision too.
    try:
        exponent = Decimal(str(base)).as_tuple().exponent
    except InvalidOperation:
        # str(base) is not a decimal literal: no decimals to keep.
        exponent = 0

    # nan/inf report a non-integer exponent; positive exponents (1e+22) mean
    # there is no fractional digit. Both cases keep zero decimals.
    prec = max(0, -exponent) if isinstance(exponent, int) else 0

    return round(base * round(float(x) / base), prec)


def round_half_point(x):
    """Round to nearest half point

    :param x: value to round
    :return: the multiple of 0.5 nearest to x

    :example:
    >>> round_half_point(1.33)
    1.5
    >>> round_half_point(1.1)
    1.0
    >>> round_half_point(1.66)
    1.5
    >>> round_half_point(1.76)
    2.0
    """
    return round_with_base(x, base=0.5)


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del