def format_score(x, score_config):
    """Normalise the "score" of a row and check that it is an allowed value.

    For a non leveled item the score is a plain mark and is returned as a
    float. For a leveled item the score must be one of the ``"value"``
    entries of ``score_config``: numeric looking scores are turned into
    numbers (``"1.0"`` becomes ``1``), anything else is compared as a string.

    :param x: mapping (dict or dataframe row) with "score" and "is_leveled"
    :param score_config: mapping whose values are dicts holding a "value" key
    :return: float for a non leveled item, otherwise the normalised score
        (an int for whole numbers, or the matching symbol as a str)
    :raises ValueError: if the score of a leveled item is not one of the
        configured values, or if a non leveled score can't be read as a float

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6,
    ...     "score_rate": [1]*6,
    ...     "is_leveled":[0]+[1]*5,
    ...     "score":[0.33, ".", "a", 1, 2, 3],
    ...     }
    >>> score_config = {
    ...    'BAD': {'value': 0, 'numeric_value': 0},
    ...    'FEW': {'value': 1, 'numeric_value': 1},
    ...    'NEARLY': {'value': 2, 'numeric_value': 2},
    ...    'GOOD': {'value': 3, 'numeric_value': 3},
    ...    'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...    'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...    'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:format_score(x, score_config), axis=1).tolist()
    [0.33, '.', 'a', 1, 2, 3]
    >>> format_score({"score": "1.0", "is_leveled": 1}, score_config)
    1
    >>> format_score({"score": "3.0", "is_leveled": 1}, score_config)
    3
    >>> format_score({"score": 4, "is_leveled": 1}, score_config)
    Traceback (most recent call last):
        ...
    ValueError: 4 (<class 'int'>) can't be a score
    >>> format_score({"score": 2.7, "is_leveled": 1}, score_config)
    Traceback (most recent call last):
        ...
    ValueError: 2.7 (<class 'float'>) can't be a score

    """
    raw_score = x["score"]

    if not x["is_leveled"]:
        return float(raw_score)

    try:
        number = float(raw_score)
    except ValueError:
        # Not a number: symbols such as "." or "a" are compared as strings.
        score = str(raw_score)
    else:
        # Only whole numbers are collapsed to int. Truncating 2.7 to 2 would
        # silently accept an invalid level, and int() of an infinite value
        # would raise OverflowError instead of the documented ValueError.
        score = int(number) if number.is_integer() else number

    if score in [v["value"] for v in score_config.values()]:
        return score

    raise ValueError(f"{x['score']} ({type(x['score'])}) can't be a score")
} >>> df = pd.DataFrame(d) - >>> df.loc[0] - Eleve E1 - score_rate 1 - is_leveled 0 - score 1.0 - Name: 0, dtype: object - >>> score_to_mark(df.loc[0], 3) - 1.0 - >>> df.loc[10] - Eleve E2 - score_rate 2 - is_leveled 1 - score 2.0 - Name: 10, dtype: object - >>> score_to_mark(df.loc[10], 3) - 1.33 + >>> df = df[~df.apply(lambda x:is_none_score(x, score_config), axis=1)] + >>> df["score"] = df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1) + >>> df.apply(lambda x:score_to_mark(x, 3), axis=1) + 0 0.33 + 2 0.00 + 4 0.33 + 5 0.67 + 6 1.00 + dtype: float64 >>> from .on_value import round_half_point - >>> score_to_mark(df.loc[10], 3, round_half_point) - 1.5 - >>> df.loc[1] - Eleve E1 - score_rate 1 - is_leveled 0 - score 0.33 - Name: 1, dtype: object - >>> score_to_mark(df.loc[1], 3) - 0.33 + >>> df.apply(lambda x:score_to_mark(x, 3, round_half_point), axis=1) + 0 0.5 + 2 0.0 + 4 0.5 + 5 0.5 + 6 1.0 + dtype: float64 """ if x["is_leveled"]: if x["score"] not in list(range(score_max + 1)):