def is_none_score(x, score_config):
    """Tell whether the score of a row maps to a "None" numeric value.

    A score value counts as "none" when the ``numeric_value`` attached to it
    in ``score_config``, once converted to ``str`` and lower-cased, equals
    ``"none"``. Only ``x["score"]`` is inspected; ``x["is_leveled"]`` is not
    consulted.

    :param x: row indexable by ``"score"`` (for instance a DataFrame row
        received through ``df.apply(..., axis=1)``)
    :param score_config: mapping whose values are dicts with the keys
        ``"value"`` and ``"numeric_value"``
    :return: True when ``x["score"]`` is one of the none-valued scores

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*7,
    ...         "score_rate": [1]*7,
    ...         "is_leveled":[0]+[1]*6,
    ...         "score":[0.33, "", ".", "a", 1, 2, 3],
    ...         }
    >>> score_config = {
    ...    'BAD': {'value': 0, 'numeric_value': 0},
    ...    'FEW': {'value': 1, 'numeric_value': 1},
    ...    'NEARLY': {'value': 2, 'numeric_value': 2},
    ...    'GOOD': {'value': 3, 'numeric_value': 3},
    ...    'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...    'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...    'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:is_none_score(x, score_config), axis=1)
    0    False
    1     True
    2    False
    3     True
    4    False
    5    False
    6    False
    dtype: bool

    """
    # The config may store the marker as the string 'None' (or as a real
    # None); going through str().lower() accepts both spellings.
    none_values = [
        v["value"]
        for v in score_config.values()
        if str(v["numeric_value"]).lower() == "none"
    ]
    return x["score"] in none_values


def score_to_numeric_score(x, score_config):
    """Convert the score of a row to the corresponding numeric value.

    For a leveled row (``x["is_leveled"]`` is truthy) the score is looked up
    among the ``"value"`` entries of ``score_config`` and the matching
    ``"numeric_value"`` is returned exactly as stored in the configuration
    (a ``'None'`` string therefore stays a string). For any other row the
    score is returned unchanged.

    :param x: row indexable by ``"score"`` and ``"is_leveled"``
    :param score_config: mapping whose values are dicts with the keys
        ``"value"`` and ``"numeric_value"``
    :return: the numeric value configured for the score, or the score itself
        when the row is not leveled
    :raises KeyError: when a leveled score is not a ``"value"`` declared in
        ``score_config``

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*7,
    ...         "score_rate": [1]*7,
    ...         "is_leveled":[0]+[1]*6,
    ...         "score":[0.33, "", ".", "a", 1, 2, 3],
    ...         }
    >>> score_config = {
    ...    'BAD': {'value': 0, 'numeric_value': 0},
    ...    'FEW': {'value': 1, 'numeric_value': 1},
    ...    'NEARLY': {'value': 2, 'numeric_value': 2},
    ...    'GOOD': {'value': 3, 'numeric_value': 3},
    ...    'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...    'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...    'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1)
    0    0.33
    1    None
    2       0
    3    None
    4       1
    5       2
    6       3
    dtype: object

    """
    score = x["score"]
    if not x["is_leveled"]:
        return score

    replacements = {v["value"]: v["numeric_value"] for v in score_config.values()}
    try:
        return replacements[score]
    except KeyError:
        # Same exception type as a plain dict lookup, but the message now
        # says which score is missing from the configuration.
        raise KeyError(
            "score {!r} is not a value declared in score_config".format(score)
        ) from None