Feat: delete the dataframe-level functions and move them to row-level functions

2021-04-19 21:54:44 +02:00
parent 2e86b3a0a2
commit 8ec24a24b3
2 changed files with 75 additions and 200 deletions
--- a/recopytex/datalib/on_score_column.py
+++ b/recopytex/datalib/on_score_column.py
@@ -4,6 +4,81 @@
 from math import ceil


+def is_none_score(x, score_config):
+    """Return True when x["score"] is a value whose numeric_value is None
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*7,
+    ...    "score_rate": [1]*7,
+    ...    "is_leveled":[0]+[1]*6,
+    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
+    ...    }
+    >>> score_config = {
+    ...   'BAD': {'value': 0, 'numeric_value': 0},
+    ...   'FEW': {'value': 1, 'numeric_value': 1},
+    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
+    ...   'GOOD': {'value': 3, 'numeric_value': 3},
+    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
+    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
+    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df.apply(lambda x:is_none_score(x, score_config), axis=1)
+    0    False
+    1     True
+    2    False
+    3     True
+    4    False
+    5    False
+    6    False
+    dtype: bool
+
+    """
+    none_values = []
+    for level in score_config.values():
+        # matches None as well as the string "None" in any letter case
+        if str(level["numeric_value"]).lower() == "none":
+            none_values.append(level["value"])
+    return x["score"] in none_values
+
+
+def score_to_numeric_score(x, score_config):
+    """Give the numeric_value configured for a leveled score, else the raw score
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*7,
+    ...    "score_rate": [1]*7,
+    ...    "is_leveled":[0]+[1]*6,
+    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
+    ...    }
+    >>> score_config = {
+    ...   'BAD': {'value': 0, 'numeric_value': 0},
+    ...   'FEW': {'value': 1, 'numeric_value': 1},
+    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
+    ...   'GOOD': {'value': 3, 'numeric_value': 3},
+    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
+    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
+    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1)
+    0    0.33
+    1    None
+    2       0
+    3    None
+    4       1
+    5       2
+    6       3
+    dtype: object
+
+    """
+    if not x["is_leveled"]:
+        # not leveled: the score is handed back untouched
+        return x["score"]
+    to_numeric = {c["value"]: c["numeric_value"] for c in score_config.values()}
+    return to_numeric[x["score"]]
+
+
 def score_to_mark(x, score_max, rounding=lambda x: round(x, 2)):
    """Compute the mark from the score

@@ -125,10 +200,6 @@ def score_to_level(x, level_max=3):
    return int(ceil(x["score"] / x["score_rate"] * level_max))


-def score_to_numeric_score(x, score_config):
-    pass
-
-
 # -----------------------------
 # Reglages pour 'vim'
 # vim:set autoindent expandtab tabstop=4 shiftwidth=4:
--- a/recopytex/datalib/on_score_dataframe.py
+++ b/recopytex/datalib/on_score_dataframe.py
@@ -1,196 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-from .on_score_column import score_to_mark, score_to_level
-import pandas as pd
-
-
-def compute_marks(df, score_max, rounding=lambda x: round(x, 2)):
-    """Compute the mark for the dataframe
-
-    apply score_to_mark to each row
-
-    :param df: DataFrame with "score" (need to be number), "is_leveled" and "score_rate" columns.
-
-    >>> import pandas as pd
-    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
-    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
-    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "score":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
-    ...    }
-    >>> df = pd.DataFrame(d)
-    >>> df
-       Eleve  score_rate  is_leveled  score
-    0     E1           1           0  1.000
-    1     E1           1           0  0.330
-    2     E1           2           0  2.000
-    3     E1           2           0  1.500
-    4     E1           2           1  1.000
-    5     E1           2           1  3.000
-    6     E2           1           0  0.666
-    7     E2           1           0  1.000
-    8     E2           2           0  1.500
-    9     E2           2           0  1.000
-    10    E2           2           1  2.000
-    11    E2           2           1  3.000
-    >>> compute_marks(df, 3)
-    0     1.00
-    1     0.33
-    2     2.00
-    3     1.50
-    4     0.67
-    5     2.00
-    6     0.67
-    7     1.00
-    8     1.50
-    9     1.00
-    10    1.33
-    11    2.00
-    dtype: float64
-    >>> from .on_value import round_half_point
-    >>> compute_marks(df, 3, round_half_point)
-    0     1.0
-    1     0.5
-    2     2.0
-    3     1.5
-    4     0.5
-    5     2.0
-    6     0.5
-    7     1.0
-    8     1.5
-    9     1.0
-    10    1.5
-    11    2.0
-    dtype: float64
-    """
-    return df[["score", "is_leveled", "score_rate"]].apply(
-        lambda x: score_to_mark(x, score_max, rounding), axis=1
-    )
-
-
-def compute_level(df, level_max=3):
-    """Compute level for the dataframe
-
-    Applies score_to_level to each row
-
-    :param df: DataFrame with "score", "is_leveled" and "score_rate" columns.
-    :return: Columns with level
-
-    >>> import pandas as pd
-    >>> import numpy as np
-    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
-    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
-    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
-    ...    }
-    >>> df = pd.DataFrame(d)
-    >>> compute_level(df)
-    0     0
-    1     1
-    2     3
-    3     3
-    4     1
-    5     3
-    6     2
-    7     3
-    8     3
-    9     2
-    10    2
-    11    3
-    dtype: int64
-    """
-    return df[["score", "is_leveled", "score_rate"]].apply(
-        lambda x: score_to_level(x, level_max), axis=1
-    )
-
-
-def compute_normalized(df, rounding=lambda x: round(x, 2)):
-    """Compute the normalized mark (Mark / score_rate)
-
-    :param df: DataFrame with "Mark" and "score_rate" columns
-    :return: column with normalized mark
-
-    >>> import pandas as pd
-    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
-    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
-    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
-    ...    }
-    >>> df = pd.DataFrame(d)
-    >>> df["mark"] = compute_marks(df, 3)
-    >>> compute_normalized(df)
-    0     0.00
-    1     0.33
-    2     1.00
-    3     0.75
-    4     0.34
-    5     1.00
-    6     0.67
-    7     1.00
-    8     0.75
-    9     0.50
-    10    0.66
-    11    1.00
-    dtype: float64
-    """
-    return rounding(df["mark"] / df["score_rate"])
-
-
-def filter_none_score(df, score_config):
-    """Filter rows where scores have None numeric values
-
-    :example:
-
-    >>> import pandas as pd
-    >>> d = {"Eleve":["E1"]*7,
-    ...    "score_rate": [1]*7,
-    ...    "is_leveled":[0]+[1]*6,
-    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
-    ...    }
-    >>> score_config = {
-    ...   'BAD': {'value': 0, 'numeric_value': 0},
-    ...   'FEW': {'value': 1, 'numeric_value': 1},
-    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
-    ...   'GOOD': {'value': 3, 'numeric_value': 3},
-    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
-    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
-    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
-    ...    }
-    >>> df = pd.DataFrame(d)
-    >>> filter_none_score(df, score_config)
-      Eleve  score_rate  is_leveled score
-    0    E1           1           0  0.33
-    2    E1           1           1     .
-    4    E1           1           1     1
-    5    E1           1           1     2
-    6    E1           1           1     3
-    """
-    not_leveled_df = df[df["is_leveled"] != 1]
-    leveled_df = df[df["is_leveled"] == 1]
-
-    not_none_values = [
-        v["value"]
-        for v in score_config.values()
-        if str(v["numeric_value"]).lower() != "none"
-    ]
-    filtered_leveled_df = leveled_df[leveled_df["score"].isin(not_none_values)]
-
-    return pd.concat([not_leveled_df, filtered_leveled_df])
-
-
-def score_to_numeric_score(df, score_config):
-    """Transform a score to the corresponding numeric value
-
-    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
-    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
-    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
-    ...    }
-    """
-    pass
-
-
-# -----------------------------
-# Reglages pour 'vim'
-# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
-# cursor: 16 del