Feat: delete functions on dataframes and move them to functions on rows

2021-04-19 21:54:44 +02:00
parent 2e86b3a0a2
commit 8ec24a24b3
2 changed files with 75 additions and 200 deletions
--- a/recopytex/datalib/on_score_column.py
+++ b/recopytex/datalib/on_score_column.py
@@ -4,6 +4,81 @@
 from math import ceil
 def is_none_score(x, score_config):
     """Tell whether the score of a row maps to a "None" numeric value.

     The score is compared with the ``value`` of every ``score_config``
     entry whose ``numeric_value`` reads ``"none"`` once converted to a
     lowercase string. The ``is_leveled`` column is not consulted.

     :param x: row (anything indexable by ``"score"``)
     :param score_config: mapping whose values are dicts with ``"value"``
         and ``"numeric_value"`` keys
     :return: True when ``x["score"]`` is one of those values, else False

     >>> import pandas as pd
     >>> d = {"Eleve":["E1"]*7,
     ...    "score_rate": [1]*7,
     ...    "is_leveled":[0]+[1]*6,
     ...    "score":[0.33, "", ".", "a", 1, 2, 3],
     ...    }
     >>> score_config = {
     ...   'BAD': {'value': 0, 'numeric_value': 0},
     ...   'FEW': {'value': 1, 'numeric_value': 1},
     ...   'NEARLY': {'value': 2, 'numeric_value': 2},
     ...   'GOOD': {'value': 3, 'numeric_value': 3},
     ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
     ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
     ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
     ...    }
     >>> df = pd.DataFrame(d)
     >>> df.apply(lambda x:is_none_score(x, score_config), axis=1)
     0    False
     1     True
     2    False
     3     True
     4    False
     5    False
     6    False
     dtype: bool
     """
     # Collect every raw score value configured with a "None" numeric value.
     none_values = []
     for level in score_config.values():
         if str(level["numeric_value"]).lower() == "none":
             none_values.append(level["value"])
     return x["score"] in none_values
 def score_to_numeric_score(x, score_config):
     """Convert the score of a row to its numeric value.

     Rows whose ``is_leveled`` is falsy keep their score untouched. For
     leveled rows, the score is looked up among the ``value`` entries of
     ``score_config`` and the matching ``numeric_value`` is returned exactly
     as configured (no conversion is applied to it).

     :param x: row (anything indexable by ``"is_leveled"`` and ``"score"``)
     :param score_config: mapping whose values are dicts with ``"value"``
         and ``"numeric_value"`` keys
     :return: the score itself, or the configured numeric value
     :raises KeyError: when a leveled score is not a configured ``value``

     >>> import pandas as pd
     >>> d = {"Eleve":["E1"]*7,
     ...    "score_rate": [1]*7,
     ...    "is_leveled":[0]+[1]*6,
     ...    "score":[0.33, "", ".", "a", 1, 2, 3],
     ...    }
     >>> score_config = {
     ...   'BAD': {'value': 0, 'numeric_value': 0},
     ...   'FEW': {'value': 1, 'numeric_value': 1},
     ...   'NEARLY': {'value': 2, 'numeric_value': 2},
     ...   'GOOD': {'value': 3, 'numeric_value': 3},
     ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
     ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
     ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
     ...    }
     >>> df = pd.DataFrame(d)
     >>> df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1)
     0    0.33
     1    None
     2       0
     3    None
     4       1
     5       2
     6       3
     dtype: object
     """
     # Non-leveled scores are already numeric marks: hand them back as is.
     if not x["is_leveled"]:
         return x["score"]

     # Raw score value -> configured numeric value.
     numeric_by_value = {}
     for level in score_config.values():
         numeric_by_value[level["value"]] = level["numeric_value"]
     return numeric_by_value[x["score"]]
 def score_to_mark(x, score_max, rounding=lambda x: round(x, 2)):
    """Compute the mark from the score
@@ -125,10 +200,6 @@ def score_to_level(x, level_max=3):
    return int(ceil(x["score"] / x["score_rate"] * level_max))
 def score_to_numeric_score(x, score_config):
     """Placeholder: performs no conversion and returns None.

     NOTE(review): a full implementation with the same name appears earlier
     in this file view; if both definitions were kept, this later stub would
     override it. The hunk header suggests this stub is being removed by the
     commit -- confirm.
     """
     return None
 # -----------------------------
 # Reglages pour 'vim'
 # vim:set autoindent expandtab tabstop=4 shiftwidth=4:
--- a/recopytex/datalib/on_score_dataframe.py
+++ b/recopytex/datalib/on_score_dataframe.py
@@ -1,196 +0,0 @@
 #!/usr/bin/env python
 # encoding: utf-8
 from .on_score_column import score_to_mark, score_to_level
 import pandas as pd
 def compute_marks(df, score_max, rounding=lambda x: round(x, 2)):
     """Compute the mark of every row of the dataframe.

     ``score_to_mark`` is applied row by row to the "score", "is_leveled"
     and "score_rate" columns.

     :param df: DataFrame with "score" (must be a number), "is_leveled" and
         "score_rate" columns
     :param score_max: forwarded to ``score_to_mark``
     :param rounding: forwarded to ``score_to_mark``
     :return: the result of the row-wise ``apply`` (one mark per row)

     >>> import pandas as pd
     >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
     ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
     ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
     ...    "score":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
     ...    }
     >>> df = pd.DataFrame(d)
     >>> df
        Eleve  score_rate  is_leveled  score
     0     E1           1           0  1.000
     1     E1           1           0  0.330
     2     E1           2           0  2.000
     3     E1           2           0  1.500
     4     E1           2           1  1.000
     5     E1           2           1  3.000
     6     E2           1           0  0.666
     7     E2           1           0  1.000
     8     E2           2           0  1.500
     9     E2           2           0  1.000
     10    E2           2           1  2.000
     11    E2           2           1  3.000
     >>> compute_marks(df, 3)
     0     1.00
     1     0.33
     2     2.00
     3     1.50
     4     0.67
     5     2.00
     6     0.67
     7     1.00
     8     1.50
     9     1.00
     10    1.33
     11    2.00
     dtype: float64
     >>> from .on_value import round_half_point
     >>> compute_marks(df, 3, round_half_point)
     0     1.0
     1     0.5
     2     2.0
     3     1.5
     4     0.5
     5     2.0
     6     0.5
     7     1.0
     8     1.5
     9     1.0
     10    1.5
     11    2.0
     dtype: float64
     """

     def row_mark(row):
         # Bind the shared settings so apply() only has to pass the row.
         return score_to_mark(row, score_max, rounding)

     needed_columns = ["score", "is_leveled", "score_rate"]
     return df[needed_columns].apply(row_mark, axis=1)
 def compute_level(df, level_max=3):
     """Compute the level of every row of the dataframe.

     ``score_to_level`` is applied row by row to the "score", "is_leveled"
     and "score_rate" columns.

     :param df: DataFrame with "score", "is_leveled" and "score_rate" columns
     :param level_max: forwarded to ``score_to_level``
     :return: the result of the row-wise ``apply`` (one level per row)

     >>> import pandas as pd
     >>> import numpy as np
     >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
     ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
     ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
     ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
     ...    }
     >>> df = pd.DataFrame(d)
     >>> compute_level(df)
     0     0
     1     1
     2     3
     3     3
     4     1
     5     3
     6     2
     7     3
     8     3
     9     2
     10    2
     11    3
     dtype: int64
     """

     def row_level(row):
         # Bind level_max so apply() only has to pass the row.
         return score_to_level(row, level_max)

     needed_columns = ["score", "is_leveled", "score_rate"]
     return df[needed_columns].apply(row_level, axis=1)
 def compute_normalized(df, rounding=lambda x: round(x, 2)):
     """Compute the normalized mark (mark / score_rate).

     :param df: DataFrame with "mark" and "score_rate" columns
     :param rounding: callable applied once to the whole column of ratios
     :return: column with the normalized marks, as returned by ``rounding``

     >>> import pandas as pd
     >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
     ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
     ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
     ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
     ...    }
     >>> df = pd.DataFrame(d)
     >>> df["mark"] = compute_marks(df, 3)
     >>> compute_normalized(df)
     0     0.00
     1     0.33
     2     1.00
     3     0.75
     4     0.34
     5     1.00
     6     0.67
     7     1.00
     8     0.75
     9     0.50
     10    0.66
     11    1.00
     dtype: float64
     """
     marks = df["mark"]
     rates = df["score_rate"]
     normalized = marks / rates
     return rounding(normalized)
 def filter_none_score(df, score_config):
     """Drop the leveled rows whose score has a "None" numeric value.

     Rows whose "is_leveled" differs from 1 are always kept. Rows whose
     "is_leveled" equals 1 are kept only when their score is the ``value``
     of a ``score_config`` entry whose ``numeric_value`` does not read
     ``"none"`` (as a lowercase string). In the result, the non-leveled
     rows come first, followed by the retained leveled rows.

     :param df: DataFrame with "is_leveled" and "score" columns
     :param score_config: mapping whose values are dicts with ``"value"``
         and ``"numeric_value"`` keys
     :return: DataFrame holding the retained rows

     :example:
     >>> import pandas as pd
     >>> d = {"Eleve":["E1"]*7,
     ...    "score_rate": [1]*7,
     ...    "is_leveled":[0]+[1]*6,
     ...    "score":[0.33, "", ".", "a", 1, 2, 3],
     ...    }
     >>> score_config = {
     ...   'BAD': {'value': 0, 'numeric_value': 0},
     ...   'FEW': {'value': 1, 'numeric_value': 1},
     ...   'NEARLY': {'value': 2, 'numeric_value': 2},
     ...   'GOOD': {'value': 3, 'numeric_value': 3},
     ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
     ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
     ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
     ...    }
     >>> df = pd.DataFrame(d)
     >>> filter_none_score(df, score_config)
       Eleve  score_rate  is_leveled score
     0    E1           1           0  0.33
     2    E1           1           1     .
     4    E1           1           1     1
     5    E1           1           1     2
     6    E1           1           1     3
     """
     other_rows = df[df["is_leveled"] != 1]
     leveled_rows = df[df["is_leveled"] == 1]

     # Raw score values that do have a usable numeric value.
     kept_values = []
     for level in score_config.values():
         if str(level["numeric_value"]).lower() != "none":
             kept_values.append(level["value"])

     has_numeric_value = leveled_rows["score"].isin(kept_values)
     return pd.concat([other_rows, leveled_rows[has_numeric_value]])
 def score_to_numeric_score(df, score_config):
     """Transform scores to the corresponding numeric values (not implemented).

     This is a placeholder: both arguments are ignored and None is returned.

     >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
     ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
     ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
     ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
     ...    }
     """
     return None
 # -----------------------------
 # Reglages pour 'vim'
 # vim:set autoindent expandtab tabstop=4 shiftwidth=4:
 # cursor: 16 del