recopytex/recopytex/datalib/on_score_column.py

#!/usr/bin/env python
# encoding: utf-8

from math import ceil
import pandas as pd


def is_none_score(x, score_config):
    """Tell whether a score has no numeric counterpart.

    A score counts as "none" when it equals the ``value`` of a
    ``score_config`` entry whose ``numeric_value`` reads "none" (compared as
    text, case-insensitively), or when pandas reports the score as null.

    :param x: row-like mapping with a "score" key
    :param score_config: dictionary whose entries hold "value" and "numeric_value" keys
    :return: truthy if the score is a none score, falsy otherwise

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*7,
    ...    "score_rate": [1]*7,
    ...    "is_leveled":[0]+[1]*6,
    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
    ...    }
    >>> score_config = {
    ...   'BAD': {'value': 0, 'numeric_value': 0},
    ...   'FEW': {'value': 1, 'numeric_value': 1},
    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
    ...   'GOOD': {'value': 3, 'numeric_value': 3},
    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:is_none_score(x, score_config), axis=1)
    0    False
    1     True
    2    False
    3     True
    4    False
    5    False
    6    False
    dtype: bool

    """

    def maps_to_none(entry):
        # The numeric value may be the None object or the text "None".
        return str(entry["numeric_value"]).lower() == "none"

    none_markers = [
        entry["value"] for entry in filter(maps_to_none, score_config.values())
    ]

    score = x["score"]
    if score in none_markers:
        return True
    # Missing cells (NaN / None) are none scores as well.
    return pd.isnull(score)


def format_score(x, score_config):
    """Make sure that a score has the appropriate format.

    A score that is not leveled is converted to ``float``.  A leveled score
    must match one of the ``value`` entries of ``score_config``: integral
    numbers (including their text form such as "1.0") are normalised to
    ``int``, anything that is not a finite number is compared through its
    text form.

    Non-integral numbers (for example 1.5) are rejected instead of being
    truncated to the level below.

    :param x: row-like mapping with "is_leveled" and "score" keys
    :param score_config: dictionary whose entries hold a "value" key
    :return: the formatted score
    :raises ValueError: if a leveled score matches no configured value

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6,
    ...    "score_rate": [1]*6,
    ...    "is_leveled":[0]+[1]*5,
    ...    "score":[0.33,  ".", "a", 1, 2, 3],
    ...    }
    >>> score_config = {
    ...   'BAD': {'value': 0, 'numeric_value': 0},
    ...   'FEW': {'value': 1, 'numeric_value': 1},
    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
    ...   'GOOD': {'value': 3, 'numeric_value': 3},
    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:format_score(x, score_config), axis=1)
    0    0.33
    1       .
    2       a
    3       1
    4       2
    5       3
    dtype: object
    >>> format_score({"score": "1.0", "is_leveled": 1}, score_config)
    1
    >>> format_score({"score": "3.0", "is_leveled": 1}, score_config)
    3
    >>> format_score({"score": 4, "is_leveled": 1}, score_config)
    Traceback (most recent call last):
        ...
    ValueError: 4 (<class 'int'>) can't be a score
    >>> format_score({"score": 1.5, "is_leveled": 1}, score_config)
    Traceback (most recent call last):
        ...
    ValueError: 1.5 (<class 'float'>) can't be a score

    """
    if not x["is_leveled"]:
        return float(x["score"])

    raw = x["score"]
    try:
        as_float = float(raw)
        score = int(as_float)
    except (TypeError, ValueError, OverflowError):
        # Not a finite number (".", "a", None, NaN, "inf", ...): compare its
        # text form with the configured symbols.
        score = str(raw)
    else:
        if score != as_float:
            # Non-integral number: keep it untouched so that it is rejected
            # below rather than silently truncated to a valid level.
            score = raw

    allowed_values = [v["value"] for v in score_config.values()]
    if score in allowed_values:
        return score

    raise ValueError(f"{x['score']} ({type(x['score'])}) can't be a score")


def score_to_numeric_score(x, score_config):
    """Translate a score into its numeric value.

    A leveled score is looked up among the ``value`` entries of
    ``score_config`` and replaced by the matching ``numeric_value``.
    A score that is not leveled is returned unchanged.

    :param x: row-like mapping with "is_leveled" and "score" keys
    :param score_config: dictionary whose entries hold "value" and "numeric_value" keys
    :return: the numeric value associated with the score
    :raises KeyError: if a leveled score matches no configured value

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*7,
    ...    "score_rate": [1]*7,
    ...    "is_leveled":[0]+[1]*6,
    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
    ...    }
    >>> score_config = {
    ...   'BAD': {'value': 0, 'numeric_value': 0},
    ...   'FEW': {'value': 1, 'numeric_value': 1},
    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
    ...   'GOOD': {'value': 3, 'numeric_value': 3},
    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1)
    0    0.33
    1    None
    2       0
    3    None
    4       1
    5       2
    6       3
    dtype: object

    """
    if not x["is_leveled"]:
        # Plain scores are already numeric.
        return x["score"]

    numeric_by_value = dict(
        (entry["value"], entry["numeric_value"]) for entry in score_config.values()
    )
    return numeric_by_value[x["score"]]


def score_to_mark(x, score_max, rounding=lambda x: round(x, 2)):
    """Compute the mark from a "score" already filtered and in numeric form.

    For a leveled item the score must be one of 0, 1, ..., ``score_max`` and
    the mark is ``score * score_rate / score_max``.  Otherwise the mark is
    the score itself.  In both cases the result goes through ``rounding``.

    :param x: dictionary with "is_leveled", "score" (must be a number) and "score_rate" keys
    :param score_max: highest score a leveled item can get
    :param rounding: function applied to the mark before it is returned
    :return: the mark
    :raises ValueError: if a leveled score is not in 0..score_max

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*7,
    ...    "score_rate": [1]*7,
    ...    "is_leveled":[0]+[1]*6,
    ...    "score":[0.33, "", ".", "a", 1, 2, 3],
    ...    }
    >>> score_config = {
    ...   'BAD': {'value': 0, 'numeric_value': 0},
    ...   'FEW': {'value': 1, 'numeric_value': 1},
    ...   'NEARLY': {'value': 2, 'numeric_value': 2},
    ...   'GOOD': {'value': 3, 'numeric_value': 3},
    ...   'NOTFILLED': {'value': '', 'numeric_value': 'None'},
    ...   'NOANSWER': {'value': '.', 'numeric_value': 0},
    ...   'ABS': {'value': 'a', 'numeric_value': 'None'}
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df = df[~df.apply(lambda x:is_none_score(x, score_config), axis=1)]
    >>> df["score"] = df.apply(lambda x:score_to_numeric_score(x, score_config), axis=1)
    >>> df.apply(lambda x:score_to_mark(x, 3), axis=1)
    0    0.33
    2    0.00
    4    0.33
    5    0.67
    6    1.00
    dtype: float64
    >>> from .on_value import round_half_point
    >>> df.apply(lambda x:score_to_mark(x, 3, round_half_point), axis=1)
    0    0.5
    2    0.0
    4    0.5
    5    0.5
    6    1.0
    dtype: float64
    """
    if not x["is_leveled"]:
        # Plain items: the score already is the mark.
        return rounding(x["score"])

    valid_levels = range(score_max + 1)
    if x["score"] in valid_levels:
        # Scale the level to the weight of the item.
        return rounding(x["score"] * x["score_rate"] / score_max)

    raise ValueError(f"The evaluation is out of range: {x['score']} at {x}")


def score_to_level(x, level_max=3):
    """Compute the level reached on an item.

    For a leveled item the level is the score converted to ``int``.
    Otherwise it is the share of ``score_rate`` obtained, scaled to
    ``level_max`` and rounded up to the next integer.

    :param x: dictionary with "is_leveled", "score" and "score_rate" keys
    :param level_max: highest level, used to scale scores that are not leveled
    :return: the level
    :raises ValueError: if a score that is not leveled exceeds its score_rate

    >>> import pandas as pd
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "score":[1, 0.33, 0, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df
       Eleve  score_rate  is_leveled  score
    0     E1           1           0  1.000
    1     E1           1           0  0.330
    2     E1           2           0  0.000
    3     E1           2           0  1.500
    4     E1           2           1  1.000
    5     E1           2           1  3.000
    6     E2           1           0  0.666
    7     E2           1           0  1.000
    8     E2           2           0  1.500
    9     E2           2           0  1.000
    10    E2           2           1  2.000
    11    E2           2           1  3.000
    >>> df.apply(score_to_level, axis=1)
    0     3
    1     1
    2     0
    3     3
    4     1
    5     3
    6     2
    7     3
    8     3
    9     2
    10    2
    11    3
    dtype: int64
    >>> df.apply(lambda x: score_to_level(x, 5), axis=1)
    0     5
    1     2
    2     0
    3     4
    4     1
    5     3
    6     4
    7     5
    8     4
    9     3
    10    2
    11    3
    dtype: int64
    """
    if x["is_leveled"]:
        # Leveled items already carry their level as the score.
        return int(x["score"])

    score, score_rate = x["score"], x["score_rate"]
    if score > score_rate:
        raise ValueError(
            f"score is higher than score_rate ({x['score']} > {x['score_rate']}) for {x}"
        )

    achieved_share = score / score_rate
    # Any partial success rounds up to the next level.
    return int(ceil(achieved_share * level_max))


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del