recopytex/recopytex/df_marks_manip.py

206 lines
5.4 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
from math import ceil, floor
from .config import COLUMNS, VALIDSCORE
# Values manipulations
def round_half_point(val):
try:
return 0.5 * ceil(2.0 * val)
except ValueError:
return val
except TypeError:
return val
def score_to_mark(x):
""" Compute the mark
if the item is leveled then the score is multiply by the score_rate
otherwise it copies the score
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> score_to_mark(df.loc[0])
1.0
>>> score_to_mark(df.loc[10])
1.3333333333333333
"""
# -1 is no answer
if x[COLUMNS["score"]] == -1:
return 0
if x[COLUMNS["is_leveled"]]:
if x[COLUMNS["score"]] not in [0, 1, 2, 3]:
raise ValueError(f"The evaluation is out of range: {x[COLUMNS['score']]} at {x}")
return x[COLUMNS["score"]] * x[COLUMNS["score_rate"]] / 3
if x[COLUMNS["score"]] > x[COLUMNS["score_rate"]]:
raise ValueError(
f"The score ({x['score']}) is greated than the rating scale ({x[COLUMNS['score_rate']]}) at {x}"
)
return x[COLUMNS["score"]]
def score_to_level(x):
""" Compute the level (".",0,1,2,3).
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> score_to_level(df.loc[0])
3
>>> score_to_level(df.loc[1])
1
>>> score_to_level(df.loc[2])
'na'
>>> score_to_level(df.loc[3])
3
>>> score_to_level(df.loc[5])
3
>>> score_to_level(df.loc[10])
2
"""
# negatives are no answer or negatives points
if x[COLUMNS["score"]] <= -1:
return np.nan
if x[COLUMNS["is_leveled"]]:
return int(x[COLUMNS["score"]])
return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3))
# DataFrame columns manipulations
def compute_mark(df):
""" Add Mark column to df
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> compute_mark(df)
0 1.00
1 0.33
2 2.00
3 1.50
4 0.67
5 2.00
6 0.67
7 1.00
8 1.50
9 1.00
10 1.33
11 2.00
dtype: float64
"""
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
score_to_mark, axis=1
)
def compute_level(df):
""" Add Mark column to df
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> compute_level(df)
0 na
1 1
2 3
3 3
4 1
5 3
6 2
7 3
8 3
9 2
10 2
11 3
dtype: object
"""
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
score_to_level, axis=1
)
def compute_normalized(df):
""" Compute the normalized mark (Mark / score_rate)
:param df: DataFrame with "Mark" and COLUMNS["score_rate"] columns
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> df["Mark"] = compute_marks(df)
>>> compute_normalized(df)
0 1.00
1 0.33
2 1.00
3 0.75
4 0.33
5 1.00
6 0.67
7 1.00
8 0.75
9 0.50
10 0.67
11 1.00
dtype: float64
"""
return df[COLUMNS["mark"]] / df[COLUMNS["score_rate"]]
# Postprocessing question scores
def pp_q_scores(df):
""" Postprocessing questions scores dataframe
:param df: questions-scores dataframe
:return: same data frame with mark, level and normalize columns
"""
assign = {
COLUMNS["mark"]: compute_mark,
COLUMNS["level"]: compute_level,
COLUMNS["normalized"]: compute_normalized,
}
return df.assign(**assign)
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del