add level column

This commit is contained in:
Benjamin Bertrand 2017-03-07 17:24:15 +03:00
parent 0abd2be854
commit 0fba0017fe
1 changed files with 92 additions and 18 deletions

View File

@ -3,7 +3,7 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from math import ceil from math import ceil, floor
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -47,7 +47,7 @@ def note_to_rep(x):
return x["Note"] return x["Note"]
def note_to_mark(x): def note_to_mark(x):
""" Compute the mark when it is a "Nivea" note """ Compute the mark when it is a "Niveau" note
:param x: dictionnary with "Niveau", "Note" and "Bareme" keys :param x: dictionnary with "Niveau", "Note" and "Bareme" keys
@ -72,6 +72,46 @@ def note_to_mark(x):
return x["Note"] * x["Bareme"] / 3 return x["Note"] * x["Bareme"] / 3
return x["Note"] return x["Note"]
def note_to_level(x):
    """ Return the level ("na", 0, 1, 2 or 3) matching a note

    "na" stands for "no answer": it is returned when "Note" is null.
    When "Niveau" is truthy the note is already a level and is only
    truncated to an int. Otherwise the note is divided by "Bareme",
    multiplied by 3 and rounded up.

    :param x: dictionary-like with "Niveau", "Note" and "Bareme" keys
    :returns: the string "na" or the level as an int

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...     "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...     "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...     "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...     "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...     "Trimestre": ["1"]*12,
    ...     "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...     "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...     "Note":[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...     }
    >>> df = pd.DataFrame(d)
    >>> note_to_level(df.loc[0])
    3
    >>> note_to_level(df.loc[1])
    1
    >>> note_to_level(df.loc[2])
    'na'
    >>> note_to_level(df.loc[3])
    3
    >>> note_to_level(df.loc[5])
    3
    >>> note_to_level(df.loc[10])
    2
    """
    note = x["Note"]

    # A missing note means the question was not answered
    if pd.isnull(note):
        return "na"

    if not x["Niveau"]:
        # Plain mark: express it in thirds of the bareme, rounded up
        return int(ceil(note / x["Bareme"] * 3))

    # The note is already a level
    return int(note)
def question_uniq_formater(row): def question_uniq_formater(row):
""" Create a kind of unique description of the question """ Create a kind of unique description of the question
@ -154,6 +194,39 @@ def compute_marks(df):
""" """
return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1) return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)
def compute_level(df):
    """ Compute the level of every row of df

    note_to_level is applied row by row on the "Note", "Niveau" and
    "Bareme" columns; df itself is not modified.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: the result of the row-wise apply (one level per row)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...     "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...     "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...     "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...     "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...     "Trimestre": ["1"]*12,
    ...     "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...     "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...     "Note":[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...     }
    >>> df = pd.DataFrame(d)
    >>> compute_level(df)
    0     na
    1      1
    2      3
    3      3
    4      1
    5      3
    6      2
    7      3
    8      3
    9      2
    10     2
    11     3
    dtype: object
    """
    # Only these columns are handed to note_to_level
    wanted = ["Note", "Niveau", "Bareme"]
    notes = df[wanted]
    return notes.apply(note_to_level, axis=1)
def compute_latex_rep(df): def compute_latex_rep(df):
""" Add Latex_rep column to df """ Add Latex_rep column to df
@ -340,24 +413,24 @@ def digest_flat_df(flat_df):
... "Trimestre": ["1"]*12, ... "Trimestre": ["1"]*12,
... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2, ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2, ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
... "Note":[1, 0.33, 2, 1.5, 1, 3, nan, 0, 0, nan, nan, nan], ... "Note":[1, 0.33, 2, 1.5, 1, 3, np.nan, 0, 0, np.nan, np.nan, np.nan],
... } ... }
>>> df = pd.DataFrame(d) >>> df = pd.DataFrame(d)
>>> quest_df, exo_df, eval_df = digest_flat_df(df) >>> quest_df, exo_df, eval_df = digest_flat_df(df)
>>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest"]] >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest", "Level"]]
Eleve Nom Mark Latex_rep Normalized Uniq_quest Eleve Nom Mark Latex_rep Normalized Uniq_quest Level
0 E1 N1 1.00 1 1.00 Ex1 Q1 0 E1 N1 1.00 1 1.00 Ex1 Q1 3
1 E1 N1 0.33 0.33 0.33 Ex1 Q2 1 E1 N1 0.33 0.33 0.33 Ex1 Q2 1
2 E1 N1 2.00 2 1.00 Ex2 Q1 2 E1 N1 2.00 2 1.00 Ex2 Q1 3
3 E1 N1 1.50 1.5 0.75 Ex2 Q2 3 E1 N1 1.50 1.5 0.75 Ex2 Q2 3
4 E1 N2 0.67 \RepU 0.33 Ex1 Q1 4 E1 N2 0.67 \RepU 0.33 Ex1 Q1 1
5 E1 N2 2.00 \RepT 1.00 Ex2 Q1 5 E1 N2 2.00 \RepT 1.00 Ex2 Q1 3
6 E2 N1 NaN ?? NaN Ex1 Q1 6 E2 N1 NaN ?? NaN Ex1 Q1 na
7 E2 N1 0.00 0 0.00 Ex1 Q2 7 E2 N1 0.00 0 0.00 Ex1 Q2 0
8 E2 N1 0.00 0 0.00 Ex2 Q1 8 E2 N1 0.00 0 0.00 Ex2 Q1 0
9 E2 N1 NaN ?? NaN Ex2 Q2 9 E2 N1 NaN ?? NaN Ex2 Q2 na
10 E2 N2 NaN \NoRep NaN Ex1 Q1 10 E2 N2 NaN \NoRep NaN Ex1 Q1 na
11 E2 N2 NaN \NoRep NaN Ex2 Q1 11 E2 N2 NaN \NoRep NaN Ex2 Q1 na
>>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]] >>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]]
Eleve Nom Exercice Mark Normalized Eleve Nom Exercice Mark Normalized
0 E1 N1 Ex1 1.5 0.75 0 E1 N1 Ex1 1.5 0.75
@ -376,8 +449,9 @@ def digest_flat_df(flat_df):
3 1 E2 N2 1 4.0 01/10/2016 NaN NaN 3 1 E2 N2 1 4.0 01/10/2016 NaN NaN
""" """
# Remove data with "nn" (non notés) # Remove data with "nn" (non notés)
df = flat_df.copy()[flat_df["Note"] != "nn"] df = flat_df.copy()[flat_df["Note"].astype("object") != "nn"]
df["Mark"] = compute_marks(df) df["Mark"] = compute_marks(df)
df["Level"] = compute_level(df)
df["Latex_rep"] = compute_latex_rep(df) df["Latex_rep"] = compute_latex_rep(df)
df["Normalized"] = compute_normalized(df) df["Normalized"] = compute_normalized(df)
df["Uniq_quest"] = compute_question_description(df) df["Uniq_quest"] = compute_question_description(df)