397 lines
14 KiB
Python
397 lines
14 KiB
Python
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
from math import ceil
|
|
import logging
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
# Values manipulations
|
|
|
|
def round_half_point(val):
    """Round ``val`` up to the next multiple of 0.5.

    Values that cannot be rounded are returned unchanged so the function
    can be mapped over heterogeneous data without raising.

    :param val: the value to round (normally a number)
    :returns: ``0.5 * ceil(2 * val)`` as a float, or ``val`` itself when it
        is NaN, infinite, or not a number

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point("nn")
    'nn'
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (ValueError, OverflowError, TypeError):
        # ValueError: NaN, OverflowError: +/-inf, TypeError: non numeric.
        return val
|
|
|
|
# LaTeX macros: index 0 is used for a missing note, index n + 1 for level n.
latex_caract = ["\\NoRep", "\\RepZ", "\\RepU", "\\RepD", "\\RepT"]


def note_to_rep(x):
    r"""Return the LaTeX representation of a note.

    :param x: mapping (dict or DataFrame row) with "Niveau" and "Note" keys
    :returns: the raw note when "Niveau" is falsy; otherwise the LaTeX
        macro for a missing note or for a note equal to 0, 1, 2 or 3; any
        other note is returned unchanged

    >>> note_to_rep({"Niveau": 0, "Note": 1.0})
    1.0
    >>> note_to_rep({"Niveau": 1, "Note": 1})
    '\\RepU'
    """
    if not x["Niveau"]:
        return x["Note"]

    note = x["Note"]
    if pd.isnull(note):
        return latex_caract[0]
    if note in range(4):
        return latex_caract[int(note) + 1]
    return note
|
|
|
|
def note_to_mark(x):
    """Compute the mark of a note, scaling it when it is a "Niveau" note.

    :param x: mapping (dict or DataFrame row) with "Niveau", "Note" and
        "Bareme" keys
    :returns: the note itself when "Niveau" is falsy, otherwise
        ``Note * Bareme / 3``

    >>> note_to_mark({"Niveau": 0, "Note": 1.5, "Bareme": 2})
    1.5
    >>> note_to_mark({"Niveau": 1, "Note": 3, "Bareme": 2})
    2.0
    """
    if not x["Niveau"]:
        return x["Note"]
    scaled = x["Note"] * x["Bareme"]
    return scaled / 3
|
|
|
|
def _is_integer_like(value):
    """Tell whether ``int(value)`` succeeds (False for NaN, None, text...)."""
    try:
        int(value)
    except (ValueError, TypeError):
        # ValueError: NaN or non numeric text, TypeError: None / pd.NA.
        return False
    return True


def question_uniq_formater(row):
    """Build a (nearly) unique textual description of a question.

    The result is "<exercise> <question>", followed by " (<comment>)" when
    the row has a non-null "Commentaire". Integer-like exercises and
    questions are prefixed with "Exo" and "Qu"; a null question is left
    out.

    :param row: mapping (dict or DataFrame row) with "Exercice" and
        "Question" keys and an optional "Commentaire" key
    :returns: the description as a string

    >>> question_uniq_formater({"Exercice": "Ex1", "Question": "Q1"})
    'Ex1 Q1'
    >>> question_uniq_formater({"Exercice": 1, "Question": 2, "Commentaire": "bonus"})
    'Exo1 Qu2 (bonus)'
    """
    exercise = row["Exercice"]
    prefix = "Exo" if _is_integer_like(exercise) else ""
    ans = prefix + str(exercise) + " "

    question = row["Question"]
    if _is_integer_like(question):
        ans += "Qu" + str(question)
    elif not pd.isnull(question):
        ans += str(question)

    # The comment column is optional.
    try:
        comment = row["Commentaire"]
    except KeyError:
        comment = None
    if not pd.isnull(comment):
        ans += " ({})".format(comment)
    return ans
|
|
|
|
# DataFrame columns manipulations
|
|
|
|
def compute_marks(df):
    """Compute the mark of every row of ``df``.

    ``note_to_mark`` is applied row by row to the "Note", "Niveau" and
    "Bareme" columns. ``df`` itself is not modified.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns
    :returns: the row-wise result of ``note_to_mark``

    >>> df = pd.DataFrame({"Note": [1.5, 3.0], "Niveau": [0, 1], "Bareme": [2, 2]})
    >>> compute_marks(df).tolist()
    [1.5, 2.0]
    """
    mark_inputs = df[["Note", "Niveau", "Bareme"]]
    return mark_inputs.apply(note_to_mark, axis=1)
|
|
|
|
def compute_latex_rep(df):
    """Compute the LaTeX representation of every row of ``df``.

    ``note_to_rep`` is applied row by row to the "Note" and "Niveau"
    columns; null results are then replaced by "??". ``df`` itself is not
    modified.

    :param df: DataFrame with "Note" and "Niveau" columns
    :returns: the row-wise result of ``note_to_rep`` with nulls filled
    """
    rep_inputs = df[["Note", "Niveau"]]
    representations = rep_inputs.apply(note_to_rep, axis=1)
    return representations.fillna("??")
|
|
|
|
def compute_normalized(df):
    """Compute the normalized mark (Mark / Bareme) of every row.

    :param df: DataFrame with "Mark" and "Bareme" columns
    :returns: Series holding the element-wise quotient

    >>> df = pd.DataFrame({"Mark": [1.0, 1.5], "Bareme": [2, 2]})
    >>> compute_normalized(df).tolist()
    [0.5, 0.75]
    """
    marks = df["Mark"]
    scales = df["Bareme"]
    return marks / scales
|
|
|
|
def compute_question_description(df):
    """Compute the unique description of the question of every row.

    :param df: DataFrame whose rows are accepted by
        ``question_uniq_formater``
    :returns: the row-wise result of ``question_uniq_formater``
    """
    descriptions = df.apply(question_uniq_formater, axis=1)
    return descriptions
|
|
|
|
# Computing custom values
|
|
|
|
def compute_exo_marks(df):
    """Compute exercise-level marks.

    "Bareme" and "Mark" are summed for each
    (Eleve, Nom, Exercice, Date, Trimestre) group, then every summed value
    (both columns) is passed through ``round_half_point``.

    :param df: the question-level marks, with "Eleve", "Nom", "Exercice",
        "Date", "Trimestre", "Bareme" and "Mark" columns
    :returns: DataFrame with one row per group, the grouping columns, the
        summed "Bareme" and "Mark", plus "Question" set to "Total" and
        "Niveau" set to 0
    """
    exo_pt = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        # The string alias keeps the groupby sum that ``np.sum`` is mapped
        # to, without the FutureWarning recent pandas emits for callables.
        aggfunc="sum",
    )
    # Element-wise rounding through Series.map: DataFrame.applymap is
    # deprecated in recent pandas and its replacement is missing in old ones.
    rounded = exo_pt.apply(lambda column: column.map(round_half_point))

    exo = rounded.reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo
|
|
|
|
def compute_eval_marks(df):
    """Compute evaluation ("Nom") level marks.

    For each distinct "Nom", "Bareme" and "Mark" are summed per
    (Eleve, Nom, Trimestre). When an evaluation spans several dates (or its
    date is already "Trimestre"), rows without a mark are dropped before
    summing and the resulting "Date" is "Trimestre"; otherwise "Date" is
    the single date of the evaluation.

    :param df: DataFrame with "Eleve", "Nom", "Trimestre", "Date", "Bareme"
        and "Mark" columns -- presumably the exercise totals returned by
        ``compute_exo_marks``
    :returns: DataFrame with evaluation marks; its "index" column holds the
        position of each row inside its own evaluation
    """
    eval_frames = []
    for eval_name in df["Nom"].unique():
        logger.debug("Compute marks for %s", eval_name)
        eval_df = df[df["Nom"] == eval_name]
        dates = eval_df["Date"].unique()
        logger.debug("Find those dates: %s", dates)

        if len(dates) > 1 or dates[0] == "Trimestre":
            # For assignments spread over time, missing marks (NaN) must
            # not be penalising: drop those rows.
            eval_df = eval_df.dropna(subset=["Mark"])
            eval_date = "Trimestre"
        else:
            eval_date = dates[0]

        eval_pt = pd.pivot_table(
            eval_df,
            index=["Eleve", "Nom", "Trimestre"],
            values=["Bareme", "Mark", "Date"],
            aggfunc={
                "Bareme": "sum",
                "Mark": "sum",
                # Every group of this evaluation gets the same date label.
                "Date": lambda _group, label=eval_date: label,
            },
        )
        eval_frames.append(eval_pt.reset_index())

    if not eval_frames:
        # No evaluation at all: same empty result as an empty accumulator.
        return pd.DataFrame().reset_index()

    # Single concatenation instead of growing a frame inside the loop.
    return pd.concat(eval_frames).reset_index()
|
|
|
|
def digest_flat_df(flat_df):
    """Compute the elements needed to make a flat df usable for analysis.

    Rows whose "Note" is "nn" (not graded) are removed, then the derived
    columns are added on a copy: ``flat_df`` itself is left untouched.

    :param flat_df: DataFrame with at least the "Eleve", "Nom", "Exercice",
        "Question", "Date", "Trimestre", "Bareme", "Niveau" and "Note"
        columns
    :returns: tuple ``(quest_df, exo_df, eval_df)``:

        - ``quest_df``: the graded rows with "Mark", "Latex_rep",
          "Normalized" and "Uniq_quest" columns added
        - ``exo_df``: result of ``compute_exo_marks`` with a "Normalized"
          column added
        - ``eval_df``: result of ``compute_eval_marks`` on ``exo_df`` with
          a "Normalized" column added
    """
    # Filter first, then copy: the new columns are assigned on an
    # independent frame (no SettingWithCopyWarning) and only the kept rows
    # are duplicated.
    df = flat_df[flat_df["Note"] != "nn"].copy()
    df["Mark"] = compute_marks(df)
    df["Latex_rep"] = compute_latex_rep(df)
    # "Normalized" needs the "Mark" column computed above.
    df["Normalized"] = compute_normalized(df)
    df["Uniq_quest"] = compute_question_description(df)

    exo_df = compute_exo_marks(df)
    exo_df["Normalized"] = compute_normalized(exo_df)

    eval_df = compute_eval_marks(exo_df)
    eval_df["Normalized"] = compute_normalized(eval_df)

    return df, exo_df, eval_df
|
|
|
|
|
|
# -----------------------------
|
|
# Settings for 'vim'
|
|
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
|
# cursor: 16 del
|