repytex/notes_tools/df_marks_manip.py

342 lines
12 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
from math import ceil
# Value manipulation
def round_half_point(val):
    """Round a value up to the next multiple of 0.5.

    Despite the name this is a ceiling, not a round-to-nearest: ``1.33``
    becomes ``1.5`` and a value already on a half point is kept.

    :param val: number to round
    :returns: ``0.5 * ceil(2 * val)`` as a float, or ``val`` itself when it
        is NaN or infinite (such values have no integer ceiling)

    >>> round_half_point(1.33)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point(float("inf"))
    inf
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (ValueError, OverflowError):
        # math.ceil raises ValueError for NaN and OverflowError for +/-inf;
        # neither can be rounded, so hand the value back untouched.
        return val
# LaTeX macros used to render "Niveau" answers: index 0 stands for a missing
# note, indexes 1 to 4 for the notes 0 to 3.
latex_caract = [
    "\\NoRep",
    "\\RepZ",
    "\\RepU",
    "\\RepD",
    "\\RepT",
]


def note_to_rep(x):
    r"""Give the LaTeX representation of a note.

    When "Niveau" is falsy the note is returned unchanged. Otherwise a
    missing note maps to the first entry of ``latex_caract``, a note equal to
    0, 1, 2 or 3 maps to the entry at ``note + 1``, and any other value is
    returned unchanged.

    :param x: mapping with "Niveau" and "Note" keys (a dict or a DataFrame row)
    :returns: a LaTeX macro name or the original note

    >>> note_to_rep({"Niveau": 0, "Note": 1.0})
    1.0
    >>> note_to_rep({"Niveau": 1, "Note": 1})
    '\\RepU'
    >>> note_to_rep({"Niveau": 1, "Note": float("nan")})
    '\\NoRep'
    """
    niveau, note = x["Niveau"], x["Note"]
    if not niveau:
        return note
    if pd.isnull(note):
        return latex_caract[0]
    if note in range(4):
        # Shift by one: slot 0 is reserved for the missing-note macro.
        return latex_caract[int(note) + 1]
    return note
def note_to_mark(x):
    """Convert a note into a mark.

    A "Niveau" note is turned into ``Note * Bareme / 3``; any other note is
    returned as is.

    :param x: mapping with "Niveau", "Note" and "Bareme" keys
    :returns: the mark for this row

    >>> note_to_mark({"Niveau": 0, "Note": 1.5, "Bareme": 2})
    1.5
    >>> note_to_mark({"Niveau": 1, "Note": 2, "Bareme": 2})
    1.3333333333333333
    """
    if not x["Niveau"]:
        return x["Note"]
    return x["Note"] * x["Bareme"] / 3
# DataFrame column manipulation
def compute_marks(df):
    """Build the "Mark" values for every row of df.

    ``note_to_mark`` is applied row by row; rows for which it yields a
    missing value get a mark of 0.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns
    :returns: Series of marks, indexed like df

    >>> df = pd.DataFrame({"Note": [1.5, 3, 2],
    ...                    "Niveau": [0, 1, 1],
    ...                    "Bareme": [2, 2, 3]})
    >>> compute_marks(df).tolist()
    [1.5, 2.0, 2.0]
    """
    needed = df[["Note", "Niveau", "Bareme"]]
    marks = needed.apply(note_to_mark, axis=1)
    return marks.fillna(0)
def compute_latex_rep(df):
    r"""Build the "Latex_rep" values for every row of df.

    ``note_to_rep`` is applied row by row; rows for which it yields a
    missing value are rendered as "??".

    :param df: DataFrame with "Note" and "Niveau" columns
    :returns: Series of LaTeX representations, indexed like df

    >>> df = pd.DataFrame({"Note": [1.5, 1, None], "Niveau": [0, 1, 1]})
    >>> compute_latex_rep(df).astype(str).tolist()
    ['1.5', '\\RepU', '\\NoRep']
    """
    needed = df[["Note", "Niveau"]]
    representations = needed.apply(note_to_rep, axis=1)
    return representations.fillna("??")
def compute_normalized(df):
    """Compute the normalized mark, i.e. "Mark" divided by "Bareme".

    :param df: DataFrame with "Mark" and "Bareme" columns
    :returns: Series of ratios, indexed like df

    >>> df = pd.DataFrame({"Mark": [1.0, 1.5], "Bareme": [2, 2]})
    >>> compute_normalized(df).tolist()
    [0.5, 0.75]
    """
    marks = df["Mark"]
    scales = df["Bareme"]
    return marks / scales
# Computing custom values
def compute_exo_marks(df):
    """Compute exercise-level marks.

    "Bareme" and "Mark" are summed for each distinct
    ("Eleve", "Nom", "Exercice", "Date", "Trimestre") combination and every
    sum is passed through ``round_half_point``. Each resulting row is tagged
    with ``Question == "Total"`` and ``Niveau == 0``.

    :param df: question-level DataFrame with "Eleve", "Nom", "Exercice",
        "Date", "Trimestre", "Bareme" and "Mark" columns
    :returns: DataFrame with one row per exercise of each student

    >>> df = pd.DataFrame({"Eleve": ["E1"] * 3,
    ...                    "Nom": ["N1"] * 3,
    ...                    "Exercice": ["Ex1", "Ex1", "Ex2"],
    ...                    "Date": ["16/09/2016"] * 3,
    ...                    "Trimestre": ["1"] * 3,
    ...                    "Bareme": [1, 1, 2],
    ...                    "Mark": [1.0, 0.33, 1.5]})
    >>> exo = compute_exo_marks(df)
    >>> exo["Exercice"].tolist()
    ['Ex1', 'Ex2']
    >>> exo["Bareme"].tolist()
    [2.0, 2.0]
    >>> exo["Mark"].tolist()
    [1.5, 1.5]
    """
    exo_pt = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        # The "sum" alias avoids the FutureWarning that pandas >= 2.1 emits
        # when the numpy callable np.sum is passed instead.
        aggfunc="sum",
    )
    # Column-wise Series.map stands in for DataFrame.applymap, which is
    # deprecated since pandas 2.1; Series.map exists in every pandas version.
    exo_pt = exo_pt.apply(lambda column: column.map(round_half_point))

    exo = exo_pt.reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo
def compute_eval_marks(df):
    """Compute evaluation-level ("Nom") marks from exercise totals.

    Only rows whose "Question" is "Total" are used. "Bareme" and "Mark" are
    summed for each distinct ("Eleve", "Nom", "Date", "Trimestre")
    combination and every sum is passed through ``round_half_point``. Each
    resulting row is tagged with ``Exercice == "Total"`` and ``Niveau == 0``.

    :param df: DataFrame with "Total" rows in its "Question" column, plus
        "Eleve", "Nom", "Date", "Trimestre", "Bareme" and "Mark" columns
    :returns: DataFrame with one row per evaluation of each student

    >>> df = pd.DataFrame({"Eleve": ["E1", "E1", "E2"],
    ...                    "Nom": ["N1"] * 3,
    ...                    "Date": ["16/09/2016"] * 3,
    ...                    "Trimestre": ["1"] * 3,
    ...                    "Exercice": ["Ex1", "Ex2", "Ex1"],
    ...                    "Question": ["Total"] * 3,
    ...                    "Bareme": [2.0, 4.0, 2.0],
    ...                    "Mark": [1.5, 3.5, 2.0]})
    >>> ev = compute_eval_marks(df)
    >>> ev["Eleve"].tolist()
    ['E1', 'E2']
    >>> ev["Bareme"].tolist()
    [6.0, 2.0]
    >>> ev["Mark"].tolist()
    [5.0, 2.0]
    """
    exo = df[df["Question"] == "Total"]
    eval_pt = pd.pivot_table(
        exo,
        index=["Eleve", "Nom", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        # The "sum" alias avoids the FutureWarning that pandas >= 2.1 emits
        # when the numpy callable np.sum is passed instead.
        aggfunc="sum",
    )
    # Column-wise Series.map stands in for DataFrame.applymap, which is
    # deprecated since pandas 2.1; Series.map exists in every pandas version.
    eval_pt = eval_pt.apply(lambda column: column.map(round_half_point))

    eval_m = eval_pt.reset_index()
    eval_m["Exercice"] = "Total"
    eval_m["Niveau"] = 0
    return eval_m
def digest_flat_df(flat_df):
    """Derive the question, exercise and evaluation tables from a flat df.

    The input is copied, never modified. The question-level copy gains the
    "Mark", "Latex_rep" and "Normalized" columns; the exercise-level table is
    built from it with ``compute_exo_marks`` and the evaluation-level table
    from the exercise-level one with ``compute_eval_marks``. Both aggregated
    tables also get a "Normalized" column.

    :param flat_df: DataFrame accepted by ``compute_marks`` and
        ``compute_latex_rep``, with the grouping columns the aggregation
        steps need
    :returns: tuple ``(quest_df, exo_df, eval_df)``
    """
    def with_normalized(frame):
        # Adds the Mark / Bareme ratio in place and returns the same frame.
        frame["Normalized"] = compute_normalized(frame)
        return frame

    quest_df = flat_df.copy()
    quest_df["Mark"] = compute_marks(flat_df)
    quest_df["Latex_rep"] = compute_latex_rep(flat_df)
    quest_df = with_normalized(quest_df)

    exo_df = with_normalized(compute_exo_marks(quest_df))
    eval_df = with_normalized(compute_eval_marks(exo_df))
    return quest_df, exo_df, eval_df
def students_pov(quest_df, exo_df, eval_df):
    """Regroup the three tables student by student.

    One dict is produced per distinct "Eleve" value of eval_df, with keys:

    - "Nom": the student identifier,
    - "Total": the first eval_df row of that student,
    - "Exercices": a list with one dict per distinct "Exercice" value among
      the student's exo_df rows, holding "Nom" (exercise name), "Total"
      (first matching exo_df row) and "Questions" (the student's quest_df
      rows for that exercise).

    Rows are matched on the exercise name only, so an exercise name shared
    by several evaluations ends up in a single entry.

    :param quest_df: question-level DataFrame with "Eleve" and "Exercice"
    :param exo_df: exercise-level DataFrame with "Eleve" and "Exercice"
    :param eval_df: evaluation-level DataFrame with "Eleve"
    :returns: list of per-student dicts
    """
    students = []
    for name in eval_df["Eleve"].unique():
        own_questions = quest_df[quest_df["Eleve"] == name]
        own_exercises = exo_df[exo_df["Eleve"] == name]
        total = eval_df[eval_df["Eleve"] == name].iloc[0]

        exercises = [
            {
                "Nom": exo_name,
                "Total": own_exercises[own_exercises["Exercice"] == exo_name].iloc[0],
                "Questions": own_questions[own_questions["Exercice"] == exo_name],
            }
            for exo_name in own_exercises["Exercice"].unique()
        ]
        students.append({"Nom": name, "Total": total, "Exercices": exercises})
    return students
# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del