start to build the package

This commit is contained in:
Benjamin Bertrand
2016-11-07 22:15:28 +03:00
parent e0748227c9
commit 58f0c3fbbc
10 changed files with 0 additions and 0 deletions

0
notes_tools/__init__.py Normal file
View File

View File

@@ -0,0 +1,295 @@
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
from math import ceil
# Values manipulations
def round_half_point(val):
    """ Round val up to the next multiple of 0.5

    :param val: number to round
    :returns: 0.5 * ceil(2 * val); val itself, untouched, when ceil raises
        ValueError (as Python 3 does for NaN)

    >>> round_half_point(1.33)
    1.5
    >>> round_half_point(2)
    2.0
    """
    try:
        half_steps = ceil(2.0 * val)
    except ValueError:
        return val
    return 0.5 * half_steps
# LaTeX macros used by note_to_rep for "Niveau" notes: entry 0 stands for a
# missing note, entry n + 1 for a note equal to n (n being 0, 1, 2 or 3).
latex_caract = ["\\NoRep", "\\RepZ", "\\RepU", "\\RepD", "\\RepT"]
def note_to_rep(x):
    r""" Give the representation of a note: a LaTeX macro or the raw value

    :param x: row-like object indexable by "Niveau" and "Note"
    :returns: an entry of latex_caract when "Niveau" is truthy and the note
        is missing or equal to 0, 1, 2 or 3; the untouched note otherwise

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> note_to_rep(df.loc[0])
    1.0
    >>> note_to_rep(df.loc[4])
    '\\RepU'
    """
    # Plain notes (no "Niveau") are shown as they are
    if not x["Niveau"]:
        return x["Note"]

    note = x["Note"]
    if pd.isnull(note):
        return latex_caract[0]
    if note in range(4):
        # latex_caract is shifted by one because entry 0 means "no answer"
        return latex_caract[int(note) + 1]
    return note
def note_to_mark(x):
    """ Compute the mark of a row, rescaling it when it is a "Niveau" note

    :param x: row-like object indexable by "Niveau", "Note" and "Bareme"
    :returns: Note * Bareme / 3 when "Niveau" is truthy, Note otherwise

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> note_to_mark(df.loc[0])
    1.0
    >>> note_to_mark(df.loc[10])
    1.3333333333333333
    """
    if not x["Niveau"]:
        return x["Note"]
    # A "Niveau" note is divided by 3 and scaled by the "Bareme" of the row
    weighted = x["Note"] * x["Bareme"]
    return weighted / 3
# DataFrame columns manipulations
def compute_marks(df):
    """ Compute the mark of every row of df

    note_to_mark is applied row by row on the "Note", "Niveau" and "Bareme"
    columns, then missing results are replaced by 0.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns
    :returns: Series of marks (df itself is not modified)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_marks(df)  # doctest: +NORMALIZE_WHITESPACE
    0     1.000000
    1     0.330000
    2     2.000000
    3     1.500000
    4     0.666667
    5     2.000000
    6     0.666000
    7     1.000000
    8     1.500000
    9     1.000000
    10    1.333333
    11    2.000000
    dtype: float64
    """
    needed = df[["Note", "Niveau", "Bareme"]]
    marks = needed.apply(note_to_mark, axis=1)
    return marks.fillna(0)
def compute_latex_rep(df):
    r""" Compute the representation (LaTeX macro or value) of every row of df

    note_to_rep is applied row by row on the "Note" and "Niveau" columns,
    then missing results are replaced by "??".

    :param df: DataFrame with "Note" and "Niveau" columns
    :returns: Series of representations (df itself is not modified)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_latex_rep(df)  # doctest: +NORMALIZE_WHITESPACE
    0         1
    1      0.33
    2         2
    3       1.5
    4     \RepU
    5     \RepT
    6     0.666
    7         1
    8       1.5
    9         1
    10    \RepD
    11    \RepT
    dtype: object
    """
    needed = df[["Note", "Niveau"]]
    representations = needed.apply(note_to_rep, axis=1)
    return representations.fillna("??")
# Computing custom values
def compute_exo_marks(df):
    """ Compute Exercice level marks

    "Bareme" and "Mark" are summed for each (Eleve, Nom, Exercice, Date,
    Trimestre) group and each sum goes through round_half_point. The rows
    of the result are tagged with Question = "Total" and Niveau = 0.

    :param df: the original marks
    :returns: DataFrame with computed marks

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> compute_exo_marks(df)  # doctest: +NORMALIZE_WHITESPACE
      Eleve Nom Exercice        Date Trimestre  Bareme  Mark Question  Niveau
    0    E1  N1      Ex1  16/09/2016         1       2   1.5    Total       0
    1    E1  N1      Ex2  16/09/2016         1       4   3.5    Total       0
    2    E1  N2      Ex1  01/10/2016         1       2   1.0    Total       0
    3    E1  N2      Ex2  01/10/2016         1       2   2.0    Total       0
    4    E2  N1      Ex1  16/09/2016         1       2   2.0    Total       0
    5    E2  N1      Ex2  16/09/2016         1       4   2.5    Total       0
    6    E2  N2      Ex1  01/10/2016         1       2   1.5    Total       0
    7    E2  N2      Ex2  01/10/2016         1       2   2.0    Total       0
    """
    group_keys = ["Eleve", "Nom", "Exercice", "Date", "Trimestre"]
    summed = pd.pivot_table(
        df,
        index=group_keys,
        values=["Bareme", "Mark"],
        aggfunc=np.sum,
    )
    totals = summed.applymap(round_half_point).reset_index()
    # Tag the rows as exercise totals
    totals["Question"] = "Total"
    totals["Niveau"] = 0
    return totals
def compute_eval_marks(df):
    """ Compute Nom level marks from the dataframe using only row with Total in Question

    "Bareme" and "Mark" of the kept rows are summed for each (Eleve, Nom,
    Date, Trimestre) group and each sum goes through round_half_point. The
    rows of the result are tagged with Exercice = "Total" and Niveau = 0.

    :param df: DataFrame with value Total in Question column
    :returns: DataFrame with evaluation marks

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> df_exo = compute_exo_marks(df)
    >>> compute_eval_marks(df_exo)  # doctest: +NORMALIZE_WHITESPACE
      Eleve Nom        Date Trimestre  Bareme  Mark Exercice  Niveau
    0    E1  N1  16/09/2016         1       6   5.0    Total       0
    1    E1  N2  01/10/2016         1       4   3.0    Total       0
    2    E2  N1  16/09/2016         1       6   4.5    Total       0
    3    E2  N2  01/10/2016         1       4   3.5    Total       0
    """
    is_total = df["Question"] == "Total"
    group_keys = ["Eleve", "Nom", "Date", "Trimestre"]
    summed = pd.pivot_table(
        df[is_total],
        index=group_keys,
        values=["Bareme", "Mark"],
        aggfunc=np.sum,
    )
    totals = summed.applymap(round_half_point).reset_index()
    # Tag the rows as evaluation totals
    totals["Exercice"] = "Total"
    totals["Niveau"] = 0
    return totals
def digest_flat_df(flat_df):
    """ Compute necessary element to make a flat df usable for analysis.

    :param flat_df: flat marks, one row per student and question
    :returns: tuple of three DataFrames: a copy of flat_df with the "Mark"
        and "Latex_rep" columns added, the result of compute_exo_marks on
        that copy, and the result of compute_eval_marks on the latter

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    """
    # Work on a copy so that the caller's DataFrame is left untouched
    quest_df = flat_df.copy()
    quest_df["Mark"] = compute_marks(flat_df)
    quest_df["Latex_rep"] = compute_latex_rep(flat_df)

    exo_df = compute_exo_marks(quest_df)
    return quest_df, exo_df, compute_eval_marks(exo_df)
def students_pov(quest_df, exo_df, eval_df):
    r""" Reorganise the marks student by student

    :param quest_df: question level marks, with "Eleve" and "Exercice" columns
    :param exo_df: exercise level marks, with "Eleve" and "Exercice" columns
    :param eval_df: evaluation level marks, with an "Eleve" column
    :returns: list with one dict per student of eval_df, holding
        "Nom" (the student), "Total" (first row of eval_df for the student)
        and "Exercices", a list of dicts with "Nom" (the exercise), "Total"
        (first row of exo_df for the student and the exercise) and
        "Questions" (rows of quest_df for the student and the exercise)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    >>> std_pov = students_pov(quest_df, exo_df, eval_df)
    >>> std = std_pov[0]
    >>> std["Nom"]
    'E1'
    >>> "{} / {}".format(std["Total"]["Mark"], std["Total"]["Bareme"])
    '5.0 / 6.0'
    >>> for exo in std["Exercices"]:
    ...     print("{}: {} / {}".format(exo["Nom"], exo["Total"]["Mark"], exo["Total"]["Bareme"]))
    Ex1: 1.5 / 2.0
    Ex2: 3.5 / 4.0
    >>> exo = std["Exercices"][0]
    >>> for _,q in exo["Questions"].iterrows():
    ...     print("{} : {}".format(q["Question"], q["Latex_rep"]))
    Q1 : 1.0
    Q2 : 0.33
    Q1 : \RepU
    """
    students = []
    for name in eval_df["Eleve"].unique():
        # Restrict the three tables to the current student
        own_questions = quest_df[quest_df["Eleve"] == name]
        own_exercises = exo_df[exo_df["Eleve"] == name]
        own_evals = eval_df[eval_df["Eleve"] == name]

        exercises = [
            {
                "Nom": exo_name,
                "Total": own_exercises[own_exercises["Exercice"] == exo_name].iloc[0],
                "Questions": own_questions[own_questions["Exercice"] == exo_name],
            }
            for exo_name in own_exercises["Exercice"].unique()
        ]
        students.append({
            "Nom": name,
            "Total": own_evals.iloc[0],
            "Exercices": exercises,
        })
    return students
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del

134
notes_tools/extract.py Normal file
View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
import xlrd
from path import Path
# Default directory in which list_classes and get_class_ws look for workbooks
notes_path = Path("./")
# Columns of a marks sheet that are not students: extract_students excludes
# them by default and flat_df_students copies them into every flattened row.
notStudent = ["Trimestre", "Nom", "Date", "Exercice", "Question", "Competence", "Domaine", "Commentaire", "Bareme", "Niveau"]
# NOTE(review): not referenced by the code visible here; presumably labels of
# lines scored as plain marks - confirm against the callers.
pure_marks = ["Malus", "Bonus", "Presentation"]
def list_classes(path = notes_path):
    """ List the classes available in path

    There is one class per xlsx file found in path; the class is the
    namebase of the file.

    :param path: directory to look in, a Path or anything Path accepts
    :returns: list of the classes

    >>> list_classes()
    ['509', '503', '308', '312']
    >>> p = Path("./")
    >>> list_classes(p)
    ['509', '503', '308', '312']
    >>> list_classes("./")
    ['509', '503', '308', '312']
    """
    pattern = "*.xlsx"
    try:
        names = [workbook.namebase for workbook in path.files(pattern)]
    except AttributeError:
        # path is not a Path yet (a plain string for instance)
        folder = Path(path)
        names = [workbook.namebase for workbook in folder.files(pattern)]
    return names
def get_class_ws(classe, path = notes_path):
    """ From the name of a classe, returns pd.ExcelFile

    :param classe: name of the class, as listed by list_classes(path)
    :param path: directory holding the workbooks, a Path or a string
    :returns: pd.ExcelFile opened on the file classe + ".xlsx" inside path
    :raises ValueError: when classe is not listed by list_classes(path)
    """
    available = list_classes(path)
    if classe not in available:
        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = path, c = available))
    # The workbook has to be read from the directory that was searched (path),
    # not from the module default; Path() lets a plain string be used too.
    return pd.ExcelFile(Path(path) / (classe + ".xlsx"))
def extract_students(df, notStudent = notStudent):
    """ Extract the list of students from df

    :param df: DataFrame of a marks sheet
    :param notStudent: column names that are not students
    :returns: pandas Index of the columns of df that are not in notStudent
    """
    return df.columns.difference(notStudent)
def check_students(dfs, notStudent = notStudent):
    """ Build the students list shared by all the DataFrames of dfs

    :param dfs: sequence of marks sheets (DataFrames)
    :param notStudent: column names that are not students
    :returns: students of the first DataFrame
    :raises ValueError: when dfs is empty or when the DataFrames do not
        have the same students
    """
    # notStudent is forwarded so that a custom list is really taken into account
    dfs_students = [extract_students(df, notStudent) for df in dfs]
    if not dfs_students:
        raise ValueError("No sheet to extract the students from")
    if not are_equal(dfs_students):
        # Report the lists that were actually compared
        raise ValueError("Not same list of students between sheets: {}".format([list(s) for s in dfs_students]))
    return dfs_students[0]
def are_equal(elems):
    """ Test if item of elems are equal, whatever the order inside each item

    :param elems: iterable of sortable collections
    :returns: True when every item, once sorted, equals the first one sorted;
        True as well when elems is empty or holds a single item

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True
    """
    items = iter(elems)
    try:
        reference = sorted(next(items))
    except StopIteration:
        # Nothing to compare: no item differs from another
        return True
    # all() stops at the first item that differs from the reference
    return all(sorted(item) == reference for item in items)
def flat_df_students(df, students):
    """ Flat the ws for students

    Every row of df gives one output row per student: the notStudent
    values of the row, the student in "Eleve" and the value found in the
    column of the student in "Note".

    :param df: DataFrame of a marks sheet
    :param students: labels of the student columns of df
    :returns: flattened DataFrame
    """
    by_row = df.T
    records = []
    for label in by_row:
        row = by_row[label]
        # Values shared by all the students for this row
        common = row[notStudent].to_dict()
        records.extend(
            dict(common, Eleve=student, Note=row.loc[student])
            for student in students
        )
    return pd.DataFrame.from_dict(records)
def get_all_marks(ws, marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]):
    """ Extract marks from marks_sheetnames

    :param ws: workbook exposing parse(sheetname), pd.ExcelFile for instance
    :param marks_sheetnames: names of the sheets to read (never mutated here)
    :returns: list of the parsed sheets, in marks_sheetnames order; the sheets
        whose parsing raises xlrd's XLRDError are left out
    """
    sheets = []
    for sheetname in marks_sheetnames:
        try:
            sheet = ws.parse(sheetname)
        except xlrd.biffh.XLRDError:
            # Presumably the sheet does not exist in this workbook: skip it.
            # NOTE(review): recent pandas versions may raise ValueError for a
            # missing sheet instead - confirm with the pandas version in use.
            continue
        sheets.append(sheet)
    return sheets
def extract_flat_marks(ws):
    """ Extract, flat and concat marks from the worksheet

    The "Notes", "Connaissances" and "Calcul mental" sheets are parsed (the
    ones whose parsing raises xlrd's XLRDError are skipped), checked to
    share the same students with check_students, flattened with
    flat_df_students and concatenated.

    :param ws: workbook exposing parse(sheetname), pd.ExcelFile for instance
    :returns: DataFrame with the flattened rows of every parsed sheet; row
        labels are not renumbered by the concatenation
    """
    marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]
    sheets = []
    for sheetname in marks_sheetnames:
        try:
            sheet = ws.parse(sheetname)
        except xlrd.biffh.XLRDError:
            # Presumably the sheet does not exist in this workbook: skip it
            continue
        sheets.append(sheet)

    students = check_students(sheets)
    # A single concat instead of growing a DataFrame sheet after sheet, which
    # copies the accumulated rows at every step
    return pd.concat([flat_df_students(sheet, students) for sheet in sheets])
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del