start to build the package

2016-11-07 22:15:28 +03:00
parent e0748227c9
commit 58f0c3fbbc
10 changed files with 0 additions and 0 deletions
--- a/notes_tools/__init__.py
+++ b/notes_tools/__init__.py
--- a/notes_tools/df_marks_manip.py
+++ b/notes_tools/df_marks_manip.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+import pandas as pd
+import numpy as np
+from math import ceil
+
+# Values manipulations
+
+def round_half_point(val):
+    """ Round val up to the next multiple of 0.5
+
+    Values that ``math.ceil`` cannot turn into an integer are returned
+    unchanged: NaN (ValueError) and +/- infinity (OverflowError).
+
+    :param val: numeric value to round
+    :returns: the smallest multiple of 0.5 that is >= val, or val itself
+        when it is NaN or infinite
+
+    >>> round_half_point(1.2)
+    1.5
+    >>> round_half_point(2)
+    2.0
+    >>> round_half_point(float("inf"))
+    inf
+    """
+    try:
+        return 0.5 * ceil(2.0 * val)
+    except (ValueError, OverflowError):
+        # ceil() raises ValueError on NaN and OverflowError on infinity;
+        # neither can be rounded, so the value is handed back untouched.
+        return val
+
+# LaTeX macros for a level-graded answer: index 0 is used for a missing
+# note, index k + 1 for a note equal to k (k = 0..3).
+latex_caract = [
+    "\\NoRep",
+    "\\RepZ",
+    "\\RepU",
+    "\\RepD",
+    "\\RepT",
+]
+def note_to_rep(x):
+    r""" Give the representation of the Note of a row
+
+    When "Niveau" is truthy, a missing note gives latex_caract[0] and a
+    note equal to 0, 1, 2 or 3 gives the matching latex_caract entry.
+    In every other case the note itself is returned.
+
+    :param x: row (or mapping) with the keys "Niveau" and "Note"
+    :returns: a LaTeX macro name or the unchanged note
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> note_to_rep(df.loc[0])
+    1.0
+    >>> note_to_rep(df.loc[4])
+    '\\RepU'
+    """
+    # Rows that are not level-graded keep their raw note.
+    if not x["Niveau"]:
+        return x["Note"]
+
+    note = x["Note"]
+    if pd.isnull(note):
+        return latex_caract[0]
+    if note in range(4):
+        return latex_caract[int(note) + 1]
+    # Level-graded but not one of 0..3 (e.g. 1.5): nothing to translate.
+    return note
+
+def note_to_mark(x):
+    """ Compute the mark of a row, scaling it when it is a "Niveau" note
+
+    A row whose "Niveau" is truthy holds a level out of 3: its mark is
+    Note * Bareme / 3. Any other row already holds a mark, returned as is.
+
+    :param x: row (or mapping) with the keys "Niveau", "Note" and "Bareme"
+    :returns: the mark of the row
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> note_to_mark(df.loc[0])
+    1.0
+    >>> note_to_mark(df.loc[10])
+    1.3333333333333333
+    """
+    if not x["Niveau"]:
+        return x["Note"]
+    return x["Note"] * x["Bareme"] / 3
+
+# DataFrame columns manipulations
+
+def compute_marks(df):
+    """ Compute the values of the Mark column of df
+
+    note_to_mark is applied to every row of df; missing results are
+    replaced by 0. df itself is not modified.
+
+    :param df: DataFrame with the columns "Note", "Niveau" and "Bareme"
+    :returns: Series of marks, indexed like df
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> compute_marks(df)
+    0     1.000000
+    1     0.330000
+    2     2.000000
+    3     1.500000
+    4     0.666667
+    5     2.000000
+    6     0.666000
+    7     1.000000
+    8     1.500000
+    9     1.000000
+    10    1.333333
+    11    2.000000
+    dtype: float64
+    """
+    needed = df[["Note", "Niveau", "Bareme"]]
+    marks = needed.apply(note_to_mark, axis=1)
+    return marks.fillna(0)
+
+def compute_latex_rep(df):
+    r""" Compute the values of the Latex_rep column of df
+
+    note_to_rep is applied to every row of df; missing results are
+    replaced by "??". df itself is not modified.
+
+    :param df: DataFrame with the columns "Note" and "Niveau"
+    :returns: Series of representations, indexed like df
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> compute_latex_rep(df)
+    0         1
+    1      0.33
+    2         2
+    3       1.5
+    4     \RepU
+    5     \RepT
+    6     0.666
+    7         1
+    8       1.5
+    9         1
+    10    \RepD
+    11    \RepT
+    dtype: object
+    """
+    needed = df[["Note", "Niveau"]]
+    reps = needed.apply(note_to_rep, axis=1)
+    return reps.fillna("??")
+
+# Computing custom values
+
+def compute_exo_marks(df):
+    """ Compute the marks at the Exercice level
+
+    "Bareme" and "Mark" are summed for each (Eleve, Nom, Exercice, Date,
+    Trimestre) group, every sum goes through round_half_point, and the
+    result is tagged with Question = "Total" and Niveau = 0.
+
+    :param df: the original marks, with a "Mark" column
+    :returns: DataFrame with one row per exercice of each student
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df["Mark"] = compute_marks(df)
+    >>> compute_exo_marks(df)
+      Eleve Nom Exercice        Date Trimestre  Bareme  Mark Question  Niveau
+    0    E1  N1      Ex1  16/09/2016         1       2   1.5    Total       0
+    1    E1  N1      Ex2  16/09/2016         1       4   3.5    Total       0
+    2    E1  N2      Ex1  01/10/2016         1       2   1.0    Total       0
+    3    E1  N2      Ex2  01/10/2016         1       2   2.0    Total       0
+    4    E2  N1      Ex1  16/09/2016         1       2   2.0    Total       0
+    5    E2  N1      Ex2  16/09/2016         1       4   2.5    Total       0
+    6    E2  N2      Ex1  01/10/2016         1       2   1.5    Total       0
+    7    E2  N2      Ex2  01/10/2016         1       2   2.0    Total       0
+
+    """
+    group_by = ["Eleve", "Nom", "Exercice", "Date", "Trimestre"]
+    summed = pd.pivot_table(
+        df,
+        index=group_by,
+        values=["Bareme", "Mark"],
+        aggfunc=np.sum,
+    )
+    # Both summed columns (Bareme and Mark) are rounded up to a half point.
+    exo = summed.applymap(round_half_point).reset_index()
+    exo["Question"] = "Total"
+    exo["Niveau"] = 0
+    return exo
+
+def compute_eval_marks(df):
+    """ Compute the marks at the Nom (evaluation) level
+
+    Only the rows whose "Question" is "Total" are used. Their "Bareme" and
+    "Mark" are summed for each (Eleve, Nom, Date, Trimestre) group, every
+    sum goes through round_half_point, and the result is tagged with
+    Exercice = "Total" and Niveau = 0.
+
+    :param df: DataFrame with value Total in Question column
+    :returns: DataFrame with one row per evaluation of each student
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df["Mark"] = compute_marks(df)
+    >>> df_exo = compute_exo_marks(df)
+    >>> compute_eval_marks(df_exo)
+      Eleve Nom        Date Trimestre  Bareme  Mark Exercice  Niveau
+    0    E1  N1  16/09/2016         1       6   5.0    Total       0
+    1    E1  N2  01/10/2016         1       4   3.0    Total       0
+    2    E2  N1  16/09/2016         1       6   4.5    Total       0
+    3    E2  N2  01/10/2016         1       4   3.5    Total       0
+
+    """
+    totals_only = df[df["Question"] == "Total"]
+    group_by = ["Eleve", "Nom", "Date", "Trimestre"]
+    summed = pd.pivot_table(
+        totals_only,
+        index=group_by,
+        values=["Bareme", "Mark"],
+        aggfunc=np.sum,
+    )
+    eval_m = summed.applymap(round_half_point).reset_index()
+    eval_m["Exercice"] = "Total"
+    eval_m["Niveau"] = 0
+    return eval_m
+
+def digest_flat_df(flat_df):
+    """ Build the three DataFrames used for analysis from a flat df
+
+    :param flat_df: flat marks, left untouched (a copy is enriched)
+    :returns: tuple (quest_df, exo_df, eval_df) where quest_df is a copy of
+        flat_df with the "Mark" and "Latex_rep" columns added, exo_df comes
+        from compute_exo_marks and eval_df from compute_eval_marks
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
+    """
+    quest_df = flat_df.copy()
+    quest_df["Mark"] = compute_marks(flat_df)
+    quest_df["Latex_rep"] = compute_latex_rep(flat_df)
+
+    # Exercice totals are derived from the questions, evaluation totals
+    # from the exercice totals.
+    exo_df = compute_exo_marks(quest_df)
+    return quest_df, exo_df, compute_eval_marks(exo_df)
+
+def students_pov(quest_df, exo_df, eval_df):
+    r""" Regroup the marks student by student
+
+    :param quest_df: DataFrame of questions ("Eleve" and "Exercice" columns are read)
+    :param exo_df: DataFrame of exercice totals ("Eleve" and "Exercice" columns are read)
+    :param eval_df: DataFrame of evaluation totals ("Eleve" column is read)
+    :returns: list with one dict per student of eval_df, with the keys
+        "Nom" (the student), "Total" (first row of eval_df for this
+        student) and "Exercices" (list of dicts with the keys "Nom",
+        "Total": first row of exo_df for this exercice, and "Questions":
+        rows of quest_df for this exercice)
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
+    >>> std_pov = students_pov(quest_df, exo_df, eval_df)
+    >>> std = std_pov[0]
+    >>> std["Nom"]
+    'E1'
+    >>> "{} / {}".format(std["Total"]["Mark"], std["Total"]["Bareme"])
+    '5.0 / 6.0'
+    >>> for exo in std["Exercices"]:
+    ...    print("{}: {} / {}".format(exo["Nom"], exo["Total"]["Mark"], exo["Total"]["Bareme"]))
+    Ex1: 1.5 / 2.0
+    Ex2: 3.5 / 4.0
+    >>> exo = std["Exercices"][0]
+    >>> for _,q in exo["Questions"].iterrows():
+    ...    print("{} : {}".format(q["Question"], q["Latex_rep"]))
+    Q1 : 1.0
+    Q2 : 0.33
+    Q1 : \RepU
+
+    """
+    students = []
+    for name in eval_df["Eleve"].unique():
+        own_questions = quest_df[quest_df["Eleve"] == name]
+        own_exos = exo_df[exo_df["Eleve"] == name]
+        # Only the first matching row is kept as the student's total.
+        total = eval_df[eval_df["Eleve"] == name].iloc[0]
+
+        exercices = [
+            {
+                "Nom": label,
+                "Total": own_exos[own_exos["Exercice"] == label].iloc[0],
+                "Questions": own_questions[own_questions["Exercice"] == label],
+            }
+            for label in own_exos["Exercice"].unique()
+        ]
+
+        students.append({
+            "Nom": name,
+            "Total": total,
+            "Exercices": exercices,
+        })
+    return students
+
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del 
--- a/notes_tools/extract.py
+++ b/notes_tools/extract.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+import pandas as pd
+import numpy as np
+import xlrd
+from path import Path
+
+
+# Default directory in which the class workbooks (*.xlsx) are looked up.
+notes_path = Path("./")
+# Column names of a marks sheet that are not students.
+notStudent = [
+    "Trimestre",
+    "Nom",
+    "Date",
+    "Exercice",
+    "Question",
+    "Competence",
+    "Domaine",
+    "Commentaire",
+    "Bareme",
+    "Niveau",
+]
+# NOTE(review): not referenced in the visible part of this module;
+# presumably labels of rows scored directly in points -- confirm.
+pure_marks = [
+    "Malus",
+    "Bonus",
+    "Presentation",
+]
+
+def list_classes(path = notes_path):
+    """
+    List classes available in path
+
+    A class is available when path contains a file named "<class>.xlsx".
+
+    :param path: directory to look into, as a Path or as a plain string
+    :returns: list of the class names (file names without extension)
+
+    >>> list_classes()
+    ['509', '503', '308', '312']
+    >>> p = Path("./")
+    >>> list_classes(p)
+    ['509', '503', '308', '312']
+    >>> list_classes("./")
+    ['509', '503', '308', '312']
+    """
+    # Decide once whether `path` must be wrapped, instead of guarding the
+    # whole listing with `except AttributeError`: an AttributeError raised
+    # while reading an entry is then no longer mistaken for "path is a
+    # string" and silently retried.
+    folder = path if hasattr(path, "files") else Path(path)
+    return [workbook.namebase for workbook in folder.files("*.xlsx")]
+
+def get_class_ws(classe, path = notes_path):
+    """
+    From the name of a classe, returns pd.ExcelFile
+
+    :param classe: name of the class, i.e. the workbook name without ".xlsx"
+    :param path: directory holding the workbooks, as a Path or a string
+    :returns: pd.ExcelFile opened on "<path>/<classe>.xlsx"
+    :raises ValueError: when classe is not listed by list_classes(path)
+    """
+    available = list_classes(path)
+    if classe not in available:
+        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = path, c = available))
+    # Open the workbook under the `path` argument (the same directory that
+    # was just listed), not under the module-level default. Path() also
+    # makes a plain string usable with the "/" operator.
+    return pd.ExcelFile(Path(path) / (classe + ".xlsx"))
+
+def extract_students(df, notStudent = notStudent):
+    """ Extract the students from the columns of df
+
+    :param df: DataFrame of a marks sheet
+    :param notStudent: column names that are not students
+    :returns: the columns of df that are not listed in notStudent
+    """
+    return df.columns.difference(notStudent)
+
+def check_students(dfs, notStudent = notStudent):
+    """ Build students list and check it is the same in every df
+
+    :param dfs: list of DataFrames (one per marks sheet)
+    :param notStudent: column names that are not students
+    :returns: the students of the first df
+    :raises ValueError: when dfs is empty or when the dfs do not all have
+        the same students
+    """
+    if not dfs:
+        raise ValueError("No sheet given, the list of students can not be built")
+
+    # notStudent is forwarded so that a custom list is actually honoured.
+    dfs_students = [extract_students(df, notStudent) for df in dfs]
+
+    if not are_equal(dfs_students):
+        raise ValueError("Not same list of students between sheets: {}".format([list(s) for s in dfs_students]))
+
+    return dfs_students[0]
+
+def are_equal(elems):
+    """ Test if all items of elems hold the same values, whatever their order
+
+    :param elems: non empty sequence of sortable iterables
+    :returns: True when every item, once sorted, equals the sorted first item
+
+    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
+    >>> are_equal(L)
+    True
+    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
+    >>> are_equal(L)
+    False
+
+    """
+    reference = sorted(elems[0])
+    # A list (not a generator) so that every item gets sorted and compared
+    # before the verdict is computed.
+    same = [sorted(candidate) == reference for candidate in elems[1:]]
+    return all(same)
+
+def flat_df_students(df, students):
+    """ Flat the ws for students
+
+    Every row of df gives one record per student: the notStudent fields of
+    the row, plus "Eleve" (the student) and "Note" (the value found in the
+    student's column for this row).
+
+    :param df: DataFrame of a marks sheet, one column per student
+    :param students: labels of the student columns
+    :returns: DataFrame built from all the records
+    """
+    by_row = df.T
+    records = []
+    for label in by_row:
+        line = by_row[label]
+        # Fields shared by every student for this row.
+        shared = line[notStudent].to_dict()
+        for student in students:
+            record = shared.copy()
+            record["Eleve"] = student
+            record["Note"] = line.loc[student]
+            records.append(record)
+    return pd.DataFrame.from_dict(records)
+
+def get_all_marks(ws, marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
+    """ Extract marks from marks_sheetnames
+
+    :param ws: object with a parse(sheetname) method, such as pd.ExcelFile
+    :param marks_sheetnames: names of the sheets to parse
+    :returns: list of the parsed sheets, in the order of marks_sheetnames;
+        sheets whose parsing raises xlrd.biffh.XLRDError are skipped
+
+    """
+    sheets = []
+    for sheetname in marks_sheetnames:
+        try:
+            marks = ws.parse(sheetname)
+        except xlrd.biffh.XLRDError:
+            # Presumably raised when the workbook has no such sheet: the
+            # sheets are optional, so it is simply left out.
+            continue
+        sheets.append(marks)
+    return sheets
+
+def extract_flat_marks(ws):
+    """ Extract, flat and concatenate marks from the worksheet
+
+    The sheets "Notes", "Connaissances" and "Calcul mental" are parsed
+    (those raising xlrd.biffh.XLRDError are skipped), the students are
+    checked to be the same in every sheet, then each sheet is flattened
+    and the results are concatenated.
+
+    :param ws: object with a parse(sheetname) method, such as pd.ExcelFile
+    :returns: DataFrame with the flat marks of all the sheets; the index
+        of each flattened sheet is kept as is
+
+    """
+    marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]
+
+    sheets = []
+    for sheetname in marks_sheetnames:
+        try:
+            sheets.append(ws.parse(sheetname))
+        except xlrd.biffh.XLRDError:
+            pass
+
+    students = check_students(sheets)
+
+    # One concat over all the sheets instead of growing a DataFrame in the
+    # loop: no repeated copies and no empty seed frame in the concat.
+    return pd.concat([flat_df_students(sheet, students) for sheet in sheets])
+
+
+
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del