repytex/notes_tools/tools/df_marks_manip.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
from math import ceil

# Values manipulations

def round_half_point(val):
    """ Round a value up to the next half point

    :param val: number to round
    :returns: the smallest multiple of 0.5 greater than or equal to val, as a
        float; val itself when it is NaN or infinite

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(1.5)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point(float("inf"))
    inf
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (ValueError, OverflowError):
        # ceil() raises ValueError on NaN and OverflowError on +/-inf: such
        # values cannot be rounded, so they are handed back untouched.
        return val

# LaTeX macros used to display "Niveau" notes: index 0 is used when the note is
# missing, index i + 1 is used for the level i (0 to 3).
latex_caract = [
    "\\NoRep",
    "\\RepZ",
    "\\RepU",
    "\\RepD",
    "\\RepT",
]


def note_to_rep(x):
    r""" Give the representation of a note to put in the LaTeX document

    :param x: dictionary (or DataFrame row) with "Niveau" and "Note" keys
    :returns: the note itself when the row is not a "Niveau" row or when the
        note is not one of the levels 0, 1, 2, 3; otherwise the LaTeX macro of
        the level ("\NoRep" when the note is missing)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> note_to_rep(df.loc[0])
    1.0
    >>> note_to_rep(df.loc[4])
    '\\RepU'
    """
    # Plain numeric notes are displayed as they are.
    if not x["Niveau"]:
        return x["Note"]

    note = x["Note"]
    if pd.isnull(note):
        return latex_caract[0]
    if note in range(4):
        # Shifted by one because slot 0 is taken by the "missing" macro.
        return latex_caract[int(note) + 1]
    # A "Niveau" note which is not a known level is left unchanged.
    return note

def note_to_mark(x):
    """ Convert a note into a mark (number of points)

    A "Niveau" note is a level which is scaled to the "Bareme" of the
    question (Note * Bareme / 3); any other note is already a mark and is
    returned unchanged.

    :param x: dictionary (or DataFrame row) with "Niveau", "Note" and "Bareme" keys
    :returns: the mark

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> note_to_mark(df.loc[0])
    1.0
    >>> note_to_mark(df.loc[10])
    1.3333333333333333
    """
    if not x["Niveau"]:
        # Not a level: the note is directly the number of points.
        return x["Note"]

    level, scale = x["Note"], x["Bareme"]
    return level * scale / 3

# DataFrame columns manipulations

def compute_marks(df):
    """ Compute the mark of every row of df

    The result is returned, df itself is not modified.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: Series of marks (note_to_mark applied to each row), missing
        values being replaced by 0

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_marks(df)
    0     1.000000
    1     0.330000
    2     2.000000
    3     1.500000
    4     0.666667
    5     2.000000
    6     0.666000
    7     1.000000
    8     1.500000
    9     1.000000
    10    1.333333
    11    2.000000
    dtype: float64
    """
    graded = df[["Note", "Niveau", "Bareme"]]
    marks = graded.apply(note_to_mark, axis=1)
    # A row without mark counts for zero point.
    return marks.fillna(0)

def compute_latex_rep(df):
    r""" Compute the LaTeX representation of every row of df

    The result is returned, df itself is not modified.

    :param df: DataFrame with "Note" and "Niveau" columns.
    :returns: Series of representations (note_to_rep applied to each row),
        missing values being replaced by "??"

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_latex_rep(df)
    0         1
    1      0.33
    2         2
    3       1.5
    4     \RepU
    5     \RepT
    6     0.666
    7         1
    8       1.5
    9         1
    10    \RepD
    11    \RepT
    dtype: object
    """
    noted = df[["Note", "Niveau"]]
    reps = noted.apply(note_to_rep, axis=1)
    # What is still missing after the conversion is displayed as "??".
    return reps.fillna("??")

def compute_normalized(df):
    """ Compute the ratio of points obtained (Mark / Bareme)

    :param df: DataFrame with "Mark" and "Bareme" columns
    :returns: "Mark" divided by "Bareme"

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> compute_normalized(df)
    0     1.000000
    1     0.330000
    2     1.000000
    3     0.750000
    4     0.333333
    5     1.000000
    6     0.666000
    7     1.000000
    8     0.750000
    9     0.500000
    10    0.666667
    11    1.000000
    dtype: float64
    """
    obtained, maximum = df["Mark"], df["Bareme"]
    return obtained / maximum

# Computing custom values

def compute_exo_marks(df):
    """ Compute Exercice level marks

    "Bareme" and "Mark" are summed for each (Eleve, Nom, Exercice, Date,
    Trimestre) group, then each sum is rounded with round_half_point.

    :param df: the original marks, with "Eleve", "Nom", "Exercice", "Date",
        "Trimestre", "Bareme" and "Mark" columns
    :returns: DataFrame with one row per group, "Question" set to "Total" and
        "Niveau" set to 0

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> compute_exo_marks(df)
      Eleve Nom Exercice        Date Trimestre  Bareme  Mark Question  Niveau
    0    E1  N1      Ex1  16/09/2016         1     2.0   1.5    Total       0
    1    E1  N1      Ex2  16/09/2016         1     4.0   3.5    Total       0
    2    E1  N2      Ex1  01/10/2016         1     2.0   1.0    Total       0
    3    E1  N2      Ex2  01/10/2016         1     2.0   2.0    Total       0
    4    E2  N1      Ex1  16/09/2016         1     2.0   2.0    Total       0
    5    E2  N1      Ex2  16/09/2016         1     4.0   2.5    Total       0
    6    E2  N2      Ex1  01/10/2016         1     2.0   1.5    Total       0
    7    E2  N2      Ex2  01/10/2016         1     2.0   2.0    Total       0

    """
    # The aggregation is named with a string: recent pandas releases warn when
    # np.sum is passed as aggfunc.
    summed = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        aggfunc="sum",
    )
    # Element-wise rounding done column by column with Series.map, because
    # DataFrame.applymap is deprecated in recent pandas releases.
    rounded = summed.apply(lambda col: col.map(round_half_point))

    exo = rounded.reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo

def compute_eval_marks(df):
    """ Compute Nom level marks from the dataframe using only row with Total in Question

    "Bareme" and "Mark" of the "Total" rows are summed for each (Eleve, Nom,
    Date, Trimestre) group, then each sum is rounded with round_half_point.

    :param df: DataFrame with value Total in Question column
    :returns: DataFrame with evaluation marks, one row per group, "Exercice"
        set to "Total" and "Niveau" set to 0

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> df_exo = compute_exo_marks(df)
    >>> compute_eval_marks(df_exo)
      Eleve Nom        Date Trimestre  Bareme  Mark Exercice  Niveau
    0    E1  N1  16/09/2016         1     6.0   5.0    Total       0
    1    E1  N2  01/10/2016         1     4.0   3.0    Total       0
    2    E2  N1  16/09/2016         1     6.0   4.5    Total       0
    3    E2  N2  01/10/2016         1     4.0   3.5    Total       0

    """
    # Only the exercise totals are added up.
    totals = df[df["Question"] == "Total"]
    # The aggregation is named with a string: recent pandas releases warn when
    # np.sum is passed as aggfunc.
    summed = pd.pivot_table(
        totals,
        index=["Eleve", "Nom", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        aggfunc="sum",
    )
    # Element-wise rounding done column by column with Series.map, because
    # DataFrame.applymap is deprecated in recent pandas releases.
    rounded = summed.apply(lambda col: col.map(round_half_point))

    eval_m = rounded.reset_index()
    eval_m["Exercice"] = "Total"
    eval_m["Niveau"] = 0
    return eval_m

def digest_flat_df(flat_df):
    """ Build the question, exercise and evaluation DataFrames from a flat df

    flat_df is not modified: the question level DataFrame is a copy of it with
    "Mark", "Latex_rep" and "Normalized" columns added. The exercise and
    evaluation DataFrames come from compute_exo_marks and compute_eval_marks,
    each with a "Normalized" column added.

    :param flat_df: DataFrame of the notes (one row per question and student)
    :returns: tuple (question DataFrame, exercise DataFrame, evaluation DataFrame)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    """
    # Question level: work on a copy so that the caller's frame stays as it is.
    by_question = flat_df.copy()
    by_question["Mark"] = compute_marks(flat_df)
    by_question["Latex_rep"] = compute_latex_rep(flat_df)
    by_question["Normalized"] = compute_normalized(by_question)

    # Exercise level.
    by_exercise = compute_exo_marks(by_question)
    by_exercise["Normalized"] = compute_normalized(by_exercise)

    # Evaluation level, built from the exercise totals.
    by_eval = compute_eval_marks(by_exercise)
    by_eval["Normalized"] = compute_normalized(by_eval)

    return by_question, by_exercise, by_eval

def students_pov(quest_df, exo_df, eval_df):
    r""" Reorganise the marks student by student

    :param quest_df: DataFrame of questions with "Eleve" and "Exercice" columns
    :param exo_df: DataFrame of exercises with "Eleve" and "Exercice" columns
    :param eval_df: DataFrame of evaluations with an "Eleve" column
    :returns: list with one dictionary per student of eval_df (in order of
        first appearance) with keys
        "Nom": the student,
        "Total": first row of eval_df for this student,
        "Exercices": list of dictionaries, one per exercise of the student,
        with keys "Nom" (the exercise), "Total" (first row of exo_df for this
        student and exercise) and "Questions" (rows of quest_df for this
        student and exercise)

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    >>> std_pov = students_pov(quest_df, exo_df, eval_df)
    >>> std = std_pov[0]
    >>> std["Nom"]
    'E1'
    >>> "{} / {}".format(std["Total"]["Mark"], std["Total"]["Bareme"])
    '5.0 / 6.0'
    >>> for exo in std["Exercices"]:
    ...    print("{}: {} / {}".format(exo["Nom"], exo["Total"]["Mark"], exo["Total"]["Bareme"]))
    Ex1: 1.5 / 2.0
    Ex2: 3.5 / 4.0
    >>> exo = std["Exercices"][0]
    >>> for _,q in exo["Questions"].iterrows():
    ...    print("{} : {}".format(q["Question"], q["Latex_rep"]))
    Q1 : 1.0
    Q2 : 0.33
    Q1 : \RepU

    """
    students = []
    for name in eval_df["Eleve"].unique():
        # Rows which belong to the current student.
        own_quest = quest_df[quest_df["Eleve"] == name]
        own_exo = exo_df[exo_df["Eleve"] == name]
        own_total = eval_df[eval_df["Eleve"] == name].iloc[0]

        exercices = [
            {
                "Nom": label,
                "Total": own_exo[own_exo["Exercice"] == label].iloc[0],
                "Questions": own_quest[own_quest["Exercice"] == label],
            }
            for label in own_exo["Exercice"].unique()
        ]

        students.append({
            "Nom": name,
            "Total": own_total,
            "Exercices": exercices,
        })
    return students


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del