repytex/notes_tools/tools/df_marks_manip.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
from math import ceil

# Values manipulations

def round_half_point(val):
    """ Round a value up to the next half point

    Values that can't be rounded (the computation raises ValueError or
    TypeError, as it does for NaN or for a non numeric value) are returned
    untouched.

    :param val: the value to round
    :returns: the smallest multiple of 0.5 which is not below val, or val
        itself when it can't be rounded

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point("abs")
    'abs'
    """
    try:
        half_steps = ceil(2.0 * val)
        return 0.5 * half_steps
    except (ValueError, TypeError):
        return val

latex_caract = ["\\NoRep", "\\RepZ", "\\RepU", "\\RepD", "\\RepT"]
def note_to_rep(x):
    r""" Transform a Note into its latex representation

    When "Niveau" is falsy the Note is returned as it is. Otherwise a missing
    Note gives the first latex macro of latex_caract, a Note equal to 0, 1, 2
    or 3 gives the macro at position Note + 1 and any other Note is returned
    as it is.

    :param x: dictionary (or DataFrame row) with "Niveau" and "Note" keys
    :returns: the latex macro (str) or the untouched Note

    >>> note_to_rep({"Niveau": 0, "Note": 1.5})
    1.5
    >>> note_to_rep({"Niveau": 1, "Note": 2})
    '\\RepD'
    >>> note_to_rep({"Niveau": 1, "Note": None})
    '\\NoRep'
    """
    if not x["Niveau"]:
        return x["Note"]

    note = x["Note"]
    if pd.isnull(note):
        return latex_caract[0]
    if note in range(4):
        return latex_caract[int(note) + 1]
    return note

def note_to_mark(x):
    """ Compute the mark of a row, converting it when it is a "Niveau" note

    A "Niveau" note is turned into a mark with Note * Bareme / 3. Any other
    note already is a mark and is returned as it is.

    :param x: dictionary (or DataFrame row) with "Niveau", "Note" and "Bareme" keys
    :returns: the mark

    >>> note_to_mark({"Niveau": 0, "Note": 1.5, "Bareme": 2})
    1.5
    >>> note_to_mark({"Niveau": 1, "Note": 2, "Bareme": 2})
    1.3333333333333333
    """
    if not x["Niveau"]:
        return x["Note"]
    return x["Note"] * x["Bareme"] / 3

def question_uniq_formater(row):
    """ Create a kind of unique description of the question

    The description is "<exercice> <question>", followed by
    " (<commentaire>)" when the row has a non null "Commentaire".
    An exercice which can be converted to an int is prefixed with "Exo" and
    such a question is prefixed with "Qu". A null question (NaN or None) is
    left out. The exercice is always rendered with str().

    :param row: dictionary (or DataFrame row) with "Exercice" and "Question"
        keys and an optional "Commentaire" key
    :returns: the description (str)

    >>> question_uniq_formater({"Exercice": "Ex1", "Question": "Q1"})
    'Ex1 Q1'
    >>> question_uniq_formater({"Exercice": 1, "Question": 2, "Commentaire": "bonus"})
    'Exo1 Qu2 (bonus)'
    >>> question_uniq_formater({"Exercice": "Ex1", "Question": None})
    'Ex1 '
    """
    exercice = row["Exercice"]
    try:
        int(exercice)
    except (ValueError, TypeError):
        # int() raises ValueError on a non numeric string or on NaN and
        # TypeError on None: in both cases the value is not a number.
        ans = str(exercice)
    else:
        ans = "Exo" + str(exercice)

    ans += " "

    question = row["Question"]
    try:
        int(question)
    except (ValueError, TypeError):
        # Same as above, but a null question is simply left out
        if not pd.isnull(question):
            ans += str(question)
    else:
        ans += "Qu" + str(question)

    # "Commentaire" is optional
    try:
        commentaire = row["Commentaire"]
    except KeyError:
        pass
    else:
        if not pd.isnull(commentaire):
            ans += " ({})".format(commentaire)
    return ans

# DataFrame columns manipulations

def compute_marks(df):
    """ Compute the Mark column of df

    note_to_mark is applied on each row. The result is returned, df is not
    modified.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: the marks, one per row of df

    >>> df = pd.DataFrame({"Note": [1.5, 2, 3], "Niveau": [0, 1, 1], "Bareme": [2, 2, 2]})
    >>> compute_marks(df).tolist()
    [1.5, 1.3333333333333333, 2.0]
    """
    needed = df[["Note", "Niveau", "Bareme"]]
    return needed.apply(note_to_mark, axis=1)

def compute_latex_rep(df):
    r""" Compute the Latex_rep column of df

    note_to_rep is applied on each row, then the values which are still
    missing are replaced with "??". The result is returned, df is not
    modified.

    :param df: DataFrame with "Note" and "Niveau" columns.
    :returns: the latex representations, one per row of df

    >>> df = pd.DataFrame({"Note": [1.5, 2, float("nan")], "Niveau": [0, 1, 0]})
    >>> [str(rep) for rep in compute_latex_rep(df)]
    ['1.5', '\\RepD', '??']
    """
    reps = df[["Note", "Niveau"]].apply(note_to_rep, axis=1)
    return reps.fillna("??")

def compute_normalized(df):
    """ Compute the normalized mark (Mark / Bareme)

    The result is returned, df is not modified.

    :param df: DataFrame with "Mark" and "Bareme" columns
    :returns: the element wise division of "Mark" by "Bareme"

    >>> df = pd.DataFrame({"Mark": [1.0, 1.5], "Bareme": [2, 2]})
    >>> compute_normalized(df).tolist()
    [0.5, 0.75]
    """
    marks = df["Mark"]
    scales = df["Bareme"]
    return marks / scales

def compute_question_description(df):
    """ Compute the unique description of the question of each row

    question_uniq_formater is applied on each row of df.

    :param df: DataFrame with the columns read by question_uniq_formater
    :returns: the descriptions, one per row of df

    >>> df = pd.DataFrame({"Exercice": ["Ex1", "Ex2"], "Question": ["Q1", "Q2"]})
    >>> compute_question_description(df).tolist()
    ['Ex1 Q1', 'Ex2 Q2']
    """
    descriptions = df.apply(question_uniq_formater, axis=1)
    return descriptions

# Computing custom values

def compute_exo_marks(df):
    """ Compute Exercice level marks

    "Bareme" and "Mark" are summed for each (Eleve, Nom, Exercice, Date,
    Trimestre) then rounded with round_half_point. Each resulting row gets
    "Total" as "Question" and 0 as "Niveau".

    :param df: the original marks, with "Eleve", "Nom", "Exercice", "Date",
        "Trimestre", "Bareme" and "Mark" columns
    :returns: DataFrame with computed marks, one row per exercice of an
        evaluation of a student

    >>> df = pd.DataFrame({"Eleve": ["E1"]*2, "Nom": ["N1"]*2,
    ...    "Exercice": ["Ex1"]*2, "Date": ["16/09/2016"]*2,
    ...    "Trimestre": ["1"]*2, "Bareme": [1, 1], "Mark": [1.0, 0.33]})
    >>> exo = compute_exo_marks(df)
    >>> exo[["Bareme", "Mark"]].values.tolist()
    [[2.0, 1.5]]
    >>> exo[["Question", "Niveau"]].values.tolist()
    [['Total', 0]]
    """
    exo_pt = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        # The "sum" string is the replacement pandas asks for when np.sum
        # is given as aggfunc (same aggregation, no FutureWarning).
        aggfunc="sum",
    )
    # DataFrame.applymap is deprecated (pandas 2.1). Mapping each column is
    # the same element wise operation and works with every pandas version.
    rounded = exo_pt.apply(lambda column: column.map(round_half_point))

    exo = rounded.reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo

def compute_eval_marks(df):
    """ Compute Nom (evaluation) level marks from the exercice level marks

    The evaluations (values of "Nom") are handled one after the other. For
    each of them the rows are grouped by (Eleve, Nom, Trimestre), "Bareme"
    and "Mark" are summed and "Normalized" is averaged when df has such a
    column.

    When all the rows of an evaluation share the same date, this date is
    kept in "Date". Otherwise the evaluation is spread over time: rows
    without "Mark" are dropped, so that they are not penalizing, and "Date"
    is set to "Trimestre".

    :param df: DataFrame with "Eleve", "Nom", "Trimestre", "Date", "Bareme"
        and "Mark" columns and an optional "Normalized" column
    :returns: DataFrame with evaluation marks, indexed from 0 to n - 1 (an
        empty DataFrame when df has no evaluation)

    >>> exo = pd.DataFrame({"Eleve": ["E1"]*2, "Nom": ["N1"]*2,
    ...    "Trimestre": ["1"]*2, "Date": ["16/09/2016"]*2,
    ...    "Bareme": [2.0, 4.0], "Mark": [1.5, 3.5]})
    >>> marks = compute_eval_marks(exo)
    >>> marks[["Bareme", "Mark"]].values.tolist()
    [[6.0, 5.0]]
    >>> marks["Date"].tolist()
    ['16/09/2016']
    """
    # "Normalized" can only be aggregated when it is there: pivot_table
    # raises a KeyError on a value which is not a column of the data.
    aggregations = {"Bareme": "sum", "Mark": "sum"}
    if "Normalized" in df.columns:
        aggregations["Normalized"] = "mean"

    per_eval = []
    for eval_name in df["Nom"].unique():
        eval_df = df[df["Nom"] == eval_name]
        dates = eval_df["Date"].unique()
        if len(dates) > 1:
            # Evaluation spread over time: missing marks are not
            # penalizing, so the rows without a mark are removed.
            eval_df = eval_df.dropna(subset=["Mark"])
            dates = ["Trimestre"]

        aggfunc = dict(aggregations)
        # Every group of this evaluation gets the same date label
        aggfunc["Date"] = lambda _column, labels=dates: labels[0]

        eval_pt = pd.pivot_table(
            eval_df,
            index=["Eleve", "Nom", "Trimestre"],
            values=list(aggfunc),
            aggfunc=aggfunc,
        )
        per_eval.append(eval_pt.reset_index())

    if not per_eval:
        return pd.DataFrame()
    # A single concat, with a fresh index so that labels are unique
    return pd.concat(per_eval, ignore_index=True)

def digest_flat_df(flat_df):
    """ Compute necessary elements to make a flat df usable for analysis.

    Three DataFrames are built:

    - a copy of flat_df with the "Mark", "Latex_rep", "Normalized" and
      "Uniq_quest" columns added (question level),
    - the result of compute_exo_marks on this copy with a "Normalized"
      column added (exercice level),
    - the result of compute_eval_marks on the exercice level DataFrame with
      its "Normalized" column recomputed from "Mark" and "Bareme"
      (evaluation level).

    flat_df itself is not modified.

    :param flat_df: DataFrame with one row per question of an evaluation of
        a student
    :returns: the tuple (question level, exercice level, evaluation level)

    >>> df = pd.DataFrame({"Eleve": ["E1"]*2, "Nom": ["N1"]*2,
    ...    "Exercice": ["Ex1"]*2, "Question": ["Q1", "Q2"],
    ...    "Date": ["16/09/2016"]*2, "Trimestre": ["1"]*2,
    ...    "Bareme": [1, 1], "Niveau": [0, 0], "Note": [1, 0.33]})
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    >>> quest_df["Uniq_quest"].tolist()
    ['Ex1 Q1', 'Ex1 Q2']
    >>> exo_df["Mark"].tolist()
    [1.5]
    >>> eval_df["Normalized"].tolist()
    [0.75]
    """
    # Question level
    quest_df = flat_df.copy()
    quest_df["Mark"] = compute_marks(flat_df)
    quest_df["Latex_rep"] = compute_latex_rep(flat_df)
    quest_df["Normalized"] = compute_normalized(quest_df)
    quest_df["Uniq_quest"] = compute_question_description(quest_df)

    # Exercice level
    exo_df = compute_exo_marks(quest_df)
    exo_df["Normalized"] = compute_normalized(exo_df)

    # Evaluation level
    eval_df = compute_eval_marks(exo_df)
    eval_df["Normalized"] = compute_normalized(eval_df)

    return quest_df, exo_df, eval_df

def students_pov(quest_df, exo_df, eval_df):
    """ Reorganize the marks student by student

    There is one dictionary per student found in eval_df, with the keys:

    - "Nom": the student,
    - "Total": the first row of eval_df for this student,
    - "Exercices": a list with one dictionary per exercice of the student
      in exo_df. Its keys are "Nom" (the exercice), "Total" (the first row
      of exo_df for this student and this exercice) and "Questions" (the
      rows of quest_df for this student and this exercice).

    :param quest_df: question level DataFrame with "Eleve" and "Exercice" columns
    :param exo_df: exercice level DataFrame with "Eleve" and "Exercice" columns
    :param eval_df: evaluation level DataFrame with an "Eleve" column
    :returns: list of dictionaries, one per student

    >>> quest = pd.DataFrame({"Eleve": ["E1", "E1"], "Exercice": ["Ex1", "Ex1"],
    ...    "Question": ["Q1", "Q2"]})
    >>> exo = pd.DataFrame({"Eleve": ["E1"], "Exercice": ["Ex1"], "Mark": [1.5]})
    >>> evals = pd.DataFrame({"Eleve": ["E1"], "Mark": [1.5]})
    >>> std = students_pov(quest, exo, evals)[0]
    >>> std["Nom"]
    'E1'
    >>> float(std["Total"]["Mark"])
    1.5
    >>> [ex["Nom"] for ex in std["Exercices"]]
    ['Ex1']
    >>> len(std["Exercices"][0]["Questions"])
    2
    """
    students = []
    for name in eval_df["Eleve"].unique():
        own_quest = quest_df[quest_df["Eleve"] == name]
        own_exo = exo_df[exo_df["Eleve"] == name]
        total = eval_df[eval_df["Eleve"] == name].iloc[0]

        exercices = [
            {
                "Nom": label,
                "Total": own_exo[own_exo["Exercice"] == label].iloc[0],
                "Questions": own_quest[own_quest["Exercice"] == label],
            }
            for label in own_exo["Exercice"].unique()
        ]

        students.append({"Nom": name, "Total": total, "Exercices": exercices})
    return students


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del