add level column

2017-03-07 17:24:15 +03:00 · 2017-03-07 17:24:15 +03:00 · 0fba0017fe
parent 0abd2be854
commit 0fba0017fe
1 changed files with 92 additions and 18 deletions
--- a/notes_tools/tools/df_marks_manip.py
+++ b/notes_tools/tools/df_marks_manip.py
@ -3,7 +3,7 @@

 import pandas as pd
 import numpy as np
-from math import ceil
+from math import ceil, floor
 import logging
 logger = logging.getLogger(__name__)

@ -47,7 +47,7 @@ def note_to_rep(x):
    return x["Note"]

 def note_to_mark(x):
-    """ Compute the mark when it is a "Nivea" note
+    """ Compute the mark when it is a "Niveau" note

    :param x: dictionnary with "Niveau", "Note" and "Bareme" keys

@ -72,6 +72,46 @@ def note_to_mark(x):
        return x["Note"] * x["Bareme"] / 3
    return x["Note"]

+def note_to_level(x):
+    """ Compute the level ("na",0,1,2,3).
+
+    "na" corresponds to "no answer"
+
+    :param x: dictionary with "Niveau", "Note" and "Bareme" keys
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[1, 0.33, np.nan, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> note_to_level(df.loc[0])
+    3
+    >>> note_to_level(df.loc[1])
+    1
+    >>> note_to_level(df.loc[2])
+    'na'
+    >>> note_to_level(df.loc[3])
+    3
+    >>> note_to_level(df.loc[5])
+    3
+    >>> note_to_level(df.loc[10])
+    2
+    """
+
+    if pd.isnull(x["Note"]):
+        return "na"
+
+    if x["Niveau"]:
+        return int(x["Note"])
+    else:
+        return int(ceil(x["Note"] / x["Bareme"] * 3))
+
 def question_uniq_formater(row):
    """ Create a kind of unique description of the question 

@ -154,6 +194,39 @@ def compute_marks(df):
    """
    return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)

+def compute_level(df):
+    """ Add Level column to df
+
+    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ...    "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ...    "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ...    "Trimestre": ["1"]*12,
+    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "Note":[np.nan, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> compute_level(df)
+    0     na
+    1      1
+    2      3
+    3      3
+    4      1
+    5      3
+    6      2
+    7      3
+    8      3
+    9      2
+    10     2
+    11     3
+    dtype: object
+    """
+    return df[["Note", "Niveau", "Bareme"]].apply(note_to_level, axis=1)
+
 def compute_latex_rep(df):
    """ Add Latex_rep column to df

@ -340,24 +413,24 @@ def digest_flat_df(flat_df):
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   nan, 0, 0, nan, nan, nan],
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   np.nan, 0, 0, np.nan, np.nan, np.nan],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
-    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest"]]
-       Eleve Nom  Mark Latex_rep  Normalized Uniq_quest
-    0     E1  N1  1.00         1        1.00     Ex1 Q1
-    1     E1  N1  0.33      0.33        0.33     Ex1 Q2
-    2     E1  N1  2.00         2        1.00     Ex2 Q1
-    3     E1  N1  1.50       1.5        0.75     Ex2 Q2
-    4     E1  N2  0.67     \RepU        0.33     Ex1 Q1
-    5     E1  N2  2.00     \RepT        1.00     Ex2 Q1
-    6     E2  N1   NaN        ??         NaN     Ex1 Q1
-    7     E2  N1  0.00         0        0.00     Ex1 Q2
-    8     E2  N1  0.00         0        0.00     Ex2 Q1
-    9     E2  N1   NaN        ??         NaN     Ex2 Q2
-    10    E2  N2   NaN    \NoRep         NaN     Ex1 Q1
-    11    E2  N2   NaN    \NoRep         NaN     Ex2 Q1
+    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest", "Level"]]
+       Eleve Nom  Mark Latex_rep  Normalized Uniq_quest Level
+    0     E1  N1  1.00         1        1.00     Ex1 Q1     3
+    1     E1  N1  0.33      0.33        0.33     Ex1 Q2     1
+    2     E1  N1  2.00         2        1.00     Ex2 Q1     3
+    3     E1  N1  1.50       1.5        0.75     Ex2 Q2     3
+    4     E1  N2  0.67     \RepU        0.33     Ex1 Q1     1
+    5     E1  N2  2.00     \RepT        1.00     Ex2 Q1     3
+    6     E2  N1   NaN        ??         NaN     Ex1 Q1    na
+    7     E2  N1  0.00         0        0.00     Ex1 Q2     0
+    8     E2  N1  0.00         0        0.00     Ex2 Q1     0
+    9     E2  N1   NaN        ??         NaN     Ex2 Q2    na
+    10    E2  N2   NaN    \NoRep         NaN     Ex1 Q1    na
+    11    E2  N2   NaN    \NoRep         NaN     Ex2 Q1    na
    >>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]]
      Eleve Nom Exercice  Mark  Normalized
    0    E1  N1      Ex1   1.5        0.75
@ -376,8 +449,9 @@ def digest_flat_df(flat_df):
    3      1    E2  N2         1     4.0  01/10/2016   NaN         NaN
    """
    # Remove data with "nn" (non notés)
-    df = flat_df.copy()[flat_df["Note"] != "nn"]
+    df = flat_df.copy()[flat_df["Note"].astype("object") != "nn"]
    df["Mark"] = compute_marks(df)
+    df["Level"] = compute_level(df)
    df["Latex_rep"] = compute_latex_rep(df)
    df["Normalized"] = compute_normalized(df)
    df["Uniq_quest"] = compute_question_description(df)