NaN management: compute_marks no longer fills missing marks with 0, so NaN is kept in the computed marks

2016-11-22 15:20:23 +03:00
parent ee6274fd0a
commit b2b9723fd9
2 changed files with 34 additions and 3 deletions
--- a/notes_tools/tools/df_marks_manip.py
+++ b/notes_tools/tools/df_marks_manip.py
@@ -150,7 +150,7 @@ def compute_marks(df):
    11    2.000000
    dtype: float64
    """
-    return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1).fillna(0)
+    return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)

 def compute_latex_rep(df):
    """ Add Latex_rep column to df
@@ -317,8 +317,9 @@ def compute_eval_marks(df):
    return eval_m

 def digest_flat_df(flat_df):
-    """ Compute necessary element to make a flat df usable for analysis.
+    r""" Compute necessary element to make a flat df usable for analysis.

+    >>> from numpy import nan
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ...    "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
@@ -327,10 +328,40 @@ def digest_flat_df(flat_df):
    ...    "Trimestre": ["1"]*12,
    ...    "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    "Note":[1, 0.33, 2, 1.5, 1, 3,   nan, 0, 0, nan, nan, nan],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
+    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest"]]
+       Eleve Nom      Mark Latex_rep  Normalized Uniq_quest
+    0     E1  N1  1.000000         1    1.000000     Ex1 Q1
+    1     E1  N1  0.330000      0.33    0.330000     Ex1 Q2
+    2     E1  N1  2.000000         2    1.000000     Ex2 Q1
+    3     E1  N1  1.500000       1.5    0.750000     Ex2 Q2
+    4     E1  N2  0.666667     \RepU    0.333333     Ex1 Q1
+    5     E1  N2  2.000000     \RepT    1.000000     Ex2 Q1
+    6     E2  N1       NaN        ??         NaN     Ex1 Q1
+    7     E2  N1  0.000000         0    0.000000     Ex1 Q2
+    8     E2  N1  0.000000         0    0.000000     Ex2 Q1
+    9     E2  N1       NaN        ??         NaN     Ex2 Q2
+    10    E2  N2       NaN    \NoRep         NaN     Ex1 Q1
+    11    E2  N2       NaN    \NoRep         NaN     Ex2 Q1
+    >>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]]
+      Eleve Nom Exercice  Mark  Normalized
+    0    E1  N1      Ex1   1.5       0.750
+    1    E1  N1      Ex2   3.5       0.875
+    2    E1  N2      Ex1   1.0       0.500
+    3    E1  N2      Ex2   2.0       1.000
+    4    E2  N1      Ex1   0.0       0.000
+    5    E2  N1      Ex2   0.0       0.000
+    6    E2  N2      Ex1   NaN         NaN
+    7    E2  N2      Ex2   NaN         NaN
+    >>> eval_df[['Eleve', "Nom", "Mark", "Normalized"]]
+      Eleve Nom  Mark  Normalized
+    0    E1  N1   5.0    0.833333
+    1    E1  N2   3.0    0.750000
+    2    E2  N1   0.0    0.000000
+    3    E2  N2   NaN         NaN
    """
    df = flat_df.copy()
    df["Mark"] = compute_marks(flat_df)
--- a/samples/312.xlsx
+++ b/samples/312.xlsx