Feat: Base for student exploration

2019-08-20 21:15:39 +02:00
parent 1fc7270bed
commit 7bb224a48f
4 changed files with 1281 additions and 165 deletions
--- a/recopytex/init.py
+++ b/recopytex/init.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
 # encoding: utf-8

-from .csv_extraction import flat_clear_csv
+from .csv_extraction import flat_df_students, flat_df_for
 from .df_marks_manip import pp_q_scores
--- a/recopytex/csv_extraction.py
+++ b/recopytex/csv_extraction.py
@@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE

 pd.set_option("Precision", 2)

+
 def try_replace(x, old, new):
    try:
        return str(x).replace(old, new)
@@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
    return students


-def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
-    """ Flat the ws for students
+def flat_df_students(
+    df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
+):
+    """ Flatten the dataframe by returning a dataframe with one student on each line

    :param df: the dataframe (one row per questions)
    :param no_student_columns: columns that are not students
@@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
                value_name=COLUMNS["score"],
            ).dropna(subset=[COLUMNS["score"]])
        )
+    if postprocessing:
+        return postprocess(pd.concat(scores))
    return pd.concat(scores)


-def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
-    """ Flat and clear the dataframe extracted from csv
+def flat_df_for(
+    df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
+):
+    """ Extract the data only for one student

-    :param csv_df: data frame read from csv
+    :param df: the dataframe (one row per question)
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per questions and students

+    Columns of csv files:
+
+    - NO_ST_COLUMNS meta data on questions
+    - one column for each student
+
+    """
+    students = extract_students(df, no_student_columns)
+    if student not in students:
+        raise KeyError("This student is not in the table")
+    st_df = df[list(no_student_columns) + [student]]
+    st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
+        subset=[COLUMNS["score"]]
+    )
+    if postprocessing:
+        return postprocess(st_df)
+    return st_df
+
+
+def postprocess(df):
+    """ Postprocess the score dataframe
+
+    - Replace na with an empty string
+    - Replace "NOANSWER" with -1
+    - Turn numbers written with a decimal comma into dot-decimal numbers
    """
-    df = flat_df_students(csv_df)

    df[COLUMNS["question"]].fillna("", inplace=True)
    df[COLUMNS["exercise"]].fillna("", inplace=True)
@@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
        .apply(lambda x: try_replace(x, ",", "."))
    )
    df[COLUMNS["score_rate"]] = pd.to_numeric(
-        df[COLUMNS["score_rate"]]
-        .apply(lambda x: try_replace(x, ",", ".")),
-        errors="coerce"
+        df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
+        errors="coerce",
    )

    return df