Feat: Base for student exploration

This commit is contained in:
2019-08-20 21:15:39 +02:00
parent 1fc7270bed
commit 7bb224a48f
4 changed files with 1281 additions and 165 deletions

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8
from .csv_extraction import flat_clear_csv
from .csv_extraction import flat_df_students, flat_df_for
from .df_marks_manip import pp_q_scores

View File

@@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
pd.set_option("Precision", 2)
def try_replace(x, old, new):
try:
return str(x).replace(old, new)
@@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
return students
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
""" Flat the ws for students
def flat_df_students(
df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Flat the dataframe by returning a dataframe with on student on each line
:param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students
@@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
value_name=COLUMNS["score"],
).dropna(subset=[COLUMNS["score"]])
)
if postprocessing:
return postprocess(pd.concat(scores))
return pd.concat(scores)
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
""" Flat and clear the dataframe extracted from csv
def flat_df_for(
df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Extract the data only for one student
:param csv_df: data frame read from csv
:param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students
:return: dataframe with one row per questions and students
Columns of csv files:
- NO_ST_COLUMNS meta data on questions
- one for each students
"""
students = extract_students(df, no_student_columns)
if student not in students:
raise KeyError("This student is not in the table")
st_df = df[list(no_student_columns) + [student]]
st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
subset=[COLUMNS["score"]]
)
if postprocessing:
return postprocess(st_df)
return st_df
def postprocess(df):
""" Postprocessing score dataframe
- Replace na with an empty string
- Replace "NOANSWER" with -1
- Turn commas number to dot numbers
"""
df = flat_df_students(csv_df)
df[COLUMNS["question"]].fillna("", inplace=True)
df[COLUMNS["exercise"]].fillna("", inplace=True)
@@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
.apply(lambda x: try_replace(x, ",", "."))
)
df[COLUMNS["score_rate"]] = pd.to_numeric(
df[COLUMNS["score_rate"]]
.apply(lambda x: try_replace(x, ",", ".")),
errors="coerce"
df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
errors="coerce",
)
return df