Feat: Base for student exploration
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
from .csv_extraction import flat_clear_csv
|
||||
from .csv_extraction import flat_df_students, flat_df_for
|
||||
from .df_marks_manip import pp_q_scores
|
||||
|
||||
@@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
|
||||
|
||||
pd.set_option("Precision", 2)
|
||||
|
||||
|
||||
def try_replace(x, old, new):
|
||||
try:
|
||||
return str(x).replace(old, new)
|
||||
@@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
return students
|
||||
|
||||
|
||||
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
""" Flat the ws for students
|
||||
def flat_df_students(
|
||||
df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
|
||||
):
|
||||
""" Flat the dataframe by returning a dataframe with on student on each line
|
||||
|
||||
:param df: the dataframe (one row per questions)
|
||||
:param no_student_columns: columns that are not students
|
||||
@@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
value_name=COLUMNS["score"],
|
||||
).dropna(subset=[COLUMNS["score"]])
|
||||
)
|
||||
if postprocessing:
|
||||
return postprocess(pd.concat(scores))
|
||||
return pd.concat(scores)
|
||||
|
||||
|
||||
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
""" Flat and clear the dataframe extracted from csv
|
||||
def flat_df_for(
|
||||
df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
|
||||
):
|
||||
""" Extract the data only for one student
|
||||
|
||||
:param csv_df: data frame read from csv
|
||||
:param df: the dataframe (one row per questions)
|
||||
:param no_student_columns: columns that are not students
|
||||
:return: dataframe with one row per questions and students
|
||||
|
||||
Columns of csv files:
|
||||
|
||||
- NO_ST_COLUMNS meta data on questions
|
||||
- one for each students
|
||||
|
||||
"""
|
||||
students = extract_students(df, no_student_columns)
|
||||
if student not in students:
|
||||
raise KeyError("This student is not in the table")
|
||||
st_df = df[list(no_student_columns) + [student]]
|
||||
st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
|
||||
subset=[COLUMNS["score"]]
|
||||
)
|
||||
if postprocessing:
|
||||
return postprocess(st_df)
|
||||
return st_df
|
||||
|
||||
|
||||
def postprocess(df):
|
||||
""" Postprocessing score dataframe
|
||||
|
||||
- Replace na with an empty string
|
||||
- Replace "NOANSWER" with -1
|
||||
- Turn commas number to dot numbers
|
||||
"""
|
||||
df = flat_df_students(csv_df)
|
||||
|
||||
df[COLUMNS["question"]].fillna("", inplace=True)
|
||||
df[COLUMNS["exercise"]].fillna("", inplace=True)
|
||||
@@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
.apply(lambda x: try_replace(x, ",", "."))
|
||||
)
|
||||
df[COLUMNS["score_rate"]] = pd.to_numeric(
|
||||
df[COLUMNS["score_rate"]]
|
||||
.apply(lambda x: try_replace(x, ",", ".")),
|
||||
errors="coerce"
|
||||
df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
|
||||
errors="coerce",
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
Reference in New Issue
Block a user