Feat: first analysis and plots
This commit is contained in:
parent
2296615cb4
commit
358af2aabf
@ -1,15 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
NO_STUDENT_COLUMNS = [
|
||||
"Trimestre",
|
||||
"Nom",
|
||||
"Date",
|
||||
"Exercice",
|
||||
"Question",
|
||||
"Competence",
|
||||
"Domaine",
|
||||
"Commentaire",
|
||||
"Bareme",
|
||||
"Niveau",
|
||||
]
|
||||
from .csv_extraction import flat_clear_csv
|
||||
from .df_marks_manip import pp_q_scores
|
||||
|
30
recopytex/config.py
Normal file
30
recopytex/config.py
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
# Columns that describe a question (everything that is not a student column).
# Keys are the internal identifiers used in the code, values are the column
# headers as they appear in the data files.
NO_ST_COLUMNS = {
    "term": "Trimestre",
    "assessment": "Nom",
    "date": "Date",
    "exercise": "Exercice",
    "question": "Question",
    "competence": "Competence",
    "theme": "Domaine",
    "comment": "Commentaire",
    "score_rate": "Bareme",
    "is_leveled": "Est_nivele",
}
|
||||
|
||||
# Every column name used by the package: the question columns of
# NO_ST_COLUMNS first, then the per-student columns.
COLUMNS = dict(NO_ST_COLUMNS)
COLUMNS.update(
    {
        "student": "Eleve",
        "score": "Score",
        "mark": "Note",
        "level": "Niveau",
        "normalized": "Normalise",
    }
)
|
||||
|
||||
# Special (non numeric) values accepted in a score cell.
VALIDSCORE = {
    # The item is not scored yet
    "NOTFILLED": "",
    # The student gave no answer (this score impacts the final mark)
    "NOANSWER": ".",
    # The student was absent (this score does not impact the final mark)
    "ABS": "a",
}
|
@ -4,12 +4,12 @@
|
||||
""" Extracting data from xlsx files """
|
||||
|
||||
import pandas as pd
|
||||
from . import NO_STUDENT_COLUMNS
|
||||
from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
|
||||
|
||||
pd.set_option("Precision", 2)
|
||||
|
||||
|
||||
def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
""" Extract the list of students from df
|
||||
|
||||
:param df: the dataframe
|
||||
@ -20,7 +20,7 @@ def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
return students
|
||||
|
||||
|
||||
def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
""" Flat the ws for students
|
||||
|
||||
:param df: the dataframe (one row per questions)
|
||||
@ -29,7 +29,7 @@ def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
|
||||
Columns of csv files:
|
||||
|
||||
- NO_STUDENT_COLUMNS
|
||||
- NO_ST_COLUMNS meta data on questions
|
||||
- one for each students
|
||||
|
||||
This function flat student's columns to "student" and "score"
|
||||
@ -42,14 +42,14 @@ def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
df,
|
||||
id_vars=no_student_columns,
|
||||
value_vars=st,
|
||||
var_name="student",
|
||||
value_name="score",
|
||||
var_name=COLUMNS["student"],
|
||||
value_name=COLUMNS["score"],
|
||||
)
|
||||
)
|
||||
return pd.concat(scores)
|
||||
return pd.concat(scores).dropna(subset=[COLUMNS["score"]])
|
||||
|
||||
|
||||
def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||
""" Flat and clear the dataframe extracted from csv
|
||||
|
||||
:param csv_df: data frame read from csv
|
||||
@ -59,12 +59,16 @@ def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
|
||||
"""
|
||||
df = flat_df_students(csv_df)
|
||||
|
||||
df.columns = df.columns.map(lambda x: x.lower())
|
||||
df[COLUMNS["question"]].fillna("", inplace=True)
|
||||
df[COLUMNS["exercise"]].fillna("", inplace=True)
|
||||
df[COLUMNS["comment"]].fillna("", inplace=True)
|
||||
df[COLUMNS["competence"]].fillna("", inplace=True)
|
||||
|
||||
df["question"].fillna("", inplace=True)
|
||||
df["exercice"].fillna("", inplace=True)
|
||||
df["commentaire"].fillna("", inplace=True)
|
||||
df["competence"].fillna("", inplace=True)
|
||||
df[COLUMNS["score"]] = pd.to_numeric(
|
||||
df[COLUMNS["score"]]
|
||||
.replace(VALIDSCORE["NOANSWER"], -1)
|
||||
.apply(lambda x: str(x).replace(",", "."))
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
|
205
recopytex/df_marks_manip.py
Normal file
205
recopytex/df_marks_manip.py
Normal file
@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from math import ceil, floor
|
||||
from .config import COLUMNS, VALIDSCORE
|
||||
|
||||
# Values manipulations
|
||||
|
||||
|
||||
def round_half_point(val):
    """ Round a value up to the next half point

    :param val: the value to round
    :return: the smallest multiple of 0.5 greater than or equal to val;
        val itself, unchanged, when it cannot be rounded (not a number,
        NaN or infinite)

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(1.5)
    1.5
    >>> round_half_point("a")
    'a'
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: val is not a number
        # ValueError: val is NaN
        # OverflowError: val is infinite
        return val
|
||||
|
||||
|
||||
def score_to_mark(x):
    """ Compute the mark of one scored item

    A score of -1 (no answer) gives a mark of 0.
    If the item is leveled, the score is multiplied by the score_rate and
    divided by 3, otherwise the score is copied.

    :param x: dictionary-like (for example a DataFrame row) with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
    :return: the mark
    :raises ValueError: if the item is leveled and the score is not 0, 1, 2
        or 3, or if the item is not leveled and the score is greater than
        the score_rate

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> score_to_mark(df.loc[0])
    1.0
    >>> score_to_mark(df.loc[10])
    1.3333333333333333
    """
    score = x[COLUMNS["score"]]

    # -1 is no answer
    if score == -1:
        return 0

    if x[COLUMNS["is_leveled"]]:
        if score not in [0, 1, 2, 3]:
            raise ValueError(f"The evaluation is out of range: {score} at {x}")
        return score * x[COLUMNS["score_rate"]] / 3

    score_rate = x[COLUMNS["score_rate"]]
    if score > score_rate:
        # The score is read through COLUMNS: a literal "score" key does not
        # exist in x and would raise KeyError instead of this ValueError.
        raise ValueError(
            f"The score ({score}) is greated than the rating scale ({score_rate}) at {x}"
        )
    return score
|
||||
|
||||
|
||||
def score_to_level(x):
    """ Compute the level of one scored item

    A score of -1 (no answer) is returned unchanged and a missing score
    (NaN) gives 'na'.
    If the item is leveled, the level is the score converted to int,
    otherwise it is the ratio score / score_rate scaled to 3 and rounded up.

    :param x: dictionary-like (for example a DataFrame row) with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
    :return: the level as an int, -1 for no answer or 'na' for a missing score

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> score_to_level(df.loc[0])
    3
    >>> score_to_level(df.loc[1])
    1
    >>> score_to_level(df.loc[2])
    'na'
    >>> score_to_level(df.loc[3])
    3
    >>> score_to_level(df.loc[5])
    3
    >>> score_to_level(df.loc[10])
    2
    """
    score = x[COLUMNS["score"]]

    # -1 is no answer
    if score == -1:
        return score

    # A missing score cannot be converted to an int (ceil and int both
    # raise ValueError on NaN), so it is reported as 'na'.
    if pd.isna(score):
        return "na"

    if x[COLUMNS["is_leveled"]]:
        return int(score)
    return int(ceil(score / x[COLUMNS["score_rate"]] * 3))
|
||||
|
||||
|
||||
# DataFrame columns manipulations
|
||||
|
||||
|
||||
def compute_mark(df):
    """ Compute the mark of every row of df

    score_to_mark is applied row by row; df itself is not modified.

    :param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
    :return: the marks, one per row of df

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_mark(df)
    0     1.00
    1     0.33
    2     2.00
    3     1.50
    4     0.67
    5     2.00
    6     0.67
    7     1.00
    8     1.50
    9     1.00
    10    1.33
    11    2.00
    dtype: float64
    """
    needed = [COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]
    rows = df[needed]
    return rows.apply(score_to_mark, axis=1)
|
||||
|
||||
|
||||
def compute_level(df):
    """ Compute the level of every row of df

    score_to_level is applied row by row; df itself is not modified.

    :param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
    :return: the levels, one per row of df

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_level(df)
    0     na
    1      1
    2      3
    3      3
    4      1
    5      3
    6      2
    7      3
    8      3
    9      2
    10     2
    11     3
    dtype: object
    """
    needed = [COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]
    rows = df[needed]
    return rows.apply(score_to_level, axis=1)
|
||||
|
||||
|
||||
def compute_normalized(df):
    """ Compute the normalized mark (mark / score_rate)

    :param df: DataFrame with COLUMNS["mark"] and COLUMNS["score_rate"] columns
    :return: the normalized marks, one per row of df

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df[COLUMNS["mark"]] = compute_mark(df)
    >>> compute_normalized(df)
    0     1.00
    1     0.33
    2     1.00
    3     0.75
    4     0.33
    5     1.00
    6     0.67
    7     1.00
    8     0.75
    9     0.50
    10    0.67
    11    1.00
    dtype: float64
    """
    marks = df[COLUMNS["mark"]]
    score_rates = df[COLUMNS["score_rate"]]
    return marks / score_rates
|
||||
|
||||
|
||||
# Postprocessing question scores
|
||||
|
||||
|
||||
def pp_q_scores(df):
    """ Postprocess a questions-scores dataframe

    The mark column is listed before the normalized one because
    compute_normalized reads the mark column.

    :param df: questions-scores dataframe
    :return: a dataframe with the columns of df plus the mark, level and normalized columns
    """
    return df.assign(
        **{
            COLUMNS["mark"]: compute_mark,
            COLUMNS["level"]: compute_level,
            COLUMNS["normalized"]: compute_normalized,
        }
    )
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Reglages pour 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user