Feat: first analysis and plots
This commit is contained in:
parent
2296615cb4
commit
358af2aabf
@ -1,15 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
NO_STUDENT_COLUMNS = [
|
from .csv_extraction import flat_clear_csv
|
||||||
"Trimestre",
|
from .df_marks_manip import pp_q_scores
|
||||||
"Nom",
|
|
||||||
"Date",
|
|
||||||
"Exercice",
|
|
||||||
"Question",
|
|
||||||
"Competence",
|
|
||||||
"Domaine",
|
|
||||||
"Commentaire",
|
|
||||||
"Bareme",
|
|
||||||
"Niveau",
|
|
||||||
]
|
|
||||||
|
30
recopytex/config.py
Normal file
30
recopytex/config.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
NO_ST_COLUMNS = {
|
||||||
|
"term": "Trimestre",
|
||||||
|
"assessment": "Nom",
|
||||||
|
"date": "Date",
|
||||||
|
"exercise": "Exercice",
|
||||||
|
"question": "Question",
|
||||||
|
"competence": "Competence",
|
||||||
|
"theme": "Domaine",
|
||||||
|
"comment": "Commentaire",
|
||||||
|
"score_rate": "Bareme",
|
||||||
|
"is_leveled": "Est_nivele",
|
||||||
|
}
|
||||||
|
|
||||||
|
COLUMNS = {
|
||||||
|
**NO_ST_COLUMNS,
|
||||||
|
"student": "Eleve",
|
||||||
|
"score": "Score",
|
||||||
|
"mark": "Note",
|
||||||
|
"level": "Niveau",
|
||||||
|
"normalized": "Normalise",
|
||||||
|
}
|
||||||
|
|
||||||
|
VALIDSCORE = {
|
||||||
|
"NOTFILLED": "", # The item is not scored yet
|
||||||
|
"NOANSWER": ".", # Student gives no answer (this score will impact the fianl mark)
|
||||||
|
"ABS": "a", # Student has absent (this score won't be impact the final mark)
|
||||||
|
}
|
@ -4,12 +4,12 @@
|
|||||||
""" Extracting data from xlsx files """
|
""" Extracting data from xlsx files """
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from . import NO_STUDENT_COLUMNS
|
from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
|
||||||
|
|
||||||
pd.set_option("Precision", 2)
|
pd.set_option("Precision", 2)
|
||||||
|
|
||||||
|
|
||||||
def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||||
""" Extract the list of students from df
|
""" Extract the list of students from df
|
||||||
|
|
||||||
:param df: the dataframe
|
:param df: the dataframe
|
||||||
@ -20,7 +20,7 @@ def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
|||||||
return students
|
return students
|
||||||
|
|
||||||
|
|
||||||
def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||||
""" Flat the ws for students
|
""" Flat the ws for students
|
||||||
|
|
||||||
:param df: the dataframe (one row per questions)
|
:param df: the dataframe (one row per questions)
|
||||||
@ -29,7 +29,7 @@ def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
|||||||
|
|
||||||
Columns of csv files:
|
Columns of csv files:
|
||||||
|
|
||||||
- NO_STUDENT_COLUMNS
|
- NO_ST_COLUMNS meta data on questions
|
||||||
- one for each students
|
- one for each students
|
||||||
|
|
||||||
This function flat student's columns to "student" and "score"
|
This function flat student's columns to "student" and "score"
|
||||||
@ -42,14 +42,14 @@ def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
|
|||||||
df,
|
df,
|
||||||
id_vars=no_student_columns,
|
id_vars=no_student_columns,
|
||||||
value_vars=st,
|
value_vars=st,
|
||||||
var_name="student",
|
var_name=COLUMNS["student"],
|
||||||
value_name="score",
|
value_name=COLUMNS["score"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return pd.concat(scores)
|
return pd.concat(scores).dropna(subset=[COLUMNS["score"]])
|
||||||
|
|
||||||
|
|
||||||
def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
|
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
|
||||||
""" Flat and clear the dataframe extracted from csv
|
""" Flat and clear the dataframe extracted from csv
|
||||||
|
|
||||||
:param csv_df: data frame read from csv
|
:param csv_df: data frame read from csv
|
||||||
@ -59,12 +59,16 @@ def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
|
|||||||
"""
|
"""
|
||||||
df = flat_df_students(csv_df)
|
df = flat_df_students(csv_df)
|
||||||
|
|
||||||
df.columns = df.columns.map(lambda x: x.lower())
|
df[COLUMNS["question"]].fillna("", inplace=True)
|
||||||
|
df[COLUMNS["exercise"]].fillna("", inplace=True)
|
||||||
|
df[COLUMNS["comment"]].fillna("", inplace=True)
|
||||||
|
df[COLUMNS["competence"]].fillna("", inplace=True)
|
||||||
|
|
||||||
df["question"].fillna("", inplace=True)
|
df[COLUMNS["score"]] = pd.to_numeric(
|
||||||
df["exercice"].fillna("", inplace=True)
|
df[COLUMNS["score"]]
|
||||||
df["commentaire"].fillna("", inplace=True)
|
.replace(VALIDSCORE["NOANSWER"], -1)
|
||||||
df["competence"].fillna("", inplace=True)
|
.apply(lambda x: str(x).replace(",", "."))
|
||||||
|
)
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
205
recopytex/df_marks_manip.py
Normal file
205
recopytex/df_marks_manip.py
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from math import ceil, floor
|
||||||
|
from .config import COLUMNS, VALIDSCORE
|
||||||
|
|
||||||
|
# Values manipulations
|
||||||
|
|
||||||
|
|
||||||
|
def round_half_point(val):
|
||||||
|
try:
|
||||||
|
return 0.5 * ceil(2.0 * val)
|
||||||
|
except ValueError:
|
||||||
|
return val
|
||||||
|
except TypeError:
|
||||||
|
return val
|
||||||
|
|
||||||
|
|
||||||
|
def score_to_mark(x):
|
||||||
|
""" Compute the mark
|
||||||
|
|
||||||
|
if the item is leveled then the score is multiply by the score_rate
|
||||||
|
otherwise it copies the score
|
||||||
|
|
||||||
|
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
|
||||||
|
|
||||||
|
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
|
||||||
|
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
|
||||||
|
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
|
||||||
|
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
|
||||||
|
... }
|
||||||
|
>>> df = pd.DataFrame(d)
|
||||||
|
>>> score_to_mark(df.loc[0])
|
||||||
|
1.0
|
||||||
|
>>> score_to_mark(df.loc[10])
|
||||||
|
1.3333333333333333
|
||||||
|
"""
|
||||||
|
# -1 is no answer
|
||||||
|
if x[COLUMNS["score"]] == -1:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if x[COLUMNS["is_leveled"]]:
|
||||||
|
if x[COLUMNS["score"]] not in [0, 1, 2, 3]:
|
||||||
|
raise ValueError(f"The evaluation is out of range: {x[COLUMNS['score']]} at {x}")
|
||||||
|
return x[COLUMNS["score"]] * x[COLUMNS["score_rate"]] / 3
|
||||||
|
|
||||||
|
if x[COLUMNS["score"]] > x[COLUMNS["score_rate"]]:
|
||||||
|
raise ValueError(
|
||||||
|
f"The score ({x['score']}) is greated than the rating scale ({x[COLUMNS['score_rate']]}) at {x}"
|
||||||
|
)
|
||||||
|
return x[COLUMNS["score"]]
|
||||||
|
|
||||||
|
|
||||||
|
def score_to_level(x):
|
||||||
|
""" Compute the level (".",0,1,2,3).
|
||||||
|
|
||||||
|
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
|
||||||
|
|
||||||
|
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
|
||||||
|
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
|
||||||
|
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
|
||||||
|
... COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
|
||||||
|
... }
|
||||||
|
>>> df = pd.DataFrame(d)
|
||||||
|
>>> score_to_level(df.loc[0])
|
||||||
|
3
|
||||||
|
>>> score_to_level(df.loc[1])
|
||||||
|
1
|
||||||
|
>>> score_to_level(df.loc[2])
|
||||||
|
'na'
|
||||||
|
>>> score_to_level(df.loc[3])
|
||||||
|
3
|
||||||
|
>>> score_to_level(df.loc[5])
|
||||||
|
3
|
||||||
|
>>> score_to_level(df.loc[10])
|
||||||
|
2
|
||||||
|
"""
|
||||||
|
# -1 is no answer
|
||||||
|
if x[COLUMNS["score"]] == -1:
|
||||||
|
return x[COLUMNS["score"]]
|
||||||
|
|
||||||
|
if x[COLUMNS["is_leveled"]]:
|
||||||
|
return int(x[COLUMNS["score"]])
|
||||||
|
else:
|
||||||
|
return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3))
|
||||||
|
|
||||||
|
|
||||||
|
# DataFrame columns manipulations
|
||||||
|
|
||||||
|
|
||||||
|
def compute_mark(df):
|
||||||
|
""" Add Mark column to df
|
||||||
|
|
||||||
|
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
|
||||||
|
|
||||||
|
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
|
||||||
|
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
|
||||||
|
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
|
||||||
|
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
|
||||||
|
... }
|
||||||
|
>>> df = pd.DataFrame(d)
|
||||||
|
>>> compute_mark(df)
|
||||||
|
0 1.00
|
||||||
|
1 0.33
|
||||||
|
2 2.00
|
||||||
|
3 1.50
|
||||||
|
4 0.67
|
||||||
|
5 2.00
|
||||||
|
6 0.67
|
||||||
|
7 1.00
|
||||||
|
8 1.50
|
||||||
|
9 1.00
|
||||||
|
10 1.33
|
||||||
|
11 2.00
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
|
||||||
|
score_to_mark, axis=1
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_level(df):
|
||||||
|
""" Add Mark column to df
|
||||||
|
|
||||||
|
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
|
||||||
|
|
||||||
|
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
|
||||||
|
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
|
||||||
|
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
|
||||||
|
... COLUMNS["score"]:[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
|
||||||
|
... }
|
||||||
|
>>> df = pd.DataFrame(d)
|
||||||
|
>>> compute_level(df)
|
||||||
|
0 na
|
||||||
|
1 1
|
||||||
|
2 3
|
||||||
|
3 3
|
||||||
|
4 1
|
||||||
|
5 3
|
||||||
|
6 2
|
||||||
|
7 3
|
||||||
|
8 3
|
||||||
|
9 2
|
||||||
|
10 2
|
||||||
|
11 3
|
||||||
|
dtype: object
|
||||||
|
"""
|
||||||
|
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
|
||||||
|
score_to_level, axis=1
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_normalized(df):
|
||||||
|
""" Compute the normalized mark (Mark / score_rate)
|
||||||
|
|
||||||
|
:param df: DataFrame with "Mark" and COLUMNS["score_rate"] columns
|
||||||
|
|
||||||
|
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
|
||||||
|
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
|
||||||
|
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
|
||||||
|
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
|
||||||
|
... }
|
||||||
|
>>> df = pd.DataFrame(d)
|
||||||
|
>>> df["Mark"] = compute_marks(df)
|
||||||
|
>>> compute_normalized(df)
|
||||||
|
0 1.00
|
||||||
|
1 0.33
|
||||||
|
2 1.00
|
||||||
|
3 0.75
|
||||||
|
4 0.33
|
||||||
|
5 1.00
|
||||||
|
6 0.67
|
||||||
|
7 1.00
|
||||||
|
8 0.75
|
||||||
|
9 0.50
|
||||||
|
10 0.67
|
||||||
|
11 1.00
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return df[COLUMNS["mark"]] / df[COLUMNS["score_rate"]]
|
||||||
|
|
||||||
|
|
||||||
|
# Postprocessing question scores
|
||||||
|
|
||||||
|
|
||||||
|
def pp_q_scores(df):
|
||||||
|
""" Postprocessing questions scores dataframe
|
||||||
|
|
||||||
|
:param df: questions-scores dataframe
|
||||||
|
:return: same data frame with mark, level and normalize columns
|
||||||
|
"""
|
||||||
|
assign = {
|
||||||
|
COLUMNS["mark"]: compute_mark,
|
||||||
|
COLUMNS["level"]: compute_level,
|
||||||
|
COLUMNS["normalized"]: compute_normalized,
|
||||||
|
}
|
||||||
|
return df.assign(**assign)
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------
|
||||||
|
# Reglages pour 'vim'
|
||||||
|
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||||
|
# cursor: 16 del
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user