Feat: Base for student exploration

This commit is contained in:
Bertrand Benjamin 2019-08-20 21:15:39 +02:00
parent 1fc7270bed
commit 7bb224a48f
4 changed files with 1281 additions and 165 deletions

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8
from .csv_extraction import flat_clear_csv
from .csv_extraction import flat_df_students, flat_df_for
from .df_marks_manip import pp_q_scores

View File

@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
pd.set_option("Precision", 2)
def try_replace(x, old, new):
try:
return str(x).replace(old, new)
@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
return students
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
""" Flat the ws for students
def flat_df_students(
df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Flat the dataframe by returning a dataframe with on student on each line
:param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students
@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
value_name=COLUMNS["score"],
).dropna(subset=[COLUMNS["score"]])
)
if postprocessing:
return postprocess(pd.concat(scores))
return pd.concat(scores)
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
""" Flat and clear the dataframe extracted from csv
def flat_df_for(
    df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
    """Extract the data of a single student from the dataframe.

    :param df: the dataframe (one row per question)
    :param student: column label of the student in df
    :param no_student_columns: columns that are not students
    :param postprocessing: when true, the result is passed through
        postprocess before being returned
    :return: dataframe made of the non-student columns plus the student's
        column renamed to COLUMNS["score"], without the rows where that
        score is missing
    :raises KeyError: if student is not among the students extracted from df

    Columns of csv files:
    - NO_ST_COLUMNS meta data on questions
    - one for each students
    """
    # Guard clause: refuse labels that are not student columns.
    if student not in extract_students(df, no_student_columns):
        raise KeyError("This student is not in the table")

    # Keep the question meta data and only this student's column.
    kept_columns = [*no_student_columns, student]
    student_scores = df[kept_columns]

    score_column = COLUMNS["score"]
    student_scores = student_scores.rename(columns={student: score_column})
    # Rows without a score carry no information for this student.
    student_scores = student_scores.dropna(subset=[score_column])

    return postprocess(student_scores) if postprocessing else student_scores
def postprocess(df):
""" Postprocessing score dataframe
- Replace na with an empty string
- Replace "NOANSWER" with -1
- Turn comma decimal separators into dots so the values parse as numbers
"""
df = flat_df_students(csv_df)
df[COLUMNS["question"]].fillna("", inplace=True)
df[COLUMNS["exercise"]].fillna("", inplace=True)
@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
.apply(lambda x: try_replace(x, ",", "."))
)
df[COLUMNS["score_rate"]] = pd.to_numeric(
df[COLUMNS["score_rate"]]
.apply(lambda x: try_replace(x, ",", ".")),
errors="coerce"
df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
errors="coerce",
)
return df

View File

@ -2,8 +2,20 @@
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"execution_count": 1,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"from IPython.display import Markdown as md\n",
@ -11,7 +23,7 @@
"import pandas as pd\n",
"from pathlib import Path\n",
"from datetime import datetime\n",
"from recopytex import flat_clear_csv, pp_q_scores\n",
"from recopytex import flat_df_students, pp_q_scores\n",
"#import prettytable as pt\n",
"%matplotlib inline"
]
@ -20,6 +32,17 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
},
"tags": [
"parameters"
]
@ -35,7 +58,19 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [
{
"data": {
@ -59,107 +94,65 @@
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"execution_count": 5,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"stack_scores = pd.read_csv(csv_file, encoding=\"latin_1\")\n",
"scores = flat_clear_csv(stack_scores).dropna(subset=[\"Score\"])\n",
"scores = flat_df_students(stack_scores).dropna(subset=[\"Score\"])\n",
"scores = pp_q_scores(scores)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Note</th>\n",
" <th>Bareme</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Exercice</th>\n",
" <th>Eleve</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">1</th>\n",
" <th>ABDOU Asmahane</th>\n",
" <td>3.67</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ABOU Roihim</th>\n",
" <td>0.00</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED BOINALI Kouraichia</th>\n",
" <td>1.33</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED Rahada</th>\n",
" <td>2.67</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ALI SAID Anchourati</th>\n",
" <td>0.00</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Note Bareme\n",
"Exercice Eleve \n",
"1 ABDOU Asmahane 3.67 6.0\n",
" ABOU Roihim 0.00 6.0\n",
" AHMED BOINALI Kouraichia 1.33 6.0\n",
" AHMED Rahada 2.67 6.0\n",
" ALI SAID Anchourati 0.00 6.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
"execution_count": 6,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
],
},
"outputs": [],
"source": [
"exercises_scores = scores.groupby([\"Exercice\", \"Eleve\"]).agg({\"Note\": \"sum\", \"Bareme\": \"sum\"})\n",
"exercises_scores.head()"
"#exercises_scores.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"execution_count": 7,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [
{
"data": {
@ -195,117 +188,117 @@
" <tr>\n",
" <th>ABDOU Asmahane</th>\n",
" <td>5.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ABOU Roihim</th>\n",
" <td>0.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED BOINALI Kouraichia</th>\n",
" <td>2.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED Rahada</th>\n",
" <td>6.33</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ALI SAID Anchourati</th>\n",
" <td>0.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ASSANE Noussouraniya</th>\n",
" <td>4.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BACAR Issiaka</th>\n",
" <td>0.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BACAR Samina</th>\n",
" <td>3.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CHAIHANE Said</th>\n",
" <td>5.33</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>COMBO Houzaimati</th>\n",
" <td>5.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DAOUD Anzilati</th>\n",
" <td>5.17</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DAOUD Talaenti</th>\n",
" <td>5.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DARKAOUI Rachma</th>\n",
" <td>5.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DHAKIOINE Nabaouya</th>\n",
" <td>1.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DJANFAR Soioutinour</th>\n",
" <td>5.33</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DRISSA Ibrahim</th>\n",
" <td>0.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HACHIM SIDI Assani</th>\n",
" <td>7.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HAFIDHUI Zalifa</th>\n",
" <td>5.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HOUMADI Marie</th>\n",
" <td>6.67</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HOUMADI Sania</th>\n",
" <td>5.33</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MAANDHUI Halouoi</th>\n",
" <td>7.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MASSONDI Nasma</th>\n",
" <td>7.33</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SAIDALI Irichad</th>\n",
" <td>5.00</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -314,32 +307,32 @@
"text/plain": [
" Note Bareme\n",
"Eleve \n",
"ABDOU Asmahane 5.00 12.0\n",
"ABOU Roihim 0.00 12.0\n",
"AHMED BOINALI Kouraichia 2.67 12.0\n",
"AHMED Rahada 6.33 12.0\n",
"ALI SAID Anchourati 0.00 12.0\n",
"ASSANE Noussouraniya 4.67 12.0\n",
"BACAR Issiaka 0.00 12.0\n",
"BACAR Samina 3.67 12.0\n",
"CHAIHANE Said 5.33 12.0\n",
"COMBO Houzaimati 5.00 12.0\n",
"DAOUD Anzilati 5.17 12.0\n",
"DAOUD Talaenti 5.67 12.0\n",
"DARKAOUI Rachma 5.67 12.0\n",
"DHAKIOINE Nabaouya 1.00 12.0\n",
"DJANFAR Soioutinour 5.33 12.0\n",
"DRISSA Ibrahim 0.00 12.0\n",
"HACHIM SIDI Assani 7.00 12.0\n",
"HAFIDHUI Zalifa 5.67 12.0\n",
"HOUMADI Marie 6.67 12.0\n",
"HOUMADI Sania 5.33 12.0\n",
"MAANDHUI Halouoi 7.00 12.0\n",
"MASSONDI Nasma 7.33 12.0\n",
"SAIDALI Irichad 5.00 12.0"
"ABDOU Asmahane 5.00 12\n",
"ABOU Roihim 0.00 12\n",
"AHMED BOINALI Kouraichia 2.67 12\n",
"AHMED Rahada 6.33 12\n",
"ALI SAID Anchourati 0.00 12\n",
"ASSANE Noussouraniya 4.67 12\n",
"BACAR Issiaka 0.00 12\n",
"BACAR Samina 3.67 12\n",
"CHAIHANE Said 5.33 12\n",
"COMBO Houzaimati 5.00 12\n",
"DAOUD Anzilati 5.17 12\n",
"DAOUD Talaenti 5.67 12\n",
"DARKAOUI Rachma 5.67 12\n",
"DHAKIOINE Nabaouya 1.00 12\n",
"DJANFAR Soioutinour 5.33 12\n",
"DRISSA Ibrahim 0.00 12\n",
"HACHIM SIDI Assani 7.00 12\n",
"HAFIDHUI Zalifa 5.67 12\n",
"HOUMADI Marie 6.67 12\n",
"HOUMADI Sania 5.33 12\n",
"MAANDHUI Halouoi 7.00 12\n",
"MASSONDI Nasma 7.33 12\n",
"SAIDALI Irichad 5.00 12"
]
},
"execution_count": 15,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -351,8 +344,20 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"execution_count": 8,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [
{
"data": {
@ -368,7 +373,7 @@
"Name: Note, dtype: float64"
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -379,16 +384,38 @@
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"execution_count": 9,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
" return f(*args, **kwds)\n",
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
" return f(*args, **kwds)\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f0ae61e5cf8>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f104b318090>"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
@ -413,13 +440,44 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Tags",
"extensions": {
"jupyter_dashboards": {
"activeView": "grid_default",
"version": 1,
"views": {
"grid_default": {
"cellMargin": 10,
"defaultCellHeight": 20,
"maxColumns": 12,
"name": "grid",
"type": "grid"
},
"report_default": {
"name": "report",
"type": "report"
}
}
}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
@ -435,7 +493,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.4"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load Diff