Feat: Base for student exploration

This commit is contained in:
Bertrand Benjamin 2019-08-20 21:15:39 +02:00
parent 1fc7270bed
commit 7bb224a48f
4 changed files with 1281 additions and 165 deletions

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
from .csv_extraction import flat_clear_csv from .csv_extraction import flat_df_students, flat_df_for
from .df_marks_manip import pp_q_scores from .df_marks_manip import pp_q_scores

View File

@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
pd.set_option("Precision", 2) pd.set_option("Precision", 2)
def try_replace(x, old, new): def try_replace(x, old, new):
try: try:
return str(x).replace(old, new) return str(x).replace(old, new)
@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
return students return students
def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()): def flat_df_students(
""" Flat the ws for students df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Flatten the dataframe by returning a dataframe with one student on each line
:param df: the dataframe (one row per question) :param df: the dataframe (one row per question)
:param no_student_columns: columns that are not students :param no_student_columns: columns that are not students
@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
value_name=COLUMNS["score"], value_name=COLUMNS["score"],
).dropna(subset=[COLUMNS["score"]]) ).dropna(subset=[COLUMNS["score"]])
) )
if postprocessing:
return postprocess(pd.concat(scores))
return pd.concat(scores) return pd.concat(scores)
def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()): def flat_df_for(
""" Flat and clear the dataframe extracted from csv df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Extract the data only for one student
:param csv_df: data frame read from csv :param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students :param no_student_columns: columns that are not students
:return: dataframe with one row per questions and students :return: dataframe with one row per questions and students
Columns of csv files:
- NO_ST_COLUMNS meta data on questions
- one for each students
"""
students = extract_students(df, no_student_columns)
if student not in students:
raise KeyError("This student is not in the table")
st_df = df[list(no_student_columns) + [student]]
st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
subset=[COLUMNS["score"]]
)
if postprocessing:
return postprocess(st_df)
return st_df
def postprocess(df):
""" Postprocessing score dataframe
- Replace na with an empty string
- Replace "NOANSWER" with -1
- Turn commas number to dot numbers
""" """
df = flat_df_students(csv_df)
df[COLUMNS["question"]].fillna("", inplace=True) df[COLUMNS["question"]].fillna("", inplace=True)
df[COLUMNS["exercise"]].fillna("", inplace=True) df[COLUMNS["exercise"]].fillna("", inplace=True)
@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
.apply(lambda x: try_replace(x, ",", ".")) .apply(lambda x: try_replace(x, ",", "."))
) )
df[COLUMNS["score_rate"]] = pd.to_numeric( df[COLUMNS["score_rate"]] = pd.to_numeric(
df[COLUMNS["score_rate"]] df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
.apply(lambda x: try_replace(x, ",", ".")), errors="coerce",
errors="coerce"
) )
return df return df

View File

@ -2,8 +2,20 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 1,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from IPython.display import Markdown as md\n", "from IPython.display import Markdown as md\n",
@ -11,7 +23,7 @@
"import pandas as pd\n", "import pandas as pd\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"from recopytex import flat_clear_csv, pp_q_scores\n", "from recopytex import flat_df_students, pp_q_scores\n",
"#import prettytable as pt\n", "#import prettytable as pt\n",
"%matplotlib inline" "%matplotlib inline"
] ]
@ -20,6 +32,17 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"metadata": { "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
},
"tags": [ "tags": [
"parameters" "parameters"
] ]
@ -35,7 +58,19 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -59,107 +94,65 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 5,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"stack_scores = pd.read_csv(csv_file, encoding=\"latin_1\")\n", "stack_scores = pd.read_csv(csv_file, encoding=\"latin_1\")\n",
"scores = flat_clear_csv(stack_scores).dropna(subset=[\"Score\"])\n", "scores = flat_df_students(stack_scores).dropna(subset=[\"Score\"])\n",
"scores = pp_q_scores(scores)" "scores = pp_q_scores(scores)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 6,
"metadata": {}, "metadata": {
"outputs": [ "extensions": {
{ "jupyter_dashboards": {
"data": { "version": 1,
"text/html": [ "views": {
"<div>\n", "grid_default": {},
"<style scoped>\n", "report_default": {
" .dataframe tbody tr th:only-of-type {\n", "hidden": true
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Note</th>\n",
" <th>Bareme</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Exercice</th>\n",
" <th>Eleve</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">1</th>\n",
" <th>ABDOU Asmahane</th>\n",
" <td>3.67</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ABOU Roihim</th>\n",
" <td>0.00</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED BOINALI Kouraichia</th>\n",
" <td>1.33</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AHMED Rahada</th>\n",
" <td>2.67</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ALI SAID Anchourati</th>\n",
" <td>0.00</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Note Bareme\n",
"Exercice Eleve \n",
"1 ABDOU Asmahane 3.67 6.0\n",
" ABOU Roihim 0.00 6.0\n",
" AHMED BOINALI Kouraichia 1.33 6.0\n",
" AHMED Rahada 2.67 6.0\n",
" ALI SAID Anchourati 0.00 6.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
} }
], }
}
}
},
"outputs": [],
"source": [ "source": [
"exercises_scores = scores.groupby([\"Exercice\", \"Eleve\"]).agg({\"Note\": \"sum\", \"Bareme\": \"sum\"})\n", "exercises_scores = scores.groupby([\"Exercice\", \"Eleve\"]).agg({\"Note\": \"sum\", \"Bareme\": \"sum\"})\n",
"exercises_scores.head()" "#exercises_scores.head()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 7,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -195,117 +188,117 @@
" <tr>\n", " <tr>\n",
" <th>ABDOU Asmahane</th>\n", " <th>ABDOU Asmahane</th>\n",
" <td>5.00</td>\n", " <td>5.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>ABOU Roihim</th>\n", " <th>ABOU Roihim</th>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>AHMED BOINALI Kouraichia</th>\n", " <th>AHMED BOINALI Kouraichia</th>\n",
" <td>2.67</td>\n", " <td>2.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>AHMED Rahada</th>\n", " <th>AHMED Rahada</th>\n",
" <td>6.33</td>\n", " <td>6.33</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>ALI SAID Anchourati</th>\n", " <th>ALI SAID Anchourati</th>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>ASSANE Noussouraniya</th>\n", " <th>ASSANE Noussouraniya</th>\n",
" <td>4.67</td>\n", " <td>4.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>BACAR Issiaka</th>\n", " <th>BACAR Issiaka</th>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>BACAR Samina</th>\n", " <th>BACAR Samina</th>\n",
" <td>3.67</td>\n", " <td>3.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>CHAIHANE Said</th>\n", " <th>CHAIHANE Said</th>\n",
" <td>5.33</td>\n", " <td>5.33</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>COMBO Houzaimati</th>\n", " <th>COMBO Houzaimati</th>\n",
" <td>5.00</td>\n", " <td>5.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DAOUD Anzilati</th>\n", " <th>DAOUD Anzilati</th>\n",
" <td>5.17</td>\n", " <td>5.17</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DAOUD Talaenti</th>\n", " <th>DAOUD Talaenti</th>\n",
" <td>5.67</td>\n", " <td>5.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DARKAOUI Rachma</th>\n", " <th>DARKAOUI Rachma</th>\n",
" <td>5.67</td>\n", " <td>5.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DHAKIOINE Nabaouya</th>\n", " <th>DHAKIOINE Nabaouya</th>\n",
" <td>1.00</td>\n", " <td>1.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DJANFAR Soioutinour</th>\n", " <th>DJANFAR Soioutinour</th>\n",
" <td>5.33</td>\n", " <td>5.33</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>DRISSA Ibrahim</th>\n", " <th>DRISSA Ibrahim</th>\n",
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>HACHIM SIDI Assani</th>\n", " <th>HACHIM SIDI Assani</th>\n",
" <td>7.00</td>\n", " <td>7.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>HAFIDHUI Zalifa</th>\n", " <th>HAFIDHUI Zalifa</th>\n",
" <td>5.67</td>\n", " <td>5.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>HOUMADI Marie</th>\n", " <th>HOUMADI Marie</th>\n",
" <td>6.67</td>\n", " <td>6.67</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>HOUMADI Sania</th>\n", " <th>HOUMADI Sania</th>\n",
" <td>5.33</td>\n", " <td>5.33</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>MAANDHUI Halouoi</th>\n", " <th>MAANDHUI Halouoi</th>\n",
" <td>7.00</td>\n", " <td>7.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>MASSONDI Nasma</th>\n", " <th>MASSONDI Nasma</th>\n",
" <td>7.33</td>\n", " <td>7.33</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>SAIDALI Irichad</th>\n", " <th>SAIDALI Irichad</th>\n",
" <td>5.00</td>\n", " <td>5.00</td>\n",
" <td>12.0</td>\n", " <td>12</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -314,32 +307,32 @@
"text/plain": [ "text/plain": [
" Note Bareme\n", " Note Bareme\n",
"Eleve \n", "Eleve \n",
"ABDOU Asmahane 5.00 12.0\n", "ABDOU Asmahane 5.00 12\n",
"ABOU Roihim 0.00 12.0\n", "ABOU Roihim 0.00 12\n",
"AHMED BOINALI Kouraichia 2.67 12.0\n", "AHMED BOINALI Kouraichia 2.67 12\n",
"AHMED Rahada 6.33 12.0\n", "AHMED Rahada 6.33 12\n",
"ALI SAID Anchourati 0.00 12.0\n", "ALI SAID Anchourati 0.00 12\n",
"ASSANE Noussouraniya 4.67 12.0\n", "ASSANE Noussouraniya 4.67 12\n",
"BACAR Issiaka 0.00 12.0\n", "BACAR Issiaka 0.00 12\n",
"BACAR Samina 3.67 12.0\n", "BACAR Samina 3.67 12\n",
"CHAIHANE Said 5.33 12.0\n", "CHAIHANE Said 5.33 12\n",
"COMBO Houzaimati 5.00 12.0\n", "COMBO Houzaimati 5.00 12\n",
"DAOUD Anzilati 5.17 12.0\n", "DAOUD Anzilati 5.17 12\n",
"DAOUD Talaenti 5.67 12.0\n", "DAOUD Talaenti 5.67 12\n",
"DARKAOUI Rachma 5.67 12.0\n", "DARKAOUI Rachma 5.67 12\n",
"DHAKIOINE Nabaouya 1.00 12.0\n", "DHAKIOINE Nabaouya 1.00 12\n",
"DJANFAR Soioutinour 5.33 12.0\n", "DJANFAR Soioutinour 5.33 12\n",
"DRISSA Ibrahim 0.00 12.0\n", "DRISSA Ibrahim 0.00 12\n",
"HACHIM SIDI Assani 7.00 12.0\n", "HACHIM SIDI Assani 7.00 12\n",
"HAFIDHUI Zalifa 5.67 12.0\n", "HAFIDHUI Zalifa 5.67 12\n",
"HOUMADI Marie 6.67 12.0\n", "HOUMADI Marie 6.67 12\n",
"HOUMADI Sania 5.33 12.0\n", "HOUMADI Sania 5.33 12\n",
"MAANDHUI Halouoi 7.00 12.0\n", "MAANDHUI Halouoi 7.00 12\n",
"MASSONDI Nasma 7.33 12.0\n", "MASSONDI Nasma 7.33 12\n",
"SAIDALI Irichad 5.00 12.0" "SAIDALI Irichad 5.00 12"
] ]
}, },
"execution_count": 15, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -351,8 +344,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -368,7 +373,7 @@
"Name: Note, dtype: float64" "Name: Note, dtype: float64"
] ]
}, },
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -379,16 +384,38 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
" return f(*args, **kwds)\n",
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
" return f(*args, **kwds)\n"
]
},
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f0ae61e5cf8>" "<matplotlib.axes._subplots.AxesSubplot at 0x7f104b318090>"
] ]
}, },
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },
@ -413,13 +440,44 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [], "outputs": [],
"source": [] "source": []
} }
], ],
"metadata": { "metadata": {
"celltoolbar": "Tags", "celltoolbar": "Tags",
"extensions": {
"jupyter_dashboards": {
"activeView": "grid_default",
"version": 1,
"views": {
"grid_default": {
"cellMargin": 10,
"defaultCellHeight": 20,
"maxColumns": 12,
"name": "grid",
"type": "grid"
},
"report_default": {
"name": "report",
"type": "report"
}
}
}
},
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3",
"language": "python", "language": "python",
@ -435,7 +493,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.3" "version": "3.7.4"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because it is too large Load Diff