Feat: Base for student exploration

2019-08-20 21:15:39 +02:00
parent 1fc7270bed
commit 7bb224a48f
4 changed files with 1281 additions and 165 deletions
--- a/recopytex/init.py
+++ b/recopytex/init.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
 # encoding: utf-8
-from .csv_extraction import flat_clear_csv
+from .csv_extraction import flat_df_students, flat_df_for
 from .df_marks_manip import pp_q_scores
--- a/recopytex/csv_extraction.py
+++ b/recopytex/csv_extraction.py
@@ -8,6 +8,7 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
 pd.set_option("Precision", 2)
 def try_replace(x, old, new):
    try:
        return str(x).replace(old, new)
@@ -26,8 +27,10 @@ def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
    return students
-def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
+def flat_df_students(
-    """ Flat the ws for students
+    df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
 ):
    """ Flat the dataframe by returning a dataframe with one student on each line
    :param df: the dataframe (one row per question)
    :param no_student_columns: columns that are not students
@@ -52,18 +55,45 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
                value_name=COLUMNS["score"],
            ).dropna(subset=[COLUMNS["score"]])
        )
    if postprocessing:
        return postprocess(pd.concat(scores))
    return pd.concat(scores)
-def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
+def flat_df_for(
-    """ Flat and clear the dataframe extracted from csv
+    df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
 ):
    """ Extract the data only for one student
-    :param csv_df: data frame read from csv
+    :param df: the dataframe (one row per question)
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per question for the given student
    Columns of csv files:
    - NO_ST_COLUMNS meta data on questions
    - one for each student
    """
    students = extract_students(df, no_student_columns)
    if student not in students:
        raise KeyError("This student is not in the table")
    st_df = df[list(no_student_columns) + [student]]
    st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
        subset=[COLUMNS["score"]]
    )
    if postprocessing:
        return postprocess(st_df)
    return st_df
 def postprocess(df):
    """ Postprocessing score dataframe 
    - Replace na with an empty string
    - Replace "NOANSWER" with -1
    - Turn comma decimal separators into dots (e.g. "1,5" becomes "1.5")
    """
    df = flat_df_students(csv_df)
    df[COLUMNS["question"]].fillna("", inplace=True)
    df[COLUMNS["exercise"]].fillna("", inplace=True)
@@ -76,9 +106,8 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
        .apply(lambda x: try_replace(x, ",", "."))
    )
    df[COLUMNS["score_rate"]] = pd.to_numeric(
-        df[COLUMNS["score_rate"]]
+        df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
-        .apply(lambda x: try_replace(x, ",", ".")),
+        errors="coerce",
        errors="coerce"
    )
    return df
--- a/templates/tpl_evaluation.ipynb
+++ b/templates/tpl_evaluation.ipynb
@@ -2,8 +2,20 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 1,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "from IPython.display import Markdown as md\n",
@@ -11,7 +23,7 @@
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from datetime import datetime\n",
-    "from recopytex import flat_clear_csv, pp_q_scores\n",
+    "from recopytex import flat_df_students, pp_q_scores\n",
    "#import prettytable as pt\n",
    "%matplotlib inline"
   ]
@@ -20,6 +32,17 @@
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    },
    "tags": [
     "parameters"
    ]
@@ -35,7 +58,19 @@
  {
   "cell_type": "code",
   "execution_count": 3,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [
    {
     "data": {
@@ -59,107 +94,65 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "stack_scores = pd.read_csv(csv_file, encoding=\"latin_1\")\n",
-    "scores = flat_clear_csv(stack_scores).dropna(subset=[\"Score\"])\n",
+    "scores = flat_df_students(stack_scores).dropna(subset=[\"Score\"])\n",
    "scores = pp_q_scores(scores)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
-   "metadata": {},
+   "metadata": {
-   "outputs": [
+    "extensions": {
-    {
+     "jupyter_dashboards": {
-     "data": {
+      "version": 1,
-      "text/html": [
+      "views": {
-       "<div>\n",
+       "grid_default": {},
-       "<style scoped>\n",
+       "report_default": {
-       "    .dataframe tbody tr th:only-of-type {\n",
+        "hidden": true
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Note</th>\n",
       "      <th>Bareme</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Exercice</th>\n",
       "      <th>Eleve</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">1</th>\n",
       "      <th>ABDOU Asmahane</th>\n",
       "      <td>3.67</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ABOU Roihim</th>\n",
       "      <td>0.00</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AHMED BOINALI Kouraichia</th>\n",
       "      <td>1.33</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AHMED Rahada</th>\n",
       "      <td>2.67</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ALI SAID Anchourati</th>\n",
       "      <td>0.00</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   Note  Bareme\n",
       "Exercice Eleve                                 \n",
       "1        ABDOU Asmahane            3.67     6.0\n",
       "         ABOU Roihim               0.00     6.0\n",
       "         AHMED BOINALI Kouraichia  1.33     6.0\n",
       "         AHMED Rahada              2.67     6.0\n",
       "         ALI SAID Anchourati       0.00     6.0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
       }
-   ],
+      }
     }
    }
   },
   "outputs": [],
   "source": [
    "exercises_scores = scores.groupby([\"Exercice\", \"Eleve\"]).agg({\"Note\": \"sum\", \"Bareme\": \"sum\"})\n",
-    "exercises_scores.head()"
+    "#exercises_scores.head()"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 7,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [
    {
     "data": {
@@ -195,117 +188,117 @@
       "    <tr>\n",
       "      <th>ABDOU Asmahane</th>\n",
       "      <td>5.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ABOU Roihim</th>\n",
       "      <td>0.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AHMED BOINALI Kouraichia</th>\n",
       "      <td>2.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AHMED Rahada</th>\n",
       "      <td>6.33</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ALI SAID Anchourati</th>\n",
       "      <td>0.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ASSANE Noussouraniya</th>\n",
       "      <td>4.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BACAR Issiaka</th>\n",
       "      <td>0.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BACAR Samina</th>\n",
       "      <td>3.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CHAIHANE Said</th>\n",
       "      <td>5.33</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COMBO Houzaimati</th>\n",
       "      <td>5.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DAOUD Anzilati</th>\n",
       "      <td>5.17</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DAOUD Talaenti</th>\n",
       "      <td>5.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DARKAOUI Rachma</th>\n",
       "      <td>5.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DHAKIOINE Nabaouya</th>\n",
       "      <td>1.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DJANFAR Soioutinour</th>\n",
       "      <td>5.33</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DRISSA Ibrahim</th>\n",
       "      <td>0.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HACHIM SIDI Assani</th>\n",
       "      <td>7.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HAFIDHUI Zalifa</th>\n",
       "      <td>5.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HOUMADI Marie</th>\n",
       "      <td>6.67</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HOUMADI Sania</th>\n",
       "      <td>5.33</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MAANDHUI Halouoi</th>\n",
       "      <td>7.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MASSONDI Nasma</th>\n",
       "      <td>7.33</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SAIDALI Irichad</th>\n",
       "      <td>5.00</td>\n",
-       "      <td>12.0</td>\n",
+       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
@@ -314,32 +307,32 @@
      "text/plain": [
       "                          Note  Bareme\n",
       "Eleve                                 \n",
-       "ABDOU Asmahane            5.00    12.0\n",
+       "ABDOU Asmahane            5.00      12\n",
-       "ABOU Roihim               0.00    12.0\n",
+       "ABOU Roihim               0.00      12\n",
-       "AHMED BOINALI Kouraichia  2.67    12.0\n",
+       "AHMED BOINALI Kouraichia  2.67      12\n",
-       "AHMED Rahada              6.33    12.0\n",
+       "AHMED Rahada              6.33      12\n",
-       "ALI SAID Anchourati       0.00    12.0\n",
+       "ALI SAID Anchourati       0.00      12\n",
-       "ASSANE Noussouraniya      4.67    12.0\n",
+       "ASSANE Noussouraniya      4.67      12\n",
-       "BACAR Issiaka             0.00    12.0\n",
+       "BACAR Issiaka             0.00      12\n",
-       "BACAR Samina              3.67    12.0\n",
+       "BACAR Samina              3.67      12\n",
-       "CHAIHANE Said             5.33    12.0\n",
+       "CHAIHANE Said             5.33      12\n",
-       "COMBO Houzaimati          5.00    12.0\n",
+       "COMBO Houzaimati          5.00      12\n",
-       "DAOUD Anzilati            5.17    12.0\n",
+       "DAOUD Anzilati            5.17      12\n",
-       "DAOUD Talaenti            5.67    12.0\n",
+       "DAOUD Talaenti            5.67      12\n",
-       "DARKAOUI Rachma           5.67    12.0\n",
+       "DARKAOUI Rachma           5.67      12\n",
-       "DHAKIOINE Nabaouya        1.00    12.0\n",
+       "DHAKIOINE Nabaouya        1.00      12\n",
-       "DJANFAR Soioutinour       5.33    12.0\n",
+       "DJANFAR Soioutinour       5.33      12\n",
-       "DRISSA Ibrahim            0.00    12.0\n",
+       "DRISSA Ibrahim            0.00      12\n",
-       "HACHIM SIDI Assani        7.00    12.0\n",
+       "HACHIM SIDI Assani        7.00      12\n",
-       "HAFIDHUI Zalifa           5.67    12.0\n",
+       "HAFIDHUI Zalifa           5.67      12\n",
-       "HOUMADI Marie             6.67    12.0\n",
+       "HOUMADI Marie             6.67      12\n",
-       "HOUMADI Sania             5.33    12.0\n",
+       "HOUMADI Sania             5.33      12\n",
-       "MAANDHUI Halouoi          7.00    12.0\n",
+       "MAANDHUI Halouoi          7.00      12\n",
-       "MASSONDI Nasma            7.33    12.0\n",
+       "MASSONDI Nasma            7.33      12\n",
-       "SAIDALI Irichad           5.00    12.0"
+       "SAIDALI Irichad           5.00      12"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -351,8 +344,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [
    {
     "data": {
@@ -368,7 +373,7 @@
       "Name: Note, dtype: float64"
      ]
     },
-     "execution_count": 7,
+     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -379,16 +384,38 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
      "  return f(*args, **kwds)\n",
      "/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n",
      "  return f(*args, **kwds)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-       "<matplotlib.axes._subplots.AxesSubplot at 0x7f0ae61e5cf8>"
+       "<matplotlib.axes._subplots.AxesSubplot at 0x7f104b318090>"
      ]
     },
-     "execution_count": 8,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
@@ -413,13 +440,44 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "extensions": {
   "jupyter_dashboards": {
    "activeView": "grid_default",
    "version": 1,
    "views": {
     "grid_default": {
      "cellMargin": 10,
      "defaultCellHeight": 20,
      "maxColumns": 12,
      "name": "grid",
      "type": "grid"
     },
     "report_default": {
      "name": "report",
      "type": "report"
     }
    }
   }
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@@ -435,7 +493,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.4"
  }
 },
 "nbformat": 4,
--- a/templates/tpl_student.ipynb
+++ b/templates/tpl_student.ipynb