{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyse du ds 5 pour les 302" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import sqlite3\n", "import pandas as pd\n", "import numpy as np\n", "from math import ceil\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from pprint import pprint" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from repytex.tools.evaluation import Evaluation" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "db = \"../recopytex.db\"\n", "conn = sqlite3.connect(db)\n", "c = conn.cursor()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "La tribu des 302 (id = 1)\n" ] } ], "source": [ "tribe_name = \"302\"\n", "tribe_id = c.execute(\"SELECT id from tribe WHERE tribe.name == ?\", (tribe_name,)).fetchone()[0]\n", "print(f\"La tribu des {tribe_name} (id = {tribe_id})\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Liste des évaluations:\n", " - DS1 mise en jambe (id = 1)\n", " - DS2 (id = 5)\n", " - Pyramide de pièces (id = 8)\n", " - DS3 (id = 10)\n", " - DM noel (id = 14)\n", " - DS4 (id = 16)\n", " - DNB blanc1 (id = 21)\n", " - DS5 (id = 29)\n", " - Téléphérique (id = 30)\n" ] } ], "source": [ "evals = c.execute(\"SELECT id, name from eval WHERE eval.tribe_id == ?\", (tribe_id,))\n", "print(\"Liste des évaluations:\")\n", "for e in evals:\n", " print(f\" - {e[1]} (id = {e[0]})\")" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ev_id = 29\n", "ev = 
Evaluation.from_sqlite(ev_id, conn)\n", "ev" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Il semble qu'il y ait des doublons" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q_sc_df = ev.sc_df.set_index(\"id\")" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['question_id', 'student_id', 'value'], dtype='object')" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df.columns" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "question_id 2285\n", "student_id 2285\n", "value 2285\n", "dtype: int64" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df.count()" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
question_idstudent_id
id
79173021
79183031
79193041
79203022
79213032
\n", "
" ], "text/plain": [ " question_id student_id\n", "id \n", "7917 302 1\n", "7918 303 1\n", "7919 304 1\n", "7920 302 2\n", "7921 303 2" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qst_filtered = q_sc_df[q_sc_df.duplicated(['question_id', 'student_id'], keep='first')]\n", "qst_filtered[['question_id', 'student_id']].head()" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
question_idstudent_idvalue
id
10616327262
\n", "
" ], "text/plain": [ " question_id student_id value\n", "id \n", "10616 327 26 2" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qst_filtered.tail(1)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "question_id 1600\n", "student_id 1600\n", "value 1600\n", "dtype: int64" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df[q_sc_df.duplicated(['question_id', 'student_id', 'value'], keep='last')].count()" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "26" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(q_sc_df[\"question_id\"].unique())" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(q_sc_df[\"student_id\"].unique())" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "650" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "26 * 25" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Donc j'ai des notes qui sont différentes pour un même élève sur une même question...\n", "\n", "Je veux maintenant trouver ces notes." 
] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "student_id question_id\n", "10 310 {0, 1}\n", "12 309 {1, 2}\n", "13 311 {2, 3}\n", "14 303 {2, 3}\n", "16 313 {2, 3}\n", "Name: value, dtype: object" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gped_sc = q_sc_df.groupby([\"student_id\", \"question_id\"])['value'].apply(set)\n", "gped_sc[gped_sc.apply(lambda x: len(x) != 1)].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On garde la dernière note enregistrée (celle dont l'`id` est le plus élevé)" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def keep_last(df):\n", " return df.sort_index().tail(1)" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
student_idquestion_idvalue
id
807913022
808013032
808113041
848413053
848513061
\n", "
" ], "text/plain": [ " student_id question_id value\n", "id \n", "8079 1 302 2\n", "8080 1 303 2\n", "8081 1 304 1\n", "8484 1 305 3\n", "8485 1 306 1" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gped_sc = q_sc_df.groupby([\"student_id\", \"question_id\"])['value'].apply(keep_last)\n", "q_sc_df = gped_sc.reset_index().set_index(\"id\")\n", "q_sc_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Recalcul des notes et compétences" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Jointure avec la dataframe sur les élèves" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'name_st', 'surname_st', 'mail_st', 'commment_st', 'tribe_id'], dtype='object')" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "st_df = ev.s_df\n", "st_df.columns = st_df.columns.map(lambda x: f\"{x}_st\" if 'id' not in x else x)\n", "st_df.columns" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
student_idquestion_idvalueidname_stsurname_stmail_stcommment_sttribe_id
0130221ABDALLAH ALLAOUITaiassimaNone1
1130321ABDALLAH ALLAOUITaiassimaNone1
2130411ABDALLAH ALLAOUITaiassimaNone1
3130531ABDALLAH ALLAOUITaiassimaNone1
4130611ABDALLAH ALLAOUITaiassimaNone1
\n", "
" ], "text/plain": [ " student_id question_id value id name_st surname_st mail_st \\\n", "0 1 302 2 1 ABDALLAH ALLAOUI Taiassima \n", "1 1 303 2 1 ABDALLAH ALLAOUI Taiassima \n", "2 1 304 1 1 ABDALLAH ALLAOUI Taiassima \n", "3 1 305 3 1 ABDALLAH ALLAOUI Taiassima \n", "4 1 306 1 1 ABDALLAH ALLAOUI Taiassima \n", "\n", " commment_st tribe_id \n", "0 None 1 \n", "1 None 1 \n", "2 None 1 \n", "3 None 1 \n", "4 None 1 " ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df = q_sc_df.merge(st_df, left_on=\"student_id\", right_on=\"id\", suffixes=(\"\", \"_st\"))\n", "q_sc_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Jointure avec la dataframe sur les questions" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'name_q', 'score_rate_q', 'is_leveled_q', 'exercise_id',\n", " 'competence_q', 'domain_q', 'comment_q'],\n", " dtype='object')" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_df = ev.q_df\n", "q_df.columns = q_df.columns.map(lambda x: f\"{x}_q\" if 'id' not in x else x)\n", "q_df.columns" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
student_idquestion_idvalueidname_stsurname_stmail_stcommment_sttribe_idid_qname_qscore_rate_qis_leveled_qexercise_idcompetence_qdomain_qcomment_q
64523331323MOUSSRIBakariNone1331101102ComPrésentationPrésentation de la copie
64624331324SAÏDFatoumiaNone1331101102ComPrésentationPrésentation de la copie
64725331325SAKOTRAClaudianaNone1331101102ComPrésentationPrésentation de la copie
64826331326TOUFAILSalahouNone1331101102ComPrésentationPrésentation de la copie
64976331376IbrahimChaharzadeNone1331101102ComPrésentationPrésentation de la copie
\n", "
" ], "text/plain": [ " student_id question_id value id name_st surname_st mail_st \\\n", "645 23 331 3 23 MOUSSRI Bakari \n", "646 24 331 3 24 SAÏD Fatoumia \n", "647 25 331 3 25 SAKOTRA Claudiana \n", "648 26 331 3 26 TOUFAIL Salahou \n", "649 76 331 3 76 Ibrahim Chaharzade \n", "\n", " commment_st tribe_id id_q name_q score_rate_q is_leveled_q \\\n", "645 None 1 331 10 1 \n", "646 None 1 331 10 1 \n", "647 None 1 331 10 1 \n", "648 None 1 331 10 1 \n", "649 None 1 331 10 1 \n", "\n", " exercise_id competence_q domain_q comment_q \n", "645 102 Com Présentation Présentation de la copie \n", "646 102 Com Présentation Présentation de la copie \n", "647 102 Com Présentation Présentation de la copie \n", "648 102 Com Présentation Présentation de la copie \n", "649 102 Com Présentation Présentation de la copie " ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df = q_sc_df.merge(q_df, left_on=\"question_id\", right_on=\"id\", suffixes=(\"\", \"_q\"))\n", "q_sc_df.tail()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Calcul des notes" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q_sc_df[\"value_no_dot\"] = q_sc_df[\"value\"].replace('.', 0)" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q_sc_df[\"mark\"] = q_sc_df[\"score_rate_q\"] * q_sc_df[\"value_no_dot\"] / 3" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['student_id', 'question_id', 'value', 'id', 'name_st', 'surname_st',\n", " 'mail_st', 'commment_st', 'tribe_id', 'id_q', 'name_q', 'score_rate_q',\n", " 'is_leveled_q', 'exercise_id', 'competence_q', 'domain_q', 'comment_q',\n", " 'value_no_dot', 'mark'],\n", " dtype='object')" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df.columns" ] }, { 
"cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_rate_qmark
student_idexercise_id
195126.666667
96105.333333
97174.000000
98184.000000
99187.000000
100151.000000
1021010.000000
295126.666667
96104.666667
97179.666667
\n", "
" ], "text/plain": [ " score_rate_q mark\n", "student_id exercise_id \n", "1 95 12 6.666667\n", " 96 10 5.333333\n", " 97 17 4.000000\n", " 98 18 4.000000\n", " 99 18 7.000000\n", " 100 15 1.000000\n", " 102 10 10.000000\n", "2 95 12 6.666667\n", " 96 10 4.666667\n", " 97 17 9.666667" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df.groupby(['student_id', 'exercise_id'])[[\"score_rate_q\", 'mark']].sum().head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Voilà... j'ai les notes..." ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_rate_qmark
student_idname_stsurname_st
1ABDALLAH ALLAOUITaiassima10038.000000
2ADANIIsmou10060.333333
4AHAMADIAsbahati10043.666667
5AHAMADI OUSSENIAnsufiddine10025.000000
6AHAMEDFayadhi10031.666667
7AHMED SAIDHadaïta10062.000000
8ALI MADIAnissa10077.666667
9ALIRaydel10062.333333
10ATTOUMANE ALIFatima10042.000000
11BACHIROUElzame10021.666667
12BINALIZalida10063.000000
13BOINAAbdillah Mze Limassi10069.333333
14BOUDRAZaankidine10035.666667
15BOURAKayssoiria10044.000000
16HALADIAsna10068.666667
17HALIDISoibrata10037.333333
18HAMEDALYDoulkifly10034.666667
19IBRAHIMNassur10056.666667
21MOHAMEDNadia10051.666667
22MOUHOUDHOIREIzak10036.666667
23MOUSSRIBakari10024.000000
24SAÏDFatoumia10058.333333
25SAKOTRAClaudiana10052.666667
26TOUFAILSalahou10056.000000
76IbrahimChaharzade10050.333333
\n", "
" ], "text/plain": [ " score_rate_q mark\n", "student_id name_st surname_st \n", "1 ABDALLAH ALLAOUI Taiassima 100 38.000000\n", "2 ADANI Ismou 100 60.333333\n", "4 AHAMADI Asbahati 100 43.666667\n", "5 AHAMADI OUSSENI Ansufiddine 100 25.000000\n", "6 AHAMED Fayadhi 100 31.666667\n", "7 AHMED SAID Hadaïta 100 62.000000\n", "8 ALI MADI Anissa 100 77.666667\n", "9 ALI Raydel 100 62.333333\n", "10 ATTOUMANE ALI Fatima 100 42.000000\n", "11 BACHIROU Elzame 100 21.666667\n", "12 BINALI Zalida 100 63.000000\n", "13 BOINA Abdillah Mze Limassi 100 69.333333\n", "14 BOUDRA Zaankidine 100 35.666667\n", "15 BOURA Kayssoiria 100 44.000000\n", "16 HALADI Asna 100 68.666667\n", "17 HALIDI Soibrata 100 37.333333\n", "18 HAMEDALY Doulkifly 100 34.666667\n", "19 IBRAHIM Nassur 100 56.666667\n", "21 MOHAMED Nadia 100 51.666667\n", "22 MOUHOUDHOIRE Izak 100 36.666667\n", "23 MOUSSRI Bakari 100 24.000000\n", "24 SAÏD Fatoumia 100 58.333333\n", "25 SAKOTRA Claudiana 100 52.666667\n", "26 TOUFAIL Salahou 100 56.000000\n", "76 Ibrahim Chaharzade 100 50.333333" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_sc_df.groupby(['student_id', 'name_st', 'surname_st'])[[\"score_rate_q\", 'mark']].sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }