#!/usr/bin/env python
# encoding: utf-8

"""Extract per-student scores from assessment sheets loaded as dataframes.

This unit gathers the two Python files added by the diff:

- ``recopytex/__init__.py`` defines ``NO_STUDENT_COLUMNS``;
- ``recopytex/csv_extraction.py`` defines the helpers below and, in the
  package layout, gets the constant with ``from . import NO_STUDENT_COLUMNS``.
"""

import pandas as pd

# --- recopytex/__init__.py -------------------------------------------------

# Columns of a sheet that describe a question; every other column is a student.
NO_STUDENT_COLUMNS = [
    "Trimestre",
    "Nom",
    "Date",
    "Exercice",
    "Question",
    "Competence",
    "Domaine",
    "Commentaire",
    "Bareme",
    "Niveau",
]

# --- recopytex/csv_extraction.py -------------------------------------------

# Use the fully qualified option name: the short pattern "Precision" matches
# several options on recent pandas and raises OptionError at import time.
pd.set_option("display.precision", 2)

# Lower-cased descriptive columns whose missing values become empty strings.
_TEXT_COLUMNS = ("question", "exercice", "commentaire", "competence")


def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Extract the list of students from df

    :param df: the dataframe
    :param no_student_columns: columns that are not students
    :return: index of the column names of df that are not in
        no_student_columns (sorted, as returned by ``Index.difference``)
    """
    return df.columns.difference(no_student_columns)


def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Flatten the student columns of df into "student" and "score"

    :param df: the dataframe (one row per question)
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per question and student

    Columns of csv files:

    - NO_STUDENT_COLUMNS
    - one for each student

    The result has the columns of no_student_columns followed by "student"
    (the former column name) and "score" (the former cell value). Students
    are stacked one after the other, so the row index restarts at 0 for each
    of them. A sheet without any student column gives an empty dataframe
    with those same columns.
    """
    students = extract_students(df, no_student_columns)
    if len(students) == 0:
        # pd.concat refuses an empty list; return the expected empty layout.
        return pd.DataFrame(columns=[*no_student_columns, "student", "score"])

    return pd.concat(
        [
            pd.melt(
                df,
                id_vars=no_student_columns,
                value_vars=[student],
                var_name="student",
                value_name="score",
            )
            for student in students
        ]
    )


def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Flatten and clean the dataframe extracted from csv

    :param csv_df: data frame read from csv
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per question and student, lower-cased
        column names, and empty strings instead of missing values in the
        "question", "exercice", "commentaire" and "competence" columns
    :raises KeyError: if one of those four columns is absent
    """
    # Forward the caller's column list instead of silently using the default.
    df = flat_df_students(csv_df, no_student_columns)

    df.columns = df.columns.map(str.lower)

    # Assign the filled column back: an in-place fillna on a column selection
    # is deprecated and does not reach df when Copy-on-Write is enabled.
    for column in _TEXT_COLUMNS:
        df[column] = df[column].fillna("")

    return df


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del

# Remainder of the original line, kept verbatim: start of the notebook diff that the following lines continue.
# diff --git a/templates/tpl_evaluation.ipynb b/templates/tpl_evaluation.ipynb index f171e2b..026c8be 100644 --- a/templates/tpl_evaluation.ipynb +++ b/templates/tpl_evaluation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -10,12 +10,13 @@ "from IPython.display import DisplayHandle\n", "import pandas as pd\n", "from pathlib import Path\n", - "from datetime import datetime" + "from datetime import datetime\n", + "from recopytex.csv_extraction import flat_clear_csv" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": { "tags": [ "parameters" @@ -24,35 +25,20 @@ "outputs": [], "source": [ "tribe = \"308\"\n", - "assessment = \"161114_dm2\"\n", - "csv_file = Path(f\"./sheets/{tribe}/{assessment}.csv\")" + "assessment = \"DM1\"\n", + "date = \"15/09/16\"\n", + "csv_file = Path(f\"../sheets/{tribe}/160915_{assessment}.csv\")" ] }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "split_ass = assessment.split(\"_\")\n", - "if len(split_ass) > 1:\n", - " date, *assessment = assessment.split(\"_\")\n", - " date = datetime.strptime(date, \"%y%m%d\")\n", - " assessment = ' '.join(assessment)\n", - "else:\n", - " date = None\n", - " assessment = split_ass[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ - "# dm2 (14/11/2016) pour 308" + "# DM1 (15/09/16) pour 308" ], "text/plain": [ "" @@ -66,7
+52,154 @@ "if date is None:\n", " display(md(f\"# {assessment} pour {tribe}\"))\n", "else:\n", - " display(md(f\"# {assessment} ({date:%d/%m/%Y}) pour {tribe}\"))" + " display(md(f\"# {assessment} ({date}) pour {tribe}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trimestrenomdateexercicequestioncompetencedomainecommentairebaremeniveaustudentscore
01DM115/09/1611.1CalPrio1.01ABDOU Asmahane2
11DM115/09/1611.2CalPrio1.01ABDOU Asmahane3
21DM115/09/1611.3CalPrio1.01ABDOU Asmahane2
31DM115/09/1611.4CalPrio1.01ABDOU Asmahane2
41DM115/09/1611.5CalPrio1.01ABDOU Asmahane2
\n", + "
" + ], + "text/plain": [ + " trimestre nom date exercice question competence domaine commentaire \\\n", + "0 1 DM1 15/09/16 1 1.1 Cal Prio \n", + "1 1 DM1 15/09/16 1 1.2 Cal Prio \n", + "2 1 DM1 15/09/16 1 1.3 Cal Prio \n", + "3 1 DM1 15/09/16 1 1.4 Cal Prio \n", + "4 1 DM1 15/09/16 1 1.5 Cal Prio \n", + "\n", + " bareme niveau student score \n", + "0 1.0 1 ABDOU Asmahane 2 \n", + "1 1.0 1 ABDOU Asmahane 3 \n", + "2 1.0 1 ABDOU Asmahane 2 \n", + "3 1.0 1 ABDOU Asmahane 2 \n", + "4 1.0 1 ABDOU Asmahane 2 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stack_scores = pd.read_csv(csv_file)\n", + "scores = flat_clear_csv(stack_scores)\n", + "scores.head()" ] }, {