diff --git a/recopytex/csv_extraction.py b/recopytex/csv_extraction.py index 5007052..0844e00 100644 --- a/recopytex/csv_extraction.py +++ b/recopytex/csv_extraction.py @@ -8,6 +8,12 @@ from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE pd.set_option("Precision", 2) +def try_replace(x, old, new): + try: + return str(x).replace(old, new) + except ValueError: + return x + def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()): """ Extract the list of students from df @@ -44,9 +50,9 @@ def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()): value_vars=st, var_name=COLUMNS["student"], value_name=COLUMNS["score"], - ) + ).dropna(subset=[COLUMNS["score"]]) ) - return pd.concat(scores).dropna(subset=[COLUMNS["score"]]) + return pd.concat(scores) def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()): @@ -67,11 +73,12 @@ def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()): df[COLUMNS["score"]] = pd.to_numeric( df[COLUMNS["score"]] .replace(VALIDSCORE["NOANSWER"], -1) - .apply(lambda x: str(x).replace(",", ".")) + .apply(lambda x: try_replace(x, ",", ".")) ) df[COLUMNS["score_rate"]] = pd.to_numeric( df[COLUMNS["score_rate"]] - .apply(lambda x: str(x).replace(",", ".")) + .apply(lambda x: try_replace(x, ",", ".")), + errors="coerce" ) return df diff --git a/recopytex/df_marks_manip.py b/recopytex/df_marks_manip.py index 964d416..cb57944 100644 --- a/recopytex/df_marks_manip.py +++ b/recopytex/df_marks_manip.py @@ -77,14 +77,14 @@ def score_to_level(x): >>> score_to_level(df.loc[10]) 2 """ - # -1 is no answer - if x[COLUMNS["score"]] == -1: - return x[COLUMNS["score"]] + # negatives are no answer or negatives points + if x[COLUMNS["score"]] <= -1: + return np.nan if x[COLUMNS["is_leveled"]]: return int(x[COLUMNS["score"]]) - else: - return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3)) + + return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3)) # DataFrame columns manipulations diff --git a/requirement.txt b/requirement.txt deleted file mode 100644 index 55bbf3a..0000000 --- a/requirement.txt +++ /dev/null @@ -1,66 +0,0 @@ -ansiwrap==0.8.4 -attrs==19.1.0 -backcall==0.1.0 -bleach==3.1.0 -certifi==2019.6.16 -chardet==3.0.4 -Click==7.0 -colorama==0.4.1 -cycler==0.10.0 -decorator==4.4.0 -defusedxml==0.6.0 -entrypoints==0.3 -future==0.17.1 -idna==2.8 -importlib-resources==1.0.2 -ipykernel==5.1.1 -ipython==7.7.0 -ipython-genutils==0.2.0 -ipywidgets==7.5.1 -jedi==0.14.1 -Jinja2==2.10.1 -jsonschema==3.0.2 -jupyter==1.0.0 -jupyter-client==5.3.1 -jupyter-console==6.0.0 -jupyter-core==4.5.0 -jupytex==0.0.3 -kiwisolver==1.1.0 -MarkupSafe==1.1.1 -matplotlib==3.1.1 -mistune==0.8.4 -nbconvert==5.5.0 -nbformat==4.4.0 -notebook==6.0.0 -numpy==1.17.0 -pandas==0.25.0 -pandocfilters==1.4.2 -papermill==1.0.1 -parso==0.5.1 -pexpect==4.7.0 -pickleshare==0.7.5 -prometheus-client==0.7.1 -prompt-toolkit==2.0.9 -ptyprocess==0.6.0 -Pygments==2.4.2 -pyparsing==2.4.2 -pyrsistent==0.15.4 -python-dateutil==2.8.0 -pytz==2019.2 -PyYAML==5.1.2 -pyzmq==18.0.2 -qtconsole==4.5.2 -requests==2.22.0 -Send2Trash==1.5.0 -six==1.12.0 -tenacity==5.0.4 -terminado==0.8.2 -testpath==0.4.2 -textwrap3==0.9.2 -tornado==6.0.3 -tqdm==4.32.2 -traitlets==4.3.2 -urllib3==1.25.3 -wcwidth==0.1.7 -webencodings==0.5.1 -widgetsnbextension==3.5.1 diff --git a/requirements.txt b/requirements.txt index 7355ce0..c57f4d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,6 +50,7 @@ pytz==2019.2 PyYAML==5.1.2 pyzmq==18.0.2 qtconsole==4.5.2 +-e git+git_opytex:/lafrite/recopytex.git@e9a8310f151ead60434ae944d726a2fd22b23d06#egg=Recopytex requests==2.22.0 scipy==1.3.0 seaborn==0.9.0 diff --git a/templates/tpl_evaluation.ipynb b/templates/tpl_evaluation.ipynb index 7b2b6b7..ac851cb 100644 --- a/templates/tpl_evaluation.ipynb +++ b/templates/tpl_evaluation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -12,12 +12,13 @@ "from pathlib import Path\n", "from datetime import datetime\n", "from recopytex import flat_clear_csv, pp_q_scores\n", + "#import prettytable as pt\n", "%matplotlib inline" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 2, "metadata": { "tags": [ "parameters" @@ -33,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -58,18 +59,18 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "stack_scores = pd.read_csv(csv_file)\n", + "stack_scores = pd.read_csv(csv_file, encoding=\"latin_1\")\n", "scores = flat_clear_csv(stack_scores).dropna(subset=[\"Score\"])\n", "scores = pp_q_scores(scores)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -145,7 +146,7 @@ " ALI SAID Anchourati 0.00 6.0" ] }, - "execution_count": 24, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -157,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -338,7 +339,7 @@ "SAIDALI Irichad 5.00 12.0" ] }, - "execution_count": 25, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -367,7 +368,7 @@ "Name: Note, dtype: float64" ] }, - "execution_count": 26, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -378,16 +379,16 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 32, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, @@ -434,7 +435,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.3" } }, "nbformat": 4,