Feat: clean every thing

This commit is contained in:
Bertrand Benjamin 2022-12-20 06:00:50 +01:00
parent 7058c79975
commit a7aeb12844
14 changed files with 3 additions and 2789 deletions

View File

@ -1,8 +1,5 @@
# Encore une autre façon d'enregistrer et d'analyser mes notes # Recopytex
Cette fois ci, on utilise: ## Backend API
- Des fichiers csv pour stocker les notes ## Frontend
- Des fichiers yaml pour les infos sur les élèves
- Des notebooks pour l'analyse
- Papermill pour produire les notesbooks à partir de template

View File

@ -1,4 +0,0 @@
---
source: sheets/
output: reports/
templates: templates/

View File

@ -1,5 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
from .csv_extraction import flat_df_students, flat_df_for
from .df_marks_manip import pp_q_scores

View File

@ -1,30 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
NO_ST_COLUMNS = {
"assessment": "Nom",
"term": "Trimestre",
"date": "Date",
"exercise": "Exercice",
"question": "Question",
"competence": "Competence",
"theme": "Domaine",
"comment": "Commentaire",
"is_leveled": "Est_nivele",
"score_rate": "Bareme",
}
COLUMNS = {
**NO_ST_COLUMNS,
"student": "Eleve",
"score": "Score",
"mark": "Note",
"level": "Niveau",
"normalized": "Normalise",
}
VALIDSCORE = {
"NOTFILLED": "", # The item is not scored yet
"NOANSWER": ".", # Student gives no answer (this score will impact the fianl mark)
"ABS": "a", # Student has absent (this score won't be impact the final mark)
}

View File

@ -1,119 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
""" Extracting data from xlsx files """
import pandas as pd
from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE
pd.set_option("Precision", 2)
def try_replace(x, old, new):
try:
return str(x).replace(old, new)
except ValueError:
return x
def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
""" Extract the list of students from df
:param df: the dataframe
:param no_student_columns: columns that are not students
:return: list of students
"""
students = df.columns.difference(no_student_columns)
return students
def flat_df_students(
df, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Flat the dataframe by returning a dataframe with on student on each line
:param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students
:return: dataframe with one row per questions and students
Columns of csv files:
- NO_ST_COLUMNS meta data on questions
- one for each students
This function flat student's columns to "student" and "score"
"""
students = extract_students(df, no_student_columns)
scores = []
for st in students:
scores.append(
pd.melt(
df,
id_vars=no_student_columns,
value_vars=st,
var_name=COLUMNS["student"],
value_name=COLUMNS["score"],
).dropna(subset=[COLUMNS["score"]])
)
if postprocessing:
return postprocess(pd.concat(scores))
return pd.concat(scores)
def flat_df_for(
df, student, no_student_columns=NO_ST_COLUMNS.values(), postprocessing=True
):
""" Extract the data only for one student
:param df: the dataframe (one row per questions)
:param no_student_columns: columns that are not students
:return: dataframe with one row per questions and students
Columns of csv files:
- NO_ST_COLUMNS meta data on questions
- one for each students
"""
students = extract_students(df, no_student_columns)
if student not in students:
raise KeyError("This student is not in the table")
st_df = df[list(no_student_columns) + [student]]
st_df = st_df.rename(columns={student: COLUMNS["score"]}).dropna(
subset=[COLUMNS["score"]]
)
if postprocessing:
return postprocess(st_df)
return st_df
def postprocess(df):
""" Postprocessing score dataframe
- Replace na with an empty string
- Replace "NOANSWER" with -1
- Turn commas number to dot numbers
"""
df[COLUMNS["question"]].fillna("", inplace=True)
df[COLUMNS["exercise"]].fillna("", inplace=True)
df[COLUMNS["comment"]].fillna("", inplace=True)
df[COLUMNS["competence"]].fillna("", inplace=True)
df[COLUMNS["score"]] = pd.to_numeric(
df[COLUMNS["score"]]
.replace(VALIDSCORE["NOANSWER"], -1)
.apply(lambda x: try_replace(x, ",", "."))
)
df[COLUMNS["score_rate"]] = pd.to_numeric(
df[COLUMNS["score_rate"]].apply(lambda x: try_replace(x, ",", ".")),
errors="coerce",
)
return df
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del

View File

@ -1,206 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
from math import ceil, floor
from .config import COLUMNS, VALIDSCORE
# Values manipulations
def round_half_point(val):
try:
return 0.5 * ceil(2.0 * val)
except ValueError:
return val
except TypeError:
return val
def score_to_mark(x):
""" Compute the mark
if the item is leveled then the score is multiply by the score_rate
otherwise it copies the score
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> score_to_mark(df.loc[0])
1.0
>>> score_to_mark(df.loc[10])
1.3333333333333333
"""
# -1 is no answer
if x[COLUMNS["score"]] == -1:
return 0
if x[COLUMNS["is_leveled"]]:
if x[COLUMNS["score"]] not in [0, 1, 2, 3]:
raise ValueError(f"The evaluation is out of range: {x[COLUMNS['score']]} at {x}")
#return round_half_point(x[COLUMNS["score"]] * x[COLUMNS["score_rate"]] / 3)
return round(x[COLUMNS["score"]] * x[COLUMNS["score_rate"]] / 3, 2)
if x[COLUMNS["score"]] > x[COLUMNS["score_rate"]]:
raise ValueError(
f"The score ({x['score']}) is greated than the rating scale ({x[COLUMNS['score_rate']]}) at {x}"
)
return x[COLUMNS["score"]]
def score_to_level(x):
""" Compute the level (".",0,1,2,3).
:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> score_to_level(df.loc[0])
3
>>> score_to_level(df.loc[1])
1
>>> score_to_level(df.loc[2])
'na'
>>> score_to_level(df.loc[3])
3
>>> score_to_level(df.loc[5])
3
>>> score_to_level(df.loc[10])
2
"""
# negatives are no answer or negatives points
if x[COLUMNS["score"]] <= -1:
return np.nan
if x[COLUMNS["is_leveled"]]:
return int(x[COLUMNS["score"]])
return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3))
# DataFrame columns manipulations
def compute_mark(df):
""" Add Mark column to df
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> compute_mark(df)
0 1.00
1 0.33
2 2.00
3 1.50
4 0.67
5 2.00
6 0.67
7 1.00
8 1.50
9 1.00
10 1.33
11 2.00
dtype: float64
"""
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
score_to_mark, axis=1
)
def compute_level(df):
""" Add Mark column to df
:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> compute_level(df)
0 na
1 1
2 3
3 3
4 1
5 3
6 2
7 3
8 3
9 2
10 2
11 3
dtype: object
"""
return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(
score_to_level, axis=1
)
def compute_normalized(df):
""" Compute the normalized mark (Mark / score_rate)
:param df: DataFrame with "Mark" and COLUMNS["score_rate"] columns
>>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
... COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
... COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
... }
>>> df = pd.DataFrame(d)
>>> df["Mark"] = compute_marks(df)
>>> compute_normalized(df)
0 1.00
1 0.33
2 1.00
3 0.75
4 0.33
5 1.00
6 0.67
7 1.00
8 0.75
9 0.50
10 0.67
11 1.00
dtype: float64
"""
return df[COLUMNS["mark"]] / df[COLUMNS["score_rate"]]
# Postprocessing question scores
def pp_q_scores(df):
""" Postprocessing questions scores dataframe
:param df: questions-scores dataframe
:return: same data frame with mark, level and normalize columns
"""
assign = {
COLUMNS["mark"]: compute_mark,
COLUMNS["level"]: compute_level,
COLUMNS["normalized"]: compute_normalized,
}
return df.assign(**assign)
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del

View File

@ -1,10 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import yaml
CONFIGPATH = "recoconfig.yml"
with open(CONFIGPATH, "r") as configfile:
config = yaml.load(configfile, Loader=yaml.FullLoader)

View File

@ -1,160 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import click
from pathlib import Path
from datetime import datetime
from PyInquirer import prompt, print_json
import pandas as pd
import numpy as np
from .config import config
from ..config import NO_ST_COLUMNS
class PromptAbortException(EOFError):
def __init__(self, message, errors=None):
# Call the base class constructor with the parameters it needs
super(PromptAbortException, self).__init__("Abort questionnary", errors)
def get_tribes(answers):
""" List tribes based on subdirectory of config["source"] which have an "eleves.csv" file inside """
return [
p.name for p in Path(config["source"]).iterdir() if (p / "eleves.csv").exists()
]
def prepare_csv():
items = new_eval()
item = items[0]
# item = {"tribe": "308", "date": datetime.today(), "assessment": "plop"}
csv_output = (
Path(config["source"])
/ item["tribe"]
/ f"{item['date']:%y%m%d}_{item['assessment']}.csv"
)
students = pd.read_csv(Path(config["source"]) / item["tribe"] / "eleves.csv")["Nom"]
columns = list(NO_ST_COLUMNS.keys())
items = [[it[c] for c in columns] for it in items]
columns = list(NO_ST_COLUMNS.values())
items_df = pd.DataFrame.from_records(items, columns=columns)
for s in students:
items_df[s] = np.nan
items_df.to_csv(csv_output, index=False, date_format="%d/%m/%Y")
click.echo(f"Saving csv file to {csv_output}")
def new_eval(answers={}):
click.echo(f"Préparation d'un nouveau devoir")
eval_questions = [
{"type": "input", "name": "assessment", "message": "Nom de l'évaluation",},
{
"type": "list",
"name": "tribe",
"message": "Classe concernée",
"choices": get_tribes,
},
{
"type": "input",
"name": "date",
"message": "Date du devoir (%y%m%d)",
"default": datetime.today().strftime("%y%m%d"),
"filter": lambda val: datetime.strptime(val, "%y%m%d"),
},
{
"type": "list",
"name": "term",
"message": "Trimestre",
"choices": ["1", "2", "3"],
},
]
eval_ans = prompt(eval_questions)
items = []
add_exo = True
while add_exo:
ex_items = new_exercice(eval_ans)
items += ex_items
add_exo = prompt(
[
{
"type": "confirm",
"name": "add_exo",
"message": "Ajouter un autre exercice",
"default": True,
}
]
)["add_exo"]
return items
def new_exercice(answers={}):
exercise_questions = [
{"type": "input", "name": "exercise", "message": "Nom de l'exercice"},
]
click.echo(f"Nouvel exercice")
exercise_ans = prompt(exercise_questions, answers=answers)
items = []
add_item = True
while add_item:
try:
item_ans = new_item(exercise_ans)
except PromptAbortException:
click.echo("Création de l'item annulée")
else:
items.append(item_ans)
add_item = prompt(
[
{
"type": "confirm",
"name": "add_item",
"message": f"Ajouter un autre item pour l'exercice {exercise_ans['exercise']}",
"default": True,
}
]
)["add_item"]
return items
def new_item(answers={}):
item_questions = [
{"type": "input", "name": "question", "message": "Nom de l'item",},
{"type": "input", "name": "comment", "message": "Commentaire",},
{
"type": "list",
"name": "competence",
"message": "Competence",
"choices": ["Cher", "Rep", "Mod", "Rai", "Cal", "Com"],
},
{"type": "input", "name": "theme", "message": "Domaine",},
{
"type": "confirm",
"name": "is_leveled",
"message": "Évaluation par niveau",
"default": True,
},
{"type": "input", "name": "score_rate", "message": "Bareme"},
{
"type": "confirm",
"name": "correct",
"message": "Tout est correct?",
"default": True,
},
]
click.echo(f"Nouvelle question pour l'exercice {answers['exercise']}")
item_ans = prompt(item_questions, answers=answers)
if item_ans["correct"]:
return item_ans
raise PromptAbortException("Abort item creation")

View File

@ -1,102 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import click
from pathlib import Path
import yaml
import sys
import papermill as pm
from datetime import datetime
from .prepare_csv import prepare_csv
from .config import config
@click.group()
def cli():
pass
@cli.command()
def print_config():
click.echo(f"Config file is {CONFIGPATH}")
click.echo("It contains")
click.echo(config)
def reporting(csv_file):
# csv_file = Path(csv_file)
tribe_dir = csv_file.parent
csv_filename = csv_file.name.split(".")[0]
assessment = str(csv_filename).split("_")[-1].capitalize()
date = str(csv_filename).split("_")[0]
try:
date = datetime.strptime(date, "%y%m%d")
except ValueError:
date = datetime.today().strptime(date, "%y%m%d")
tribe = str(tribe_dir).split("/")[-1]
template = Path(config["templates"]) / "tpl_evaluation.ipynb"
dest = Path(config["output"]) / tribe / csv_filename
dest.mkdir(parents=True, exist_ok=True)
click.echo(f"Building {assessment} ({date:%d/%m/%y}) report")
pm.execute_notebook(
str(template),
str(dest / f"{assessment}.ipynb"),
parameters=dict(
tribe=tribe,
assessment=assessment,
date=f"{date:%d/%m/%y}",
csv_file=str(csv_file.absolute()),
),
)
@cli.command()
@click.argument("target", required=False)
def report(target=""):
""" Make a report for the eval
:param target: csv file or a directory where csvs are
"""
try:
if target.endswith(".csv"):
csv = Path(target)
if not csv.exists():
click.echo(f"{target} does not exists")
sys.exit(1)
if csv.suffix != ".csv":
click.echo(f"{target} has to be a csv file")
sys.exit(1)
csvs = [csv]
else:
csvs = list(Path(target).glob("**/*.csv"))
except AttributeError:
csvs = list(Path(config["source"]).glob("**/*.csv"))
for csv in csvs:
click.echo(f"Processing {csv}")
try:
reporting(csv)
except pm.exceptions.PapermillExecutionError as e:
click.echo(f"Error with {csv}: {e}")
@cli.command()
def prepare():
""" Prepare csv file """
items = prepare_csv()
click.echo(items)
@cli.command()
@click.argument("tribe")
def random_pick(tribe):
""" Randomly pick a student """
pass

View File

@ -1,76 +0,0 @@
ansiwrap==0.8.4
appdirs==1.4.3
attrs==19.1.0
backcall==0.1.0
black==19.10b0
bleach==3.1.0
certifi==2019.6.16
chardet==3.0.4
Click==7.0
colorama==0.4.1
cycler==0.10.0
decorator==4.4.0
defusedxml==0.6.0
entrypoints==0.3
future==0.17.1
idna==2.8
importlib-resources==1.0.2
ipykernel==5.1.3
ipython==7.11.1
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.15.2
Jinja2==2.10.3
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==5.3.4
jupyter-console==6.1.0
jupyter-core==4.6.1
jupytex==0.0.3
kiwisolver==1.1.0
Markdown==3.1.1
MarkupSafe==1.1.1
matplotlib==3.1.2
mistune==0.8.4
nbconvert==5.6.1
nbformat==5.0.3
notebook==6.0.3
numpy==1.18.1
pandas==0.25.3
pandocfilters==1.4.2
papermill==1.2.1
parso==0.5.2
pathspec==0.7.0
pexpect==4.8.0
pickleshare==0.7.5
prometheus-client==0.7.1
prompt-toolkit==1.0.14
ptyprocess==0.6.0
Pygments==2.5.2
PyInquirer==1.0.3
pyparsing==2.4.6
pyrsistent==0.15.7
python-dateutil==2.8.0
pytz==2019.3
PyYAML==5.3
pyzmq==18.1.1
qtconsole==4.6.0
-e git+git_opytex:/lafrite/recopytex.git@7e026bedb24c1ca8bef3b71b3d63f8b0d6916e81#egg=Recopytex
regex==2020.1.8
requests==2.22.0
scipy==1.4.1
Send2Trash==1.5.0
six==1.12.0
tenacity==6.0.0
terminado==0.8.3
testpath==0.4.4
textwrap3==0.9.2
toml==0.10.0
tornado==6.0.3
tqdm==4.41.1
traitlets==4.3.2
typed-ast==1.4.1
urllib3==1.25.8
wcwidth==0.1.8
webencodings==0.5.1
widgetsnbextension==3.5.1

View File

@ -1,31 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
from setuptools import setup, find_packages
setup(
name='Recopytex',
version='1.1.1',
description='Assessment analysis',
author='Benjamin Bertrand',
author_email='',
packages=find_packages(),
include_package_data=True,
install_requires=[
'Click',
'pandas',
'numpy',
'papermill',
'pyyaml',
'PyInquirer',
],
entry_points='''
[console_scripts]
recopytex=recopytex.scripts.recopytex:cli
''',
)
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff