repytex/notes_tools/tools/extract.py

150 lines
3.7 KiB
Python
Raw Normal View History

2016-11-06 17:54:42 +00:00
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
import xlrd
from path import Path
# Default directory searched for the per-class ``.xlsx`` workbooks.
notes_path = Path("./")
2016-11-08 08:06:06 +00:00
2017-03-07 05:32:44 +00:00
# Descriptive columns of a marks sheet; any column not listed here is
# treated as a student column.
no_student_columns = [
    "Trimestre",
    "Nom",
    "Date",
    "Exercice",
    "Question",
    "Competence",
    "Domaine",
    "Commentaire",
    "Bareme",
    "Niveau",
]
2016-11-06 17:54:42 +00:00
2016-11-25 20:56:53 +00:00
# Display floats with two decimal places.  The full option key is spelled out
# because set_option treats its argument as a pattern: the bare "Precision"
# is ambiguous on pandas versions that also define "styler.format.precision".
pd.set_option("display.precision", 2)
2016-11-06 17:54:42 +00:00
def list_classes(path=notes_path):
    """Return the names of the classes that have a workbook in ``path``.

    A class name is the file name of an ``.xlsx`` workbook without its
    extension.  ``path`` may be a ``Path`` or a plain string.

    >>> list_classes()
    []
    >>> sorted(list_classes(Path("./samples/")))
    ['308', '312', '503']
    >>> sorted(list_classes("./samples/"))
    ['308', '312', '503']

    :param path: directory to look into
    :returns: list of class names
    """
    # Path() accepts both strings and Path objects, so coerce once up front.
    folder = Path(path)
    names = []
    for workbook in folder.files("*.xlsx"):
        names.append(workbook.namebase)
    return names
def get_class_ws(classe, path=notes_path):
    """Open the Excel workbook of one class.

    :param classe: class name, i.e. the workbook file name without ``.xlsx``
    :param path: directory containing the workbooks
    :returns: ``pd.ExcelFile`` opened on ``<path>/<classe>.xlsx``
    :raises ValueError: if ``path`` holds no workbook named after ``classe``
    """
    folder = Path(path)
    available = list_classes(folder)
    # Guard clause: reject unknown classes before touching the file.
    if classe not in available:
        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p=folder, c=available))
    return pd.ExcelFile(folder / classe + ".xlsx")
2016-11-06 17:54:42 +00:00
2017-03-07 05:32:44 +00:00
def extract_students(df, no_student_columns=no_student_columns):
    """Return the student columns of a marks sheet.

    :param df: DataFrame parsed from one marks sheet
    :param no_student_columns: column labels that do not designate a student
    :returns: the labels of ``df.columns`` that are absent from
        ``no_student_columns``
    """
    return df.columns.difference(no_student_columns)
2017-03-07 05:32:44 +00:00
def check_students(dfs, no_student_columns=no_student_columns):
    """Return the student list shared by every sheet.

    :param dfs: DataFrames parsed from the marks sheets
    :param no_student_columns: column labels that do not designate a student
    :returns: the student labels of the first sheet
    :raises ValueError: if ``dfs`` is empty or if the sheets do not all list
        the same students
    """
    # Forward the caller's column list; it used to be accepted but ignored,
    # so the module-level default was always applied.
    dfs_students = [extract_students(df, no_student_columns) for df in dfs]
    if not dfs_students:
        # Fail with an explicit message instead of an IndexError further down.
        raise ValueError("No worksheet to extract students from")
    if not are_equal(dfs_students):
        raise ValueError("Not same list of students amoung worksheets")
    return dfs_students[0]
def are_equal(elems):
    """Tell whether all items of ``elems`` hold the same elements.

    Items are compared after sorting, so the order inside each item is
    irrelevant.  An empty ``elems`` (nothing to contradict) yields ``True``.

    >>> are_equal([[1, 2, 3], [1, 3, 2], [1, 3, 2]])
    True
    >>> are_equal([[0, 2, 3], [1, 3, 2], [1, 3, 2]])
    False
    >>> are_equal([])
    True

    :param elems: sequence of iterables of sortable values
    :returns: ``True`` if every item equals the first one once sorted
    """
    sorted_elems = [sorted(e) for e in elems]
    # With no item at all the generator is empty, so index 0 is never read.
    return all(e == sorted_elems[0] for e in sorted_elems[1:])
def flat_df_students(df, students, meta_columns=None):
    """Reshape a marks sheet to one row per (sheet row, student) pair.

    Every row of ``df`` has one column per student.  The result repeats the
    descriptive columns of that row once per student and adds two columns:
    ``Eleve`` (the student label) and ``Note`` (the value found in that
    student's column).

    :param df: DataFrame parsed from one marks sheet
    :param students: labels of the student columns of ``df``
    :param meta_columns: descriptive columns copied to every output row;
        defaults to the module-level ``no_student_columns``
    :returns: DataFrame with ``len(df) * len(students)`` rows
    :raises KeyError: if a descriptive or student column is missing in ``df``
    """
    if meta_columns is None:
        meta_columns = no_student_columns
    meta_columns = list(meta_columns)  # a tuple would be read as a single key

    records = []
    # iterrows walks the rows directly: no transposed copy of the sheet is
    # built and rows are not looked up by label, so duplicated row labels
    # cannot make the lookup ambiguous.
    for _, row in df.iterrows():
        common = row[meta_columns].to_dict()
        for student in students:
            record = dict(common)
            record["Eleve"] = student
            record["Note"] = row.loc[student]
            records.append(record)
    return pd.DataFrame(records)
2016-11-13 12:35:44 +00:00
def parse_sheets(ws,
                 marks_sheetnames=("Notes", "Connaissances", "Calcul mental")):
    """Parse the marks sheets that exist in a workbook.

    Requested sheets that the workbook does not contain are skipped.

    :param ws: the opened workbook (exposes ``sheet_names`` and ``parse``)
    :param marks_sheetnames: names of the sheets to extract
    :returns: list of parsed sheets, one per sheet found, in requested order
    """
    # Membership is tested up front instead of catching the reader's
    # "missing sheet" exception: that exception type depends on the Excel
    # engine in use, and catching it could also hide genuine parsing errors.
    present = set(ws.sheet_names)
    return [ws.parse(name) for name in marks_sheetnames if name in present]
2016-11-06 17:54:42 +00:00
2016-11-13 12:35:44 +00:00
def extract_flat_marks(ws,
                       marks_sheetnames=("Notes", "Connaissances", "Calcul mental")):
    """Extract, flatten and concatenate the marks of a workbook.

    :param ws: the opened workbook
    :param marks_sheetnames: names of the sheets holding marks
    :returns: DataFrame with one row per (sheet row, student) pair over all
        the sheets found, where missing values of the ``Question``,
        ``Exercice``, ``Commentaire`` and ``Competence`` columns are
        replaced by empty strings
    """
    sheets = parse_sheets(ws, marks_sheetnames)
    students = check_students(sheets)

    # One concat over all sheets instead of growing a frame inside the loop.
    flat_df = pd.concat([flat_df_students(sheet, students) for sheet in sheets])

    # Assign the filled columns back: calling fillna(inplace=True) on a
    # column selection acts on a temporary object and does not update
    # flat_df when pandas runs with Copy-on-Write.
    text_columns = ["Question", "Exercice", "Commentaire", "Competence"]
    flat_df[text_columns] = flat_df[text_columns].fillna("")
    return flat_df
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del