repytex/Repytex/tools/extract.py

150 lines
3.7 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
import xlrd
from path import Path
# Default directory scanned for class workbooks (*.xlsx files); it is the
# default value of the ``path`` argument of list_classes and get_class_ws.
notes_path = Path("./")
# Column labels of a marks sheet that describe the assessed item itself.
# Every other column of a sheet is treated as a student (see extract_students).
no_student_columns = [
    "Trimestre",
    "Nom",
    "Date",
    "Exercice",
    "Question",
    "Competence",
    "Domaine",
    "Commentaire",
    "Bareme",
    "Niveau",
]
# Show floats with two decimals. The fully-qualified option key is used on
# purpose: pandas resolves option names as patterns that must match exactly
# one option, and the short form "Precision" becomes ambiguous (OptionError)
# on pandas versions that also define "styler.format.precision".
pd.set_option("display.precision", 2)
def list_classes(path=notes_path):
    """
    Return the names of the classes that have a workbook in ``path``.

    Each ``*.xlsx`` file returned by ``path.files("*.xlsx")`` counts as one
    class; its name is the ``namebase`` of the file.

    :param path: directory to scan; either an object exposing ``files``
        (a ``Path``) or anything ``Path`` can be built from, e.g. a string
    :returns: list of class names, in the order given by ``files``

    NOTE(review): ``namebase`` is reported as deprecated in favour of ``stem``
    in recent releases of the ``path`` package -- confirm against the
    installed version.

    >>> list_classes()
    []
    >>> p = Path("./samples/")
    >>> list_classes(p)
    ['503', '312', '308']
    >>> list_classes("./samples/")
    ['503', '312', '308']
    """
    # Plain strings have no ``files`` method, so wrap them in a Path first.
    directory = path if hasattr(path, "files") else Path(path)
    names = []
    for workbook in directory.files("*.xlsx"):
        names.append(workbook.namebase)
    return names
def get_class_ws(classe, path=notes_path):
    """
    Open the Excel workbook of a class.

    :param classe: class name, as returned by ``list_classes``
    :param path: directory holding the workbooks
    :returns: ``pd.ExcelFile`` opened on ``<path>/<classe>.xlsx``
    :raises ValueError: if ``classe`` is not among ``list_classes(path)``
    """
    folder = Path(path)
    # Guard clause: refuse unknown classes and tell the caller what exists.
    if classe not in list_classes(folder):
        message = "This class is not disponible in {p}. You have to choose in {c}"
        raise ValueError(message.format(p=folder, c=list_classes(folder)))
    # ``/`` binds tighter than ``+``: join first, then append the extension.
    workbook = folder / classe + ".xlsx"
    return pd.ExcelFile(workbook)
def extract_students(df, no_student_columns=no_student_columns):
    """
    Return the column labels of ``df`` that designate students.

    A column is considered a student when its label is not listed in
    ``no_student_columns``.

    :param df: DataFrame parsed from a marks sheet
    :param no_student_columns: labels of the descriptive (non-student) columns
    :returns: the result of ``df.columns.difference(no_student_columns)``
    """
    return df.columns.difference(no_student_columns)
def check_students(dfs, no_student_columns=no_student_columns):
    """
    Build the list of students shared by all the given sheets.

    :param dfs: DataFrames parsed from the marks sheets of one workbook
    :param no_student_columns: labels of the descriptive (non-student)
        columns, forwarded to ``extract_students`` for every sheet
    :returns: the students of the first sheet, once all sheets are known to
        hold the same students
    :raises ValueError: if the sheets do not all hold the same students
    :raises IndexError: if ``dfs`` is empty
    """
    # Forward the caller's column list: it used to be dropped, so a custom
    # ``no_student_columns`` had no effect.
    dfs_students = [extract_students(df, no_student_columns) for df in dfs]
    if not are_equal(dfs_students):
        raise ValueError("Not same list of students amoung worksheets")
    return dfs_students[0]
def are_equal(elems):
    """
    Test whether all items of ``elems`` hold the same values, order ignored.

    Each item is sorted before comparison, so ``[1, 2, 3]`` and ``[1, 3, 2]``
    are considered equal.

    :param elems: iterable of sortable iterables
    :returns: True if every item equals the first one once sorted; True as
        well when ``elems`` is empty or holds a single item

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True
    """
    # Materialise once so that any iterable (not only indexable sequences)
    # is accepted and an empty input does not raise IndexError.
    normalized = [sorted(e) for e in elems]
    if not normalized:
        return True
    first = normalized[0]
    return all(other == first for other in normalized[1:])
def flat_df_students(df, students):
    """
    Flatten a marks sheet into one record per (row of ``df``, student).

    For every row of ``df`` and every student, a record is built with the
    row's values for the labels listed in the module-level
    ``no_student_columns``, plus two extra keys: ``"Eleve"`` (the student)
    and ``"Note"`` (the value found in that student's column for the row).

    :param df: DataFrame parsed from a marks sheet
    :param students: labels of the student columns of ``df``
    :returns: DataFrame built from the list of records
    """
    records = []
    # Transposing makes each original row addressable as a column Series.
    transposed = df.T
    for label in transposed:
        row = transposed[label]
        description = row[no_student_columns].to_dict()
        for student in students:
            # dict(mapping, **extra) copies, so records never share state.
            records.append(dict(description, Eleve=student, Note=row.loc[student]))
    # A list of dicts is an input of the DataFrame constructor itself;
    # DataFrame.from_dict is meant for dict input.
    return pd.DataFrame(records)
def parse_sheets(ws,
                 marks_sheetnames=("Notes", "Connaissances", "Calcul mental")):
    """
    Parse the sheets of ``ws`` whose names are listed in ``marks_sheetnames``.

    Names that do not exist in the workbook are skipped silently.

    :param ws: the workbook; must expose ``sheet_names`` and
        ``parse(sheetname)`` as ``pd.ExcelFile`` does
    :param marks_sheetnames: names of the sheets to extract
    :returns: list of parsed sheets, in the order of ``marks_sheetnames``
    """
    # Checking the workbook's own sheet list keeps the "skip what is missing"
    # behaviour independent of the Excel engine: only xlrd raises XLRDError
    # for an unknown sheet, and catching it also hid unrelated read errors.
    available = set(ws.sheet_names)
    return [ws.parse(sheetname)
            for sheetname in marks_sheetnames
            if sheetname in available]
def extract_flat_marks(ws,
                       marks_sheetnames=("Notes", "Connaissances", "Calcul mental")):
    """
    Extract, flatten and concatenate the marks of a workbook.

    :param ws: the workbook, as accepted by ``parse_sheets``
    :param marks_sheetnames: names of the sheets holding marks
    :returns: a single DataFrame stacking the output of ``flat_df_students``
        for every parsed sheet, where missing values of the "Question",
        "Exercice", "Commentaire" and "Competence" columns are replaced
        by empty strings
    :raises ValueError: if the sheets do not share the same students
        (raised by ``check_students``)
    """
    sheets = parse_sheets(ws, marks_sheetnames)
    students = check_students(sheets)
    # One concat over all sheets instead of growing a frame inside a loop.
    flat_df = pd.concat([flat_df_students(sheet, students) for sheet in sheets])
    # Assign the filled column back: ``flat_df[col].fillna(..., inplace=True)``
    # acts on an intermediate object and is not guaranteed to update the frame.
    for column in ("Question", "Exercice", "Commentaire", "Competence"):
        flat_df[column] = flat_df[column].fillna("")
    return flat_df
# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del