repytex/notes_tools/extract.py

136 lines
3.3 KiB
Python
Raw Normal View History

2016-11-06 17:54:42 +00:00
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
import numpy as np
import xlrd
from path import Path
# Default directory in which the per-class ``.xlsx`` workbooks are looked up
# (used as the default ``path`` of list_classes and get_class_ws).
notes_path = Path("./")
2016-11-08 08:06:06 +00:00
2016-11-06 17:54:42 +00:00
# Column labels that describe an evaluation item rather than a student.
# Every other column of a marks sheet is treated as a student column.
notStudent = [
    "Trimestre",
    "Nom",
    "Date",
    "Exercice",
    "Question",
    "Competence",
    "Domaine",
    "Commentaire",
    "Bareme",
    "Niveau",
]
def list_classes(path = notes_path):
    """
    Return the names of the classes found in a directory.

    A class is any ``*.xlsx`` file directly inside ``path``; its name is the
    file name without the extension.

    :param path: directory to scan, either a ``Path`` or anything ``Path``
        accepts (e.g. a string). Defaults to ``notes_path``.
    :returns: list of class names

    >>> list_classes()
    ['509', '503', '308', '312']
    >>> p = Path("./")
    >>> list_classes(p)
    ['509', '503', '308', '312']
    >>> list_classes("./")
    ['509', '503', '308', '312']
    """
    def _workbook_names(folder):
        # One name per Excel workbook found in `folder`.
        return [workbook.namebase for workbook in folder.files("*.xlsx")]

    try:
        # Optimistically treat `path` as a Path-like object...
        return _workbook_names(path)
    except AttributeError:
        # ...and fall back to wrapping it (e.g. when a plain string is given).
        return _workbook_names(Path(path))
def get_class_ws(classe, path = notes_path):
    """
    Open the Excel workbook of a class.

    :param classe: name of the class (workbook file name without ``.xlsx``)
    :param path: directory holding the workbooks. Defaults to ``notes_path``.
    :returns: ``pd.ExcelFile`` opened on ``<path>/<classe>.xlsx``
    :raises ValueError: if ``classe`` is not one of ``list_classes(path)``
    """
    folder = Path(path)

    # Guard clause: refuse unknown classes and tell the caller what exists.
    if classe not in list_classes(folder):
        message = "This class is not disponible in {p}. You have to choose in {c}"
        raise ValueError(message.format(p = folder, c = list_classes(folder)))

    workbook = (folder / classe) + ".xlsx"
    return pd.ExcelFile(workbook)
2016-11-06 17:54:42 +00:00
def extract_students(df, notStudent = notStudent):
    """
    Return the student columns of a marks sheet.

    :param df: DataFrame whose columns mix item descriptions and students
    :param notStudent: column labels that are not students
    :returns: the labels of ``df.columns`` that are not in ``notStudent``
    """
    return df.columns.difference(notStudent)
def check_students(dfs, notStudent = notStudent):
    """
    Build the students list shared by several marks sheets.

    The students of every sheet are extracted with ``extract_students`` and
    must be the same (up to ordering) in all sheets.

    :param dfs: list of DataFrames (one per marks sheet)
    :param notStudent: column labels that are not students; forwarded to
        ``extract_students``
    :returns: the students of the first sheet
    :raises ValueError: if ``dfs`` is empty or if the sheets do not all have
        the same students
    """
    # Forward notStudent so that a caller-supplied list is actually honoured.
    dfs_students = [extract_students(df, notStudent) for df in dfs]

    if not dfs_students:
        raise ValueError("No sheet given: can't build the list of students")

    if not are_equal(dfs_students):
        # Report what was actually found in each sheet.
        found = [list(students) for students in dfs_students]
        raise ValueError("Not same list of students between sheets: {}".format(found))

    return dfs_students[0]
def are_equal(elems):
    """ Test if items of elems are equal, ignoring the order inside each item

    :param elems: iterable of sortable collections
    :returns: True if every item, once sorted, equals the first one (also
        True when elems is empty or holds a single item), False otherwise

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True
    """
    remaining = iter(elems)
    try:
        reference = sorted(next(remaining))
    except StopIteration:
        # Nothing to compare: vacuously equal.
        return True
    # all() stops at the first mismatch.
    return all(sorted(item) == reference for item in remaining)
def flat_df_students(df, students):
    """
    Flatten a marks sheet to one row per (item, student) pair.

    Each row of ``df`` describes one evaluated item: the ``notStudent``
    columns hold its description and each student column holds that
    student's mark. The result repeats the description once per student.

    :param df: DataFrame holding every ``notStudent`` column and one column
        per student
    :param students: labels of the student columns of ``df``
    :returns: DataFrame with the ``notStudent`` columns plus ``"Eleve"``
        (the student) and ``"Note"`` (the mark), with
        ``len(df) * len(students)`` rows
    """
    # Materialise once so that a one-shot iterable can be reused for each row.
    students = list(students)
    flat_data = []
    # Iterate rows directly: unlike indexing the transposed frame by label,
    # this also works when the index of df holds duplicate labels.
    for _, row in df.iterrows():
        description = row.loc[notStudent].to_dict()
        for student in students:
            record = dict(description)
            record["Eleve"] = student
            record["Note"] = row.loc[student]
            flat_data.append(record)
    return pd.DataFrame(flat_data)
def get_all_marks(ws, marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
    """ Extract marks from marks_sheetnames

    Sheets that do not exist in the workbook are skipped.

    :param ws: opened workbook exposing ``sheet_names`` and
        ``parse(sheetname)`` (e.g. the ``pd.ExcelFile`` returned by
        ``get_class_ws``)
    :param marks_sheetnames: names of the sheets that may hold marks
    :returns: list of the parsed sheets, in the order of ``marks_sheetnames``
    """
    # Ask the workbook which sheets exist instead of catching the
    # "missing sheet" exception, whose type depends on the Excel engine.
    available = ws.sheet_names
    return [
        ws.parse(sheetname)
        for sheetname in marks_sheetnames
        if sheetname in available
    ]
def extract_flat_marks(ws):
    """ Extract, flatten and concatenate marks from the workbook

    The sheets "Notes", "Connaissances" and "Calcul mental" are read when
    they exist, checked to share the same students, flattened with
    ``flat_df_students`` and stacked into a single DataFrame.

    :param ws: opened workbook exposing ``sheet_names`` and
        ``parse(sheetname)`` (e.g. the ``pd.ExcelFile`` returned by
        ``get_class_ws``)
    :returns: DataFrame stacking the flattened marks of every sheet found
    :raises ValueError: if the sheets do not have the same students
    """
    marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]

    # Ask the workbook which sheets exist instead of catching the
    # "missing sheet" exception, whose type depends on the Excel engine.
    available = ws.sheet_names
    sheets = [
        ws.parse(sheetname)
        for sheetname in marks_sheetnames
        if sheetname in available
    ]

    # check_students raises when no sheet was found, so `sheets` is
    # non-empty past this point.
    students = check_students(sheets)

    # Concatenate once rather than growing a frame inside the loop.
    flattened = [flat_df_students(sheet, students) for sheet in sheets]
    return pd.concat(flattened)
# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del