# repytex/notes_tools/extract.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
import xlrd
from path import Path


# Default directory scanned by list_classes() for "*.xlsx" workbooks.
notes_path = Path("./")

# Column labels that are not students: extract_students() treats every other
# column of a sheet as a student, and flat_df_students() copies these columns
# into each flattened record.
notStudent = ["Trimestre", "Nom", "Date", "Exercice", "Question", "Competence", "Domaine", "Commentaire", "Bareme", "Niveau"]

def list_classes(path = notes_path):
    """
    List the classes available in a notes directory.

    A class is any ``*.xlsx`` file found by ``Path(path).files``; its name is
    the file name without the extension.

    :param path: directory to scan, given as a ``Path`` or as a plain string
    :returns: list of class names

    The results below depend on the content of the scanned directory.

    >>> list_classes()
    ['509', '503', '308', '312']
    >>> p = Path("./")
    >>> list_classes(p)
    ['509', '503', '308', '312']
    >>> list_classes("./")
    ['509', '503', '308', '312']
    """
    # Path() accepts both a string and a Path, so normalise once up front (as
    # get_class_ws does) rather than catching AttributeError, which would also
    # hide unrelated attribute errors raised while listing the files.
    directory = Path(path)
    return [workbook.namebase for workbook in directory.files("*.xlsx")]

def get_class_ws(classe, path = notes_path):
    """
    Open the workbook of a class.

    :param classe: name of the class, i.e. the workbook name without ".xlsx"
    :param path: directory holding the workbooks (``Path`` or string)
    :returns: the ``pd.ExcelFile`` opened on ``<path>/<classe>.xlsx``
    :raises ValueError: when ``classe`` is not listed by ``list_classes(path)``
    """
    folder = Path(path)

    # Reject unknown classes first, telling the caller what can be chosen.
    if classe not in list_classes(folder):
        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = folder, c = list_classes(folder)))

    # "/" binds tighter than "+": the extension is appended to the joined path.
    return pd.ExcelFile(folder/classe+".xlsx")

def extract_students(df, notStudent = notStudent):
    """
    Return the student columns of a sheet.

    :param df: DataFrame whose columns mix ``notStudent`` labels and students
    :param notStudent: column labels that are not students
    :returns: the labels of ``df.columns`` that are absent from ``notStudent``
    """
    return df.columns.difference(notStudent)

def check_students(dfs, notStudent = notStudent):
    """
    Build the list of students shared by several sheets.

    :param dfs: iterable of DataFrames (at least one)
    :param notStudent: column labels that are not students
    :returns: the students of the first sheet
    :raises ValueError: when ``dfs`` is empty or when the sheets do not all
        have the same students
    """
    # Forward notStudent so that a caller-supplied list is honoured.
    dfs_students = [extract_students(df, notStudent) for df in dfs]

    if not dfs_students:
        raise ValueError("No sheet given, cannot build the list of students")

    if not are_equal(dfs_students):
        # Report the lists that were actually compared.
        raise ValueError("Not same list of students between sheets: {}".format([list(students) for students in dfs_students]))

    return dfs_students[0]

def are_equal(elems):
    """ Test if items of elems are equal, regardless of their order

    :param elems: sequence of iterables holding sortable values
    :returns: True when every item, once sorted, equals the first one
        (an empty or single-item sequence is trivially equal)

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True

    """
    # Nothing to compare: avoid the IndexError of elems[0].
    if len(elems) == 0:
        return True

    reference = sorted(elems[0])
    return all(sorted(other) == reference for other in elems[1:])

def flat_df_students(df, students):
    """
    Flatten a sheet to one record per (row, student) pair.

    Each row of ``df`` is repeated once per student. Every copy keeps the
    columns listed in the module-level ``notStudent`` and gains two columns:
    "Eleve" (the student label) and "Note" (the value found in that student's
    column for the row).

    :param df: DataFrame holding the ``notStudent`` columns and the students' columns
    :param students: student column labels of ``df``
    :returns: DataFrame built from the flattened records
    """
    records = []
    # iterrows() yields every row as a Series; this avoids transposing the
    # whole frame and looking rows up by label, which is ambiguous when row
    # labels are duplicated.
    for _, row in df.iterrows():
        metadata = row[notStudent].to_dict()
        for student in students:
            record = dict(metadata)
            record["Eleve"] = student
            record["Note"] = row.loc[student]
            records.append(record)
    return pd.DataFrame(records)

def get_all_marks(ws, marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
    """ Extract marks from marks_sheetnames

    :param ws: object with a ``parse(sheetname)`` method, e.g. the
        ``pd.ExcelFile`` returned by ``get_class_ws``
    :param marks_sheetnames: names of the sheets to parse
    :returns: list of the parsed sheets, in the order of ``marks_sheetnames``;
        sheets whose parsing raises ``XLRDError`` are left out

    """
    sheets = []
    for sheetname in marks_sheetnames:
        try:
            sheets.append(ws.parse(sheetname))
        except xlrd.biffh.XLRDError:
            # Best effort: a sheet that xlrd cannot parse is skipped.
            # NOTE(review): other Excel engines may raise a different
            # exception for a missing sheet -- confirm which engine is used.
            pass
    return sheets

def extract_flat_marks(ws, marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
    """ Extract, flatten and concatenate marks from the worksheet

    :param ws: object with a ``parse(sheetname)`` method, e.g. the
        ``pd.ExcelFile`` returned by ``get_class_ws``
    :param marks_sheetnames: names of the sheets holding marks
    :returns: one DataFrame stacking the flattened version of every sheet
        that could be parsed
    :raises ValueError: when the parsed sheets do not share the same students

    """
    sheets = []
    for sheetname in marks_sheetnames:
        try:
            sheets.append(ws.parse(sheetname))
        except xlrd.biffh.XLRDError:
            # Best effort: a sheet that xlrd cannot parse is skipped.
            pass

    students = check_students(sheets)

    # Concatenate once over all the flattened frames: growing an accumulator
    # seeded with an empty DataFrame copies the accumulated rows at each step.
    return pd.concat([flat_df_students(sheet, students) for sheet in sheets])


# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del