repytex/notes_tools/tools/extract.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
import xlrd
from path import Path


# Default directory searched for class workbooks (the "*.xlsx" files).
notes_path = Path("./")

# Descriptive columns of a marks sheet. Any column whose label is not in
# this list is treated as a student column.
no_student_columns = [
    "Trimestre",
    "Nom",
    "Date",
    "Exercice",
    "Question",
    "Competence",
    "Domaine",
    "Commentaire",
    "Bareme",
    "Niveau",
]

# Display floats with two decimal places. The fully qualified option key is
# used because pandas resolves option names by pattern matching, and the bare
# word "Precision" is ambiguous as soon as more than one registered option
# contains it (pandas then raises OptionError).
pd.set_option("display.precision", 2)

def list_classes(path = notes_path):
    """
    Return the names of the classes found in a directory.

    A class is any "*.xlsx" file directly inside ``path``; its name is the
    file name without the extension.

    :param path: directory to scan, either a Path or anything Path() accepts
    :returns: list of class names

    >>> list_classes()
    []
    >>> p = Path("./samples/")
    >>> list_classes(p)
    ['503', '312', '308']
    >>> list_classes("./samples/")
    ['503', '312', '308']
    """
    pattern = "*.xlsx"
    try:
        workbooks = path.files(pattern)
        names = [wb.namebase for wb in workbooks]
    except AttributeError:
        # path has no Path API (e.g. a plain string): wrap it and retry.
        workbooks = Path(path).files(pattern)
        names = [wb.namebase for wb in workbooks]
    return names

def get_class_ws(classe, path = notes_path):
    """
    Open the workbook of a class.

    :param classe: class name, i.e. the workbook file name without ".xlsx"
    :param path: directory holding the workbooks
    :returns: pd.ExcelFile opened on ``<path>/<classe>.xlsx``
    :raises ValueError: if ``classe`` is not one of the classes listed by
        list_classes for ``path``
    """
    folder = Path(path)
    if classe not in list_classes(folder):
        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = folder, c = list_classes(folder)))

    workbook_file = folder/classe+".xlsx"
    return pd.ExcelFile(workbook_file)

def extract_students(df, no_student_columns = no_student_columns):
    """Return the student columns of df.

    :param df: DataFrame whose columns mix descriptive fields and students
    :param no_student_columns: column labels that are not students
    :returns: the labels of ``df.columns`` absent from ``no_student_columns``
        (result of ``Index.difference``)
    """
    all_columns = df.columns
    return all_columns.difference(no_student_columns)

def check_students(dfs, no_student_columns = no_student_columns):
    """Build the students list shared by several sheets.

    :param dfs: sequence of DataFrames (one per marks sheet)
    :param no_student_columns: column labels that are not students; forwarded
        to extract_students for every sheet
    :returns: the students extracted from the first sheet
    :raises ValueError: if the sheets do not all have the same students
    """
    # Forward the caller's column list: it used to be dropped here, so a
    # custom value had no effect.
    dfs_students = [extract_students(df, no_student_columns) for df in dfs]

    if not are_equal(dfs_students):
        raise ValueError("Not same list of students amoung worksheets")

    return dfs_students[0]

def are_equal(elems):
    """ Test whether all items of elems hold the same values, order ignored.

    :param elems: iterable of sortable collections
    :returns: True if every item, once sorted, equals the first one (also
        True when elems is empty or has a single item), False otherwise

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True

    """
    items = iter(elems)
    try:
        reference = sorted(next(items))
    except StopIteration:
        # Nothing to compare: an empty input is vacuously equal.
        return True

    return all(sorted(item) == reference for item in items)

def flat_df_students(df, students):
    """ Flatten a marks sheet into one row per (sheet row, student) pair.

    Every output row carries the descriptive fields named in the module-level
    ``no_student_columns``, plus "Eleve" (the student's column label) and
    "Note" (the value found in that student's column for that sheet row).

    :param df: DataFrame holding all ``no_student_columns`` columns and one
        column per student
    :param students: iterable of student column labels
    :returns: DataFrame built from the list of flattened rows
    """
    flat_data = []
    # Columns of the transposed frame are the rows of df.
    dfT = df.T
    for n in dfT:
        row = dfT[n]
        common = row[no_student_columns].to_dict()
        for student in students:
            data = common.copy()
            data["Eleve"] = student
            data["Note"] = row.loc[student]
            flat_data.append(data)
    return pd.DataFrame.from_dict(flat_data)

def parse_sheets(ws,
        marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
    """ Parse the sheets named in marks_sheetnames

    Names that are not present in the workbook are skipped. Presence is
    checked against ``ws.sheet_names`` rather than by catching a
    reader-specific exception, so the behaviour does not depend on which
    Excel engine backs the workbook.

    :param ws: the workbook, an object exposing ``sheet_names`` and
        ``parse(name)`` such as pd.ExcelFile
    :param marks_sheetnames: names of sheets for extracting
    :returns: list of parsed sheets, in the order of marks_sheetnames

    """
    available = set(ws.sheet_names)
    return [ws.parse(sheetname)
            for sheetname in marks_sheetnames
            if sheetname in available]

def extract_flat_marks(ws,
        marks_sheetnames=("Notes", "Connaissances", "Calcul mental")):
    """ Extract, flatten and concatenate marks from the workbook

    :param ws: the workbook (as accepted by parse_sheets)
    :param marks_sheetnames: names of the sheets holding marks
    :returns: a single DataFrame stacking the flattened rows of every
        parsed sheet, in the order of the sheets
    :raises ValueError: if the sheets do not share the same students

    """
    sheets = parse_sheets(ws, marks_sheetnames)

    students = check_students(sheets)

    # Concatenate once: growing a frame inside the loop copies it on every
    # iteration and mixes an empty frame into the result.
    flat_sheets = [flat_df_students(sheet, students) for sheet in sheets]
    return pd.concat(flat_sheets)


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del