repytex/notes_tools/extract.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
import xlrd
from path import Path


# Default directory used by list_classes and get_class_ws when no path is given.
notes_path = Path("./")

# Column labels that do not designate a student; extract_students treats every other column as a student.
notStudent = ["Trimestre", "Nom", "Date", "Exercice", "Question", "Competence", "Domaine", "Commentaire", "Bareme", "Niveau"]

def list_classes(path = notes_path):
    """
    List classes available in a notes directory

    A class is any ``*.xlsx`` file listed by ``Path(path).files``; its name
    is the file name with the extension stripped.

    :param path: directory to scan, as a ``Path`` or anything ``Path()``
        accepts (e.g. a string). Defaults to ``notes_path``.
    :returns: list of class names, in the order the files are listed

    Illustrative examples (the result depends on the files present on disk):

    >>> list_classes()
    ['509', '503', '308', '312']
    >>> p = Path("./")
    >>> list_classes(p)
    ['509', '503', '308', '312']
    >>> list_classes("./")
    ['509', '503', '308', '312']
    """
    # Coerce once, like get_class_ws does, instead of wrapping the whole
    # listing in ``except AttributeError``: that handler also fired on
    # AttributeErrors that had nothing to do with ``path`` being a string.
    directory = Path(path)
    # NOTE(review): newer releases of the `path` package expose `namebase`
    # as `stem` -- confirm before upgrading the dependency.
    return [workbook.namebase for workbook in directory.files("*.xlsx")]

def get_class_ws(classe, path = notes_path):
    """
    Open the Excel workbook of a class

    :param classe: class name, i.e. the workbook file name without ``.xlsx``
    :param path: directory holding the workbooks (``Path`` or string).
        Defaults to ``notes_path``.
    :returns: ``pd.ExcelFile`` opened on ``<path>/<classe>.xlsx``
    :raises ValueError: if ``classe`` is not one of ``list_classes(path)``
    """
    p = Path(path)
    # Scan the directory once so the membership test and the error message
    # are built from the same listing.
    available = list_classes(p)
    if classe not in available:
        raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = p, c = available))
    # Explicit parentheses: append the extension to the name, then join.
    return pd.ExcelFile(p / (classe + ".xlsx"))

def extract_students(df, notStudent = notStudent):
    """ Return the columns of df that are not listed in notStudent

    :param df: dataframe whose columns mix descriptive fields and students
    :param notStudent: column labels to leave out
    :returns: the remaining column labels, as computed by ``df.columns.difference``
    """
    return df.columns.difference(notStudent)

def check_students(dfs, notStudent = notStudent):
    """ Build the students list shared by every dataframe of dfs

    :param dfs: non-empty iterable of dataframes
    :param notStudent: column labels that are not students
    :returns: the students of the first dataframe
    :raises ValueError: if dfs is empty, or if the dataframes do not all
        have the same students
    """
    # Forward notStudent: it used to be accepted but ignored, so a custom
    # list never reached extract_students.
    dfs_students = [extract_students(df, notStudent) for df in dfs]

    if not dfs_students:
        raise ValueError("No dataframe to extract students from")

    reference = dfs_students[0]

    if not are_equal(dfs_students):
        # Report the first list that differs from the reference. The previous
        # message formatted undefined names and raised NameError instead of
        # the intended ValueError.
        expected = sorted(reference)
        mismatch = next(
            (s for s in dfs_students[1:] if sorted(s) != expected),
            dfs_students[-1],
        )
        raise ValueError("Not same list of students between df1 = {} and df2 = {}".format(list(reference), list(mismatch)))

    return reference

def are_equal(elems):
    """ Test if all items of elems hold the same values, whatever their order

    Every item is sorted, then compared with the sorted first item.

    :param elems: sequence of sortable iterables
    :returns: True if every item equals the first one once sorted (also
        True when elems is empty or has a single item), False otherwise

    >>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    True
    >>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]
    >>> are_equal(L)
    False
    >>> are_equal([])
    True

    """
    # Nothing to compare: vacuously equal (indexing elems[0] used to raise
    # IndexError here).
    if len(elems) == 0:
        return True

    first = sorted(elems[0])
    return all(sorted(e) == first for e in elems[1:])

def flat_df_students(df, students):
    """ Flatten df into one record per (row, student) pair

    Each row of df is expanded into one record per student. A record holds
    the row's values for the module-level ``notStudent`` columns, plus
    ``"Eleve"`` (the student's column label) and ``"Note"`` (the value found
    in that student's column for the row).

    :param df: dataframe holding the ``notStudent`` columns and one column
        per student
    :param students: column labels of the students to extract
    :returns: dataframe built from the records, following the rows of df
        and, within a row, the order of students
    """
    flat_data = []
    dfT = df.T
    for n in dfT:
        # Column n of the transpose is row n of df; fetch it once instead of
        # once per student.
        row = dfT[n]
        pre_di = row[notStudent].to_dict()
        for e in students:
            data = pre_di.copy()
            data["Eleve"] = e
            data["Note"] = row.loc[e]
            flat_data.append(data)
    return pd.DataFrame.from_dict(flat_data)

def get_all_marks(ws, marks_sheetnames = ("Notes", "Connaissances", "Calcul mental")):
    """ Extract marks from marks_sheetnames

    :param ws: workbook exposing ``parse(sheetname)`` (presumably the
        ``pd.ExcelFile`` returned by get_class_ws -- TODO confirm)
    :param marks_sheetnames: names of the sheets to read, in order
    :returns: list of the parsed sheets, in the order of marks_sheetnames;
        a sheet whose parsing raises ``XLRDError`` is left out

    """
    # Collect every parsed sheet: the previous version overwrote a local on
    # each iteration and returned nothing.
    marks = []
    for sheetname in marks_sheetnames:
        try:
            sheet = ws.parse(sheetname)
        except xlrd.biffh.XLRDError:
            # Presumably raised when the workbook has no such sheet; skipped
            # on purpose so the remaining sheets are still read.
            continue
        marks.append(sheet)
    return marks

def extract_flat_marks(ws):
    """ Extract, flatten and concatenate marks from the workbook

    Reads the sheets "Notes", "Connaissances" and "Calcul mental" (sheets
    whose parsing raises ``XLRDError`` are skipped), checks that they all
    list the same students, flattens each one with flat_df_students and
    concatenates the results.

    :param ws: workbook exposing ``parse(sheetname)`` (presumably the
        ``pd.ExcelFile`` returned by get_class_ws -- TODO confirm)
    :returns: dataframe of the flattened marks of every sheet found, or an
        empty dataframe when none of the sheets could be read
    :raises ValueError: from check_students, when the sheets do not share
        the same students

    """
    marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]

    sheets = []
    for sheetname in marks_sheetnames:
        try:
            sheets.append(ws.parse(sheetname))
        except xlrd.biffh.XLRDError:
            # Presumably raised when the sheet is missing; skipped on purpose.
            pass

    # No sheet at all: return the empty frame the accumulation used to start
    # from, rather than failing inside check_students.
    if not sheets:
        return pd.DataFrame()

    students = check_students(sheets)

    # One concat over all the pieces instead of growing a frame in the loop,
    # which re-copied the accumulated rows at every iteration.
    return pd.concat([flat_df_students(sheet, students) for sheet in sheets])


# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del
import work 2016-11-06 17:54:42 +00:00			`#!/usr/bin/env python`
			`# encoding: utf-8`

			`import pandas as pd`
			`import numpy as np`
			`import xlrd`
			`from path import Path`


			`notes_path = Path("./")`
fix path issue 2016-11-08 08:06:06 +00:00
import work 2016-11-06 17:54:42 +00:00			`notStudent = ["Trimestre", "Nom", "Date", "Exercice", "Question", "Competence", "Domaine", "Commentaire", "Bareme", "Niveau"]`

			`def list_classes(path = notes_path):`
			`"""`
			`List classes available in notes_path`

			`>>> list_classes()`
			`['509', '503', '308', '312']`
			`>>> p = Path("./")`
			`>>> list_classes(p)`
			`['509', '503', '308', '312']`
			`>>> list_classes("./")`
			`['509', '503', '308', '312']`
			`"""`
			`try:`
			`return [n.namebase for n in path.files("*.xlsx")]`
			`except AttributeError:`
			`p = Path(path)`
			`return [n.namebase for n in p.files("*.xlsx")]`

			`def get_class_ws(classe, path = notes_path):`
			`"""`
			`From the name of a classe, returns pd.ExcelFile`
			`"""`
fix path issue 2016-11-08 08:06:06 +00:00			`p = Path(path)`
			`if classe in list_classes(p):`
			`return pd.ExcelFile(p/classe+".xlsx")`
import work 2016-11-06 17:54:42 +00:00			`else:`
fix path issue 2016-11-08 08:06:06 +00:00			`raise ValueError("This class is not disponible in {p}. You have to choose in {c}".format(p = p, c = list_classes(p)))`
import work 2016-11-06 17:54:42 +00:00
			`def extract_students(df, notStudent = notStudent):`
			`""" Extract the list of students from df """`
			`students = df.columns.difference(notStudent)`
			`return students`

			`def check_students(dfs, notStudent = notStudent):`
			`""" Build students list """`
			`dfs_students = [extract_students(df) for df in dfs]`

			`if not are_equal(dfs_students):`
			`raise ValueError("Not same list of students between df1 = {} ans df2 = {}".format(df1, df2))`

			`return dfs_students[0]`

			`def are_equal(elems):`
			`""" Test if item of elems are equal`

			`>>> L = [[1, 2, 3], [1, 3, 2], [1, 3, 2]]`
			`>>> are_equal(L)`
			`True`
			`>>> L = [[0, 2, 3], [1, 3, 2], [1, 3, 2]]`
			`>>> are_equal(L)`
			`False`

			`"""`
			`first = sorted(elems[0])`
			`others = [sorted(e) for e in elems[1:]]`
			`diff = [e == first for e in others]`

			`if False in diff:`
			`return False`

			`return True`

			`def flat_df_students(df, students):`
			`""" Flat the ws for students """`
			`flat_df = pd.DataFrame()`
			`flat_data = []`
			`dfT = df.T`
			`for n in dfT:`
			`pre_di = dfT[n][notStudent].to_dict()`
			`for e in students:`
			`data = pre_di.copy()`
			`data["Eleve"] = e`
			`data["Note"] = dfT[n].loc[e]`
			`flat_data.append(data)`
			`return pd.DataFrame.from_dict(flat_data)`

			`def get_all_marks(ws, marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]):`
			`""" Extract marks from marks_sheetnames`

			`:param ws: TODO`
			`:returns: TODO`

			`"""`
			`for sheetname in marks_sheetnames:`
			`try:`
			`marks = ws.parse(sheetname)`
			`except xlrd.biffh.XLRDError:`
			`pass`

			`def extract_flat_marks(ws):`
			`""" Extract, flat and contact marks from the worksheet`

			`:param ws: TODO`
			`:returns: TODO`

			`"""`
			`marks_sheetnames = ["Notes", "Connaissances", "Calcul mental"]`

			`sheets = []`
			`for sheetname in marks_sheetnames:`
			`try:`
			`sheets.append(ws.parse(sheetname))`
			`except xlrd.biffh.XLRDError:`
			`pass`

			`students = check_students(sheets)`

			`flat_df = pd.DataFrame()`
			`for sheet in sheets:`
			`flat = flat_df_students(sheet, students)`
			`flat_df = pd.concat([flat_df, flat])`

			`return flat_df`





			`# -----------------------------`
			`# Reglages pour 'vim'`
			`# vim:set autoindent expandtab tabstop=4 shiftwidth=4:`
			`# cursor: 16 del`