#!/usr/bin/env python
# encoding: utf-8

"""
Extracting data from xlsx files

Helpers to turn a "wide" score sheet (one row per question, one column per
student) into a "long" dataframe (one row per question and student).
"""

import pandas as pd

from .config import NO_ST_COLUMNS, COLUMNS, VALIDSCORE

# Use the full option name: a partial pattern such as "Precision" can match
# several options (e.g. "styler.format.precision") and make pandas raise.
pd.set_option("display.precision", 2)


def try_replace(x, old, new):
    """ Replace `old` by `new` in the string representation of x

    :param x: any value, it is converted with str() before the replacement
    :param old: substring to replace
    :param new: replacement substring
    :return: str(x) with the replacement done, or x itself when the
        conversion raises a ValueError
    """
    try:
        return str(x).replace(old, new)
    except ValueError:
        return x


def extract_students(df, no_student_columns=NO_ST_COLUMNS.values()):
    """ Extract the list of students from df

    :param df: the dataframe
    :param no_student_columns: columns that are not students
    :return: pandas Index with the columns of df that are not listed in
        no_student_columns
    """
    # The default is a dict view: materialise it before handing it to pandas.
    students = df.columns.difference(list(no_student_columns))
    return students


def flat_df_students(df, no_student_columns=NO_ST_COLUMNS.values()):
    """ Flat the ws for students

    :param df: the dataframe (one row per questions)
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per questions and students

    Columns of csv files:
    - NO_ST_COLUMNS meta data on questions
    - one for each students

    This function flat student's columns to "student" and "score".
    Rows without score (NaN) are dropped. When df has no student column, an
    empty dataframe with the expected columns is returned.
    """
    id_columns = list(no_student_columns)
    students = extract_students(df, id_columns)

    # One melt per student; the per-student frames keep their own row index,
    # so index values repeat across students in the concatenated result.
    scores = [
        pd.melt(
            df,
            id_vars=id_columns,
            value_vars=[student],
            var_name=COLUMNS["student"],
            value_name=COLUMNS["score"],
        ).dropna(subset=[COLUMNS["score"]])
        for student in students
    ]

    if not scores:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(
            columns=id_columns + [COLUMNS["student"], COLUMNS["score"]]
        )

    return pd.concat(scores)


def flat_clear_csv(csv_df, no_student_columns=NO_ST_COLUMNS.values()):
    """ Flat and clear the dataframe extracted from csv

    :param csv_df: data frame read from csv
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per questions and students

    Cleaning steps:
    - missing question, exercise, comment and competence become ""
    - VALIDSCORE["NOANSWER"] scores become -1
    - "," is replaced by "." in score and score_rate before the numeric
      conversion; score_rate values that cannot be converted become NaN
      while an invalid score raises
    """
    df = flat_df_students(csv_df, no_student_columns)

    # Reassign instead of `df[col].fillna(..., inplace=True)`: the in-place
    # form works on an intermediate object and may leave df untouched.
    for key in ("question", "exercise", "comment", "competence"):
        column = COLUMNS[key]
        df[column] = df[column].fillna("")

    def comma_to_dot(value):
        return try_replace(value, ",", ".")

    raw_scores = df[COLUMNS["score"]].replace(VALIDSCORE["NOANSWER"], -1)
    df[COLUMNS["score"]] = pd.to_numeric(raw_scores.apply(comma_to_dot))

    df[COLUMNS["score_rate"]] = pd.to_numeric(
        df[COLUMNS["score_rate"]].apply(comma_to_dot), errors="coerce"
    )

    return df


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del