Feat: csv extraction and flattening
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
# Headers of the score-sheet columns that describe a question (term, name,
# date, exercise, ...) rather than a student.
NO_STUDENT_COLUMNS = [
    "Trimestre", "Nom", "Date", "Exercice", "Question",
    "Competence", "Domaine", "Commentaire", "Bareme", "Niveau",
]
|
||||
|
||||
75
recopytex/csv_extraction.py
Normal file
75
recopytex/csv_extraction.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
""" Extracting data from xlsx files """
|
||||
|
||||
import pandas as pd
|
||||
from . import NO_STUDENT_COLUMNS
|
||||
|
||||
# Display floats with 2 decimals. The option is addressed by its full key:
# a bare pattern such as "Precision" is matched as a regular expression
# against every registered option name, and set_option raises OptionError
# when more than one key matches (e.g. when "styler.format.precision" is
# also defined).
pd.set_option("display.precision", 2)
|
||||
|
||||
|
||||
def extract_students(df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Return the columns of df that are students

    :param df: the dataframe
    :param no_student_columns: labels of the columns that are not students
    :return: the column labels of df that are not in no_student_columns
        (result of ``df.columns.difference``)
    """
    all_columns = df.columns
    return all_columns.difference(no_student_columns)
|
||||
|
||||
|
||||
def flat_df_students(df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Reshape the sheet from one column per student to one row per score

    The input has the columns listed in no_student_columns plus one column
    per student. Each student column is melted on its own, keeping the
    no_student_columns as identifiers, and the resulting frames are
    concatenated.

    :param df: the dataframe (one row per question)
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per question and student, where the
        student columns are replaced by a "student" and a "score" column
    """
    melted_per_student = [
        pd.melt(
            df,
            id_vars=no_student_columns,
            value_vars=student,
            var_name="student",
            value_name="score",
        )
        for student in extract_students(df, no_student_columns)
    ]
    return pd.concat(melted_per_student)
|
||||
|
||||
|
||||
def flat_clear_csv(csv_df, no_student_columns=NO_STUDENT_COLUMNS):
    """ Flatten and clean the dataframe extracted from csv

    :param csv_df: data frame read from csv
    :param no_student_columns: columns that are not students
    :return: dataframe with one row per question and student, with
        lower-case column names and with missing values of the "question",
        "exercice", "commentaire" and "competence" columns replaced by
        empty strings
    :raises KeyError: if one of those four columns is absent once the
        column names are lower-cased
    """
    # Forward no_student_columns explicitly: falling back on the callee's
    # default would silently ignore a custom list given by the caller.
    df = flat_df_students(csv_df, no_student_columns)

    df.columns = df.columns.map(lambda x: x.lower())

    # Assign the filled columns back rather than calling
    # fillna(inplace=True) on df[col]: the in-place form works on an
    # intermediate object and is not guaranteed to update df (it has no
    # effect when pandas copy-on-write is enabled).
    text_columns = ["question", "exercice", "commentaire", "competence"]
    df[text_columns] = df[text_columns].fillna("")

    return df
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Settings for 'vim'
|
||||
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
|
||||
# cursor: 16 del
|
||||
Reference in New Issue
Block a user