recopytex/recopytex/df_marks_manip.py

#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import numpy as np
from math import ceil, floor
from .config import COLUMNS, VALIDSCORE

# Values manipulations


def round_half_point(val):
    """ Round a value up to the next multiple of 0.5

    The value is doubled, passed through ``ceil`` and halved again.

    :param val: value to round
    :return: the rounded value, or ``val`` itself, unchanged, when the
        computation raises ValueError or TypeError (e.g. NaN or a
        non-numeric value)

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point("abs")
    'abs'
    """
    try:
        rounded = 0.5 * ceil(2.0 * val)
    except (ValueError, TypeError):
        # Not a usable number: hand the input back as it came
        rounded = val
    return rounded


def score_to_mark(x):
    """ Compute the mark

    If the item is leveled, the score (a level among 0, 1, 2, 3) is scaled
    to the rating scale: score * score_rate / 3.
    Otherwise the score is already a mark and is returned as is.
    A score of -1 means "no answer" and gives a mark of 0.

    :param x: dictionnary-like (dict or DataFrame row) with COLUMNS["is_leveled"],
        COLUMNS["score"] and COLUMNS["score_rate"] keys
    :return: the mark
    :raises ValueError: if a leveled score is not one of 0, 1, 2, 3 or if a
        non leveled score is greater than its score_rate

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> score_to_mark(df.loc[0])
    1.0
    >>> score_to_mark(df.loc[10])
    1.3333333333333333
    """
    score = x[COLUMNS["score"]]

    # -1 is no answer
    if score == -1:
        return 0

    if x[COLUMNS["is_leveled"]]:
        if score not in [0, 1, 2, 3]:
            raise ValueError(f"The evaluation is out of range: {score} at {x}")
        return score * x[COLUMNS["score_rate"]] / 3

    score_rate = x[COLUMNS["score_rate"]]
    if score > score_rate:
        # The score is read through COLUMNS here too: a literal "score" key
        # would raise KeyError whenever the column is named differently,
        # hiding the ValueError.
        raise ValueError(
            f"The score ({score}) is greated than the rating scale ({score_rate}) at {x}"
        )
    return score


def score_to_level(x):
    """ Compute the level

    - a score of -1 ("no answer") is returned unchanged
    - a missing score (NaN / None) gives 'na'
    - a leveled item gives its score converted to int
    - otherwise the level is ceil(score / score_rate * 3)

    :param x: dictionnary-like (dict or DataFrame row) with COLUMNS["is_leveled"],
        COLUMNS["score"] and COLUMNS["score_rate"] keys
    :return: -1, 'na' or the level as an int

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> score_to_level(df.loc[0])
    3
    >>> score_to_level(df.loc[1])
    1
    >>> score_to_level(df.loc[2])
    'na'
    >>> score_to_level(df.loc[3])
    3
    >>> score_to_level(df.loc[5])
    3
    >>> score_to_level(df.loc[10])
    2
    """
    score = x[COLUMNS["score"]]

    # -1 is no answer
    if score == -1:
        return score

    # A missing score cannot be converted to an int level (int()/ceil()
    # raise ValueError on NaN), so it is reported as 'na' instead.
    if pd.isna(score):
        return "na"

    if x[COLUMNS["is_leveled"]]:
        return int(score)
    return int(ceil(score / x[COLUMNS["score_rate"]] * 3))


# DataFrame columns manipulations


def compute_mark(df):
    """ Compute the mark of every row of df

    score_to_mark is applied row by row on the score, is_leveled and
    score_rate columns. The input DataFrame is not modified.

    :param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
    :return: Series of marks, indexed like df

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_mark(df).round(2).tolist()
    [1.0, 0.33, 2.0, 1.5, 0.67, 2.0, 0.67, 1.0, 1.5, 1.0, 1.33, 2.0]
    """
    needed_columns = [COLUMNS[key] for key in ("score", "is_leveled", "score_rate")]
    scores = df[needed_columns]
    return scores.apply(score_to_mark, axis=1)


def compute_level(df):
    """ Compute the level of every row of df

    score_to_level is applied row by row on the score, is_leveled and
    score_rate columns. The input DataFrame is not modified.

    :param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.
    :return: Series of levels, indexed like df

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> compute_level(df).tolist()
    [3, 1, 3, 3, 1, 3, 2, 3, 3, 2, 2, 3]
    """
    score_col = COLUMNS["score"]
    leveled_col = COLUMNS["is_leveled"]
    rate_col = COLUMNS["score_rate"]
    per_row_inputs = df[[score_col, leveled_col, rate_col]]
    return per_row_inputs.apply(score_to_level, axis=1)


def compute_normalized(df):
    """ Compute the normalized mark (mark / score_rate)

    :param df: DataFrame with COLUMNS["mark"] and COLUMNS["score_rate"] columns
    :return: Series with the element-wise ratio of the two columns

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ...    COLUMNS["score_rate"]:[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ...    COLUMNS["is_leveled"]:[0]*4+[1]*2 + [0]*4+[1]*2,
    ...    COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
    ...    }
    >>> df = pd.DataFrame(d)
    >>> df[COLUMNS["mark"]] = compute_mark(df)
    >>> compute_normalized(df).round(2).tolist()
    [1.0, 0.33, 1.0, 0.75, 0.33, 1.0, 0.67, 1.0, 0.75, 0.5, 0.67, 1.0]
    """
    marks = df[COLUMNS["mark"]]
    rating_scales = df[COLUMNS["score_rate"]]
    return marks / rating_scales


# Postprocessing question scores


def pp_q_scores(df):
    """ Postprocessing questions scores dataframe

    Builds the mark, level and normalized columns with compute_mark,
    compute_level and compute_normalized.

    :param df: questions-scores dataframe
    :return: the DataFrame returned by df.assign, holding the
        COLUMNS["mark"], COLUMNS["level"] and COLUMNS["normalized"] columns
    """
    # Order matters: compute_normalized reads the mark column, and assign
    # evaluates the callables in the order they are given.
    steps = (
        ("mark", compute_mark),
        ("level", compute_level),
        ("normalized", compute_normalized),
    )
    new_columns = {COLUMNS[key]: builder for key, builder in steps}
    return df.assign(**new_columns)


# -----------------------------
# Reglages pour 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del
Feat: first analysis and plots 2019-08-04 21:24:32 +00:00			`#!/usr/bin/env python`
			`# encoding: utf-8`

			`import pandas as pd`
			`import numpy as np`
			`from math import ceil, floor`
			`from .config import COLUMNS, VALIDSCORE`

			`# Values manipulations`


			`def round_half_point(val):`
			`try:`
			`return 0.5 * ceil(2.0 * val)`
			`except ValueError:`
			`return val`
			`except TypeError:`
			`return val`


			`def score_to_mark(x):`
			`""" Compute the mark`

			`if the item is leveled then the score is multiply by the score_rate`
			`otherwise it copies the score`

			`:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys`

			`>>> d = {"Eleve":["E1"]6 + ["E2"]6,`
			`... COLUMNS["score_rate"]:[1]2+[2]2+[2]2 + [1]2+[2]2+[2]2,`
			`... COLUMNS["is_leveled"]:[0]4+[1]2 + [0]4+[1]2,`
			`... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],`
			`... }`
			`>>> df = pd.DataFrame(d)`
			`>>> score_to_mark(df.loc[0])`
			`1.0`
			`>>> score_to_mark(df.loc[10])`
			`1.3333333333333333`
			`"""`
			`# -1 is no answer`
			`if x[COLUMNS["score"]] == -1:`
			`return 0`

			`if x[COLUMNS["is_leveled"]]:`
			`if x[COLUMNS["score"]] not in [0, 1, 2, 3]:`
			`raise ValueError(f"The evaluation is out of range: {x[COLUMNS['score']]} at {x}")`
			`return x[COLUMNS["score"]] * x[COLUMNS["score_rate"]] / 3`

			`if x[COLUMNS["score"]] > x[COLUMNS["score_rate"]]:`
			`raise ValueError(`
			`f"The score ({x['score']}) is greated than the rating scale ({x[COLUMNS['score_rate']]}) at {x}"`
			`)`
			`return x[COLUMNS["score"]]`


			`def score_to_level(x):`
			`""" Compute the level (".",0,1,2,3).`

			`:param x: dictionnary with COLUMNS["is_leveled"], COLUMNS["score"] and COLUMNS["score_rate"] keys`

			`>>> d = {"Eleve":["E1"]6 + ["E2"]6,`
			`... COLUMNS["score_rate"]:[1]2+[2]2+[2]2 + [1]2+[2]2+[2]2,`
			`... COLUMNS["is_leveled"]:[0]4+[1]2 + [0]4+[1]2,`
			`... COLUMNS["score"]:[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],`
			`... }`
			`>>> df = pd.DataFrame(d)`
			`>>> score_to_level(df.loc[0])`
			`3`
			`>>> score_to_level(df.loc[1])`
			`1`
			`>>> score_to_level(df.loc[2])`
			`'na'`
			`>>> score_to_level(df.loc[3])`
			`3`
			`>>> score_to_level(df.loc[5])`
			`3`
			`>>> score_to_level(df.loc[10])`
			`2`
			`"""`
			`# -1 is no answer`
			`if x[COLUMNS["score"]] == -1:`
			`return x[COLUMNS["score"]]`

			`if x[COLUMNS["is_leveled"]]:`
			`return int(x[COLUMNS["score"]])`
			`else:`
			`return int(ceil(x[COLUMNS["score"]] / x[COLUMNS["score_rate"]] * 3))`


			`# DataFrame columns manipulations`


			`def compute_mark(df):`
			`""" Add Mark column to df`

			`:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.`

			`>>> d = {"Eleve":["E1"]6 + ["E2"]6,`
			`... COLUMNS["score_rate"]:[1]2+[2]2+[2]2 + [1]2+[2]2+[2]2,`
			`... COLUMNS["is_leveled"]:[0]4+[1]2 + [0]4+[1]2,`
			`... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],`
			`... }`
			`>>> df = pd.DataFrame(d)`
			`>>> compute_mark(df)`
			`0 1.00`
			`1 0.33`
			`2 2.00`
			`3 1.50`
			`4 0.67`
			`5 2.00`
			`6 0.67`
			`7 1.00`
			`8 1.50`
			`9 1.00`
			`10 1.33`
			`11 2.00`
			`dtype: float64`
			`"""`
			`return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(`
			`score_to_mark, axis=1`
			`)`


			`def compute_level(df):`
			`""" Add Mark column to df`

			`:param df: DataFrame with COLUMNS["score"], COLUMNS["is_leveled"] and COLUMNS["score_rate"] columns.`

			`>>> d = {"Eleve":["E1"]6 + ["E2"]6,`
			`... COLUMNS["score_rate"]:[1]2+[2]2+[2]2 + [1]2+[2]2+[2]2,`
			`... COLUMNS["is_leveled"]:[0]4+[1]2 + [0]4+[1]2,`
			`... COLUMNS["score"]:[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],`
			`... }`
			`>>> df = pd.DataFrame(d)`
			`>>> compute_level(df)`
			`0 na`
			`1 1`
			`2 3`
			`3 3`
			`4 1`
			`5 3`
			`6 2`
			`7 3`
			`8 3`
			`9 2`
			`10 2`
			`11 3`
			`dtype: object`
			`"""`
			`return df[[COLUMNS["score"], COLUMNS["is_leveled"], COLUMNS["score_rate"]]].apply(`
			`score_to_level, axis=1`
			`)`


			`def compute_normalized(df):`
			`""" Compute the normalized mark (Mark / score_rate)`

			`:param df: DataFrame with "Mark" and COLUMNS["score_rate"] columns`

			`>>> d = {"Eleve":["E1"]6 + ["E2"]6,`
			`... COLUMNS["score_rate"]:[1]2+[2]2+[2]2 + [1]2+[2]2+[2]2,`
			`... COLUMNS["is_leveled"]:[0]4+[1]2 + [0]4+[1]2,`
			`... COLUMNS["score"]:[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],`
			`... }`
			`>>> df = pd.DataFrame(d)`
			`>>> df["Mark"] = compute_marks(df)`
			`>>> compute_normalized(df)`
			`0 1.00`
			`1 0.33`
			`2 1.00`
			`3 0.75`
			`4 0.33`
			`5 1.00`
			`6 0.67`
			`7 1.00`
			`8 0.75`
			`9 0.50`
			`10 0.67`
			`11 1.00`
			`dtype: float64`
			`"""`
			`return df[COLUMNS["mark"]] / df[COLUMNS["score_rate"]]`


			`# Postprocessing question scores`


			`def pp_q_scores(df):`
			`""" Postprocessing questions scores dataframe`

			`:param df: questions-scores dataframe`
			`:return: same data frame with mark, level and normalize columns`
			`"""`
			`assign = {`
			`COLUMNS["mark"]: compute_mark,`
			`COLUMNS["level"]: compute_level,`
			`COLUMNS["normalized"]: compute_normalized,`
			`}`
			`return df.assign(**assign)`


			`# -----------------------------`
			`# Reglages pour 'vim'`
			`# vim:set autoindent expandtab tabstop=4 shiftwidth=4:`
			`# cursor: 16 del`