Feat: import score dataframe functions

2021-04-18 22:43:46 +02:00
parent 7553628306
commit 10b9954c05
4 changed files with 312 additions and 0 deletions
--- a/recopytex/datalib/init.py
+++ b/recopytex/datalib/init.py
--- a/recopytex/datalib/on_score_column.py
+++ b/recopytex/datalib/on_score_column.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from math import ceil
+
+
+def score_to_mark(x, score_max, rounding=lambda x: round(x, 2)):
+    """Convert the score of one item into a mark.
+
+    For a leveled item the score must be one of 0, 1, ..., score_max and the
+    mark is ``score * score_rate / score_max``. For any other item the score
+    itself is used as the mark. In both cases the value goes through
+    ``rounding`` before being returned.
+
+    :param x: mapping (dict, DataFrame row, ...) with "is_leveled", "score" and "score_rate" keys
+    :param score_max: highest level accepted for a leveled item
+    :param rounding: function applied to the mark before it is returned
+    :return: the rounded mark
+    :raises ValueError: if the item is leveled and its score is not in 0..score_max
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "score":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1.2, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df.loc[0]
+    Eleve          E1
+    score_rate      1
+    is_leveled      0
+    score         1.0
+    Name: 0, dtype: object
+    >>> score_to_mark(df.loc[0], 3)
+    1.0
+    >>> df.loc[10]
+    Eleve          E2
+    score_rate      2
+    is_leveled      1
+    score         2.0
+    Name: 10, dtype: object
+    >>> score_to_mark(df.loc[10], 3)
+    1.33
+    >>> from .on_value import round_half_point
+    >>> score_to_mark(df.loc[10], 3, round_half_point)
+    1.5
+    >>> df.loc[1]
+    Eleve           E1
+    score_rate       1
+    is_leveled       0
+    score         0.33
+    Name: 1, dtype: object
+    >>> score_to_mark(df.loc[1], 3)
+    0.33
+    """
+    # Plain (non leveled) item: the score is already the mark.
+    if not x["is_leveled"]:
+        return rounding(x["score"])
+
+    level = x["score"]
+    allowed_levels = range(score_max + 1)
+    if level not in allowed_levels:
+        raise ValueError(f"The evaluation is out of range: {x['score']} at {x}")
+
+    # Scale the level onto the weight of the item.
+    return rounding(level * x["score_rate"] / score_max)
+
+
+def score_to_level(x, level_max=3):
+    """Compute the level of one item as an integer.
+
+    For a leveled item the level is the score converted to ``int``. For any
+    other item the level is ``score / score_rate * level_max`` rounded up.
+
+    :param x: mapping (dict, DataFrame row, ...) with "is_leveled", "score" and "score_rate" keys
+    :param level_max: level given to a non leveled item whose score equals its score_rate
+    :return: the level
+    :raises ValueError: if a non leveled item has a score higher than its score_rate
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "score":[1, 0.33, 0, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df
+       Eleve  score_rate  is_leveled  score
+    0     E1           1           0  1.000
+    1     E1           1           0  0.330
+    2     E1           2           0  0.000
+    3     E1           2           0  1.500
+    4     E1           2           1  1.000
+    5     E1           2           1  3.000
+    6     E2           1           0  0.666
+    7     E2           1           0  1.000
+    8     E2           2           0  1.500
+    9     E2           2           0  1.000
+    10    E2           2           1  2.000
+    11    E2           2           1  3.000
+    >>> df.apply(score_to_level, axis=1)
+    0     3
+    1     1
+    2     0
+    3     3
+    4     1
+    5     3
+    6     2
+    7     3
+    8     3
+    9     2
+    10    2
+    11    3
+    dtype: int64
+    >>> df.apply(lambda x: score_to_level(x, 5), axis=1)
+    0     5
+    1     2
+    2     0
+    3     4
+    4     1
+    5     3
+    6     4
+    7     5
+    8     4
+    9     3
+    10    2
+    11    3
+    dtype: int64
+    """
+    # A leveled item already stores its level in the score.
+    if x["is_leveled"]:
+        return int(x["score"])
+
+    score, rate = x["score"], x["score_rate"]
+    if score > rate:
+        message = (
+            f"score is higher than score_rate ({x['score']} > {x['score_rate']}) for {x}"
+        )
+        raise ValueError(message)
+
+    # Share of the item that was obtained, expressed on the 0..level_max scale.
+    scaled = score / rate * level_max
+    return int(ceil(scaled))
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/recopytex/datalib/on_score_dataframe.py
+++ b/recopytex/datalib/on_score_dataframe.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from .on_score_column import score_to_mark, score_to_level
+
+
+def compute_marks(df, score_max, rounding=lambda x: round(x, 2)):
+    """Compute the mark of every row of the dataframe
+
+    score_to_mark is applied row by row on the "score", "is_leveled" and
+    "score_rate" columns.
+
+    :param df: DataFrame with "score", "is_leveled" and "score_rate" columns.
+    :param score_max: forwarded to score_to_mark for every row
+    :param rounding: forwarded to score_to_mark for every row
+    :return: the marks, one per row of df
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "score":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df
+       Eleve  score_rate  is_leveled  score
+    0     E1           1           0  1.000
+    1     E1           1           0  0.330
+    2     E1           2           0  2.000
+    3     E1           2           0  1.500
+    4     E1           2           1  1.000
+    5     E1           2           1  3.000
+    6     E2           1           0  0.666
+    7     E2           1           0  1.000
+    8     E2           2           0  1.500
+    9     E2           2           0  1.000
+    10    E2           2           1  2.000
+    11    E2           2           1  3.000
+    >>> compute_marks(df, 3)
+    0     1.00
+    1     0.33
+    2     2.00
+    3     1.50
+    4     0.67
+    5     2.00
+    6     0.67
+    7     1.00
+    8     1.50
+    9     1.00
+    10    1.33
+    11    2.00
+    dtype: float64
+    >>> from .on_value import round_half_point
+    >>> compute_marks(df, 3, round_half_point)
+    0     1.0
+    1     0.5
+    2     2.0
+    3     1.5
+    4     0.5
+    5     2.0
+    6     0.5
+    7     1.0
+    8     1.5
+    9     1.0
+    10    1.5
+    11    2.0
+    dtype: float64
+    """
+    needed_columns = ["score", "is_leveled", "score_rate"]
+
+    def mark_of(row):
+        return score_to_mark(row, score_max, rounding)
+
+    return df[needed_columns].apply(mark_of, axis=1)
+
+
+def compute_level(df, level_max=3):
+    """Compute the level of every row of the dataframe
+
+    score_to_level is applied row by row on the "score", "is_leveled" and
+    "score_rate" columns.
+
+    :param df: DataFrame with "score", "is_leveled" and "score_rate" columns.
+    :param level_max: forwarded to score_to_level for every row
+    :return: the levels, one per row of df
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "score":[0, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> compute_level(df)
+    0     0
+    1     1
+    2     3
+    3     3
+    4     1
+    5     3
+    6     2
+    7     3
+    8     3
+    9     2
+    10    2
+    11    3
+    dtype: int64
+    """
+    needed_columns = ["score", "is_leveled", "score_rate"]
+
+    def level_of(row):
+        return score_to_level(row, level_max)
+
+    return df[needed_columns].apply(level_of, axis=1)
+
+
+def compute_normalized(df, rounding=lambda x: round(x, 2)):
+    """Compute the normalized mark (mark / score_rate)
+
+    :param df: DataFrame with "mark" and "score_rate" columns
+    :param rounding: function applied to the whole column of ratios
+    :return: column with normalized mark
+
+    >>> import pandas as pd
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ...    "score_rate":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ...    "is_leveled":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ...    "score":[1, 0.33, 2, 1.5, 1, 3,   0.666, 1, 1.5, 1, 2, 3],
+    ...    }
+    >>> df = pd.DataFrame(d)
+    >>> df["mark"] = compute_marks(df, 3)
+    >>> compute_normalized(df)
+    0     1.00
+    1     0.33
+    2     1.00
+    3     0.75
+    4     0.34
+    5     1.00
+    6     0.67
+    7     1.00
+    8     0.75
+    9     0.50
+    10    0.66
+    11    1.00
+    dtype: float64
+    """
+    # Element-wise ratio; rounding receives the whole column at once.
+    ratio = df["mark"] / df["score_rate"]
+    return rounding(ratio)
+
+
+# -----------------------------
+# Reglages pour 'vim'
+# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
+# cursor: 16 del
--- a/recopytex/datalib/on_value.py
+++ b/recopytex/datalib/on_value.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from math import ceil, floor
+
+
+def round_with_base(x, base=0.5):
+    """Round to a multiple of base
+
+    ``x`` is divided by ``base``, rounded with the built-in ``round`` and
+    multiplied back. A last ``round`` removes floating point noise; its
+    number of digits is the number of decimal places of ``str(base)``, also
+    when that text uses scientific notation (``1e-05`` has 5 decimal places).
+
+    :param x: value to round (anything accepted by ``float``)
+    :param base: step between two allowed results
+    :return: the multiple of base obtained this way
+
+    :example:
+    >>> round_with_base(1.33, 0.1)
+    1.3
+    >>> round_with_base(1.33, 0.2)
+    1.4
+    >>> round_with_base(1.33, 1)
+    1
+    >>> round_with_base(1.33, 2)
+    2
+    >>> round_with_base(0.000034, 1e-05)
+    3e-05
+    """
+    # str(base) may look like "0.5", "2" or "1.5e-07": split the optional
+    # exponent from the mantissa before counting decimal places.
+    mantissa, _, exponent = str(base).lower().partition("e")
+    decimals = len(mantissa.partition(".")[2])
+    try:
+        shift = int(exponent) if exponent else 0
+    except ValueError:
+        # Not a numeric exponent: only the digits after the "." count.
+        shift = 0
+    # A negative exponent adds decimal places, a positive one removes them.
+    prec = max(0, decimals - shift)
+    return round(base * round(float(x) / base), prec)
+
+
+def round_half_point(x):
+    """Round to a multiple of 0.5 (delegates to round_with_base)
+
+    :param x: value to round
+    :return: result of round_with_base with a base of 0.5
+
+    :example:
+    >>> round_half_point(1.33)
+    1.5
+    >>> round_half_point(1.1)
+    1.0
+    >>> round_half_point(1.66)
+    1.5
+    >>> round_half_point(1.76)
+    2.0
+    """
+    half_point = 0.5
+    return round_with_base(x, base=half_point)