2019-2020/1ST/Probabilite_statistiques/Croisement2variables/creation_donnees.ipynb

2542 lines
83 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Création des données pour le chapitre"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'fr_FR.UTF-8'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import Markdown as md\n",
"from IPython.display import display, HTML\n",
"import pandas as pd\n",
"import ipywidgets as widgets\n",
"from pathlib import Path\n",
"from datetime import datetime\n",
"import locale\n",
"locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Les données viennent de https://github.com/chris1610/pbpython/blob/master/data/2018_Sales_Total.xlsx"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the sales workbook and expose a \"Nom\" column, when present, as \"Vendeur\".\n",
"df = pd.read_excel('2018_Sales_Total.xlsx').rename(columns={\"Nom\": \"Vendeur\"})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>740150</td>\n",
" <td>Barton LLC</td>\n",
" <td>B1-20000</td>\n",
" <td>86.69</td>\n",
" <td>2018-01-01 07:21:51</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>714466</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S2-77896</td>\n",
" <td>63.16</td>\n",
" <td>2018-01-01 10:00:47</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>218895</td>\n",
" <td>Kulas Inc</td>\n",
" <td>B1-69924</td>\n",
" <td>90.70</td>\n",
" <td>2018-01-01 13:24:58</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>307599</td>\n",
" <td>Kassulke, Ondricka and Metz</td>\n",
" <td>S1-65481</td>\n",
" <td>21.05</td>\n",
" <td>2018-01-01 15:05:22</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S2-34077</td>\n",
" <td>83.21</td>\n",
" <td>2018-01-01 23:26:55</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix \\\n",
"0 740150 Barton LLC B1-20000 86.69 \n",
"1 714466 Trantow-Barrows S2-77896 63.16 \n",
"2 218895 Kulas Inc B1-69924 90.70 \n",
"3 307599 Kassulke, Ondricka and Metz S1-65481 21.05 \n",
"4 412290 Jerde-Hilpert S2-34077 83.21 \n",
"\n",
" date Produit \n",
"0 2018-01-01 07:21:51 Logiciel \n",
"1 2018-01-01 10:00:47 Ordinateur \n",
"2 2018-01-01 13:24:58 Logiciel \n",
"3 2018-01-01 15:05:22 Impression \n",
"4 2018-01-01 23:26:55 Ordinateur "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Réduction des codes produits avec un type de produit"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['B1-20000', 'S2-77896', 'B1-69924', 'S1-65481', 'S2-34077',\n",
" 'B1-65551', 'S1-30248', 'S1-50961', 'S2-82423', 'S2-00301',\n",
" 'S2-23246', 'S2-10342', 'B1-53102', 'S1-06532', 'B1-50809',\n",
" 'S1-82801', 'B1-04202', 'S2-83881', 'S2-78676', 'B1-53636',\n",
" 'B1-38851', 'B1-86481', 'S1-93683', 'B1-33087', 'S1-27722',\n",
" 'B1-05914', 'B1-33364', 'S2-16558', 'S1-47412', 'S2-11481'],\n",
" dtype=object)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.Code.unique()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['B1', 'S2', 'S1'], dtype=object)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct two-character prefixes of the product codes (vectorised string slice\n",
"# instead of a row-by-row apply).\n",
"df[\"Code\"].str[:2].unique()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>740150</td>\n",
" <td>Barton LLC</td>\n",
" <td>B1-20000</td>\n",
" <td>86.69</td>\n",
" <td>2018-01-01 07:21:51</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>714466</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S2-77896</td>\n",
" <td>63.16</td>\n",
" <td>2018-01-01 10:00:47</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>218895</td>\n",
" <td>Kulas Inc</td>\n",
" <td>B1-69924</td>\n",
" <td>90.70</td>\n",
" <td>2018-01-01 13:24:58</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>307599</td>\n",
" <td>Kassulke, Ondricka and Metz</td>\n",
" <td>S1-65481</td>\n",
" <td>21.05</td>\n",
" <td>2018-01-01 15:05:22</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S2-34077</td>\n",
" <td>83.21</td>\n",
" <td>2018-01-01 23:26:55</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix \\\n",
"0 740150 Barton LLC B1-20000 86.69 \n",
"1 714466 Trantow-Barrows S2-77896 63.16 \n",
"2 218895 Kulas Inc B1-69924 90.70 \n",
"3 307599 Kassulke, Ondricka and Metz S1-65481 21.05 \n",
"4 412290 Jerde-Hilpert S2-34077 83.21 \n",
"\n",
" date Produit \n",
"0 2018-01-01 07:21:51 Logiciel \n",
"1 2018-01-01 10:00:47 Ordinateur \n",
"2 2018-01-01 13:24:58 Logiciel \n",
"3 2018-01-01 15:05:22 Impression \n",
"4 2018-01-01 23:26:55 Ordinateur "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Product family associated with each two-character code prefix.\n",
"code_prod = dict(B1=\"Logiciel\", S1=\"Impression\", S2=\"Ordinateur\")\n",
"# A prefix missing from code_prod raises KeyError rather than being silently skipped.\n",
"df[\"Produit\"] = [code_prod[code[:2]] for code in df[\"Code\"]]\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Réduction du nombre de noms"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Barton LLC', 'Trantow-Barrows', 'Kulas Inc',\n",
" 'Kassulke, Ondricka and Metz', 'Jerde-Hilpert'], dtype=object)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"uniq_names = df.Vendeur.unique()\n",
"uniq_names"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>740150</td>\n",
" <td>Barton LLC</td>\n",
" <td>B1-20000</td>\n",
" <td>86.69</td>\n",
" <td>2018-01-01 07:21:51</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>714466</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S2-77896</td>\n",
" <td>63.16</td>\n",
" <td>2018-01-01 10:00:47</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>218895</td>\n",
" <td>Kulas Inc</td>\n",
" <td>B1-69924</td>\n",
" <td>90.70</td>\n",
" <td>2018-01-01 13:24:58</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>307599</td>\n",
" <td>Kassulke, Ondricka and Metz</td>\n",
" <td>S1-65481</td>\n",
" <td>21.05</td>\n",
" <td>2018-01-01 15:05:22</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S2-34077</td>\n",
" <td>83.21</td>\n",
" <td>2018-01-01 23:26:55</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix \\\n",
"0 740150 Barton LLC B1-20000 86.69 \n",
"1 714466 Trantow-Barrows S2-77896 63.16 \n",
"2 218895 Kulas Inc B1-69924 90.70 \n",
"3 307599 Kassulke, Ondricka and Metz S1-65481 21.05 \n",
"4 412290 Jerde-Hilpert S2-34077 83.21 \n",
"\n",
" date Produit \n",
"0 2018-01-01 07:21:51 Logiciel \n",
"1 2018-01-01 10:00:47 Ordinateur \n",
"2 2018-01-01 13:24:58 Logiciel \n",
"3 2018-01-01 15:05:22 Impression \n",
"4 2018-01-01 23:26:55 Ordinateur "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse the seller names onto the first nbr_names distinct names: the name at\n",
"# position i in uniq_names is replaced by the one at position i % nbr_names.\n",
"nbr_names = 5\n",
"names = {orig: uniq_names[i % nbr_names] for i, orig in enumerate(uniq_names)}\n",
"# uniq_names holds the unique values of df.Vendeur, so every seller is a key of\n",
"# names and a vectorised map can replace the row-by-row lookup.\n",
"df[\"Vendeur\"] = df[\"Vendeur\"].map(names)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1500 entries, 0 to 1499\n",
"Data columns (total 6 columns):\n",
"Numero de compte 1500 non-null int64\n",
"Vendeur 1500 non-null object\n",
"Code 1500 non-null object\n",
"Prix 1500 non-null float64\n",
"date 1500 non-null datetime64[ns]\n",
"Produit 1500 non-null object\n",
"dtypes: datetime64[ns](1), float64(1), int64(1), object(3)\n",
"memory usage: 70.4+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): this writes to the same path the notebook loads its input from\n",
"# (pd.read_excel('2018_Sales_Total.xlsx')), so the source workbook is replaced by\n",
"# the transformed data. Keep a copy of the raw download if it must be preserved.\n",
"df.to_excel(\"2018_Sales_Total.xlsx\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Données à traiter à la main - 10 premiers jours de janvier"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>740150</td>\n",
" <td>Barton LLC</td>\n",
" <td>B1-20000</td>\n",
" <td>86.69</td>\n",
" <td>2018-01-01 07:21:51</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>714466</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S2-77896</td>\n",
" <td>63.16</td>\n",
" <td>2018-01-01 10:00:47</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>218895</td>\n",
" <td>Kulas Inc</td>\n",
" <td>B1-69924</td>\n",
" <td>90.70</td>\n",
" <td>2018-01-01 13:24:58</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>307599</td>\n",
" <td>Kassulke, Ondricka and Metz</td>\n",
" <td>S1-65481</td>\n",
" <td>21.05</td>\n",
" <td>2018-01-01 15:05:22</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S2-34077</td>\n",
" <td>83.21</td>\n",
" <td>2018-01-01 23:26:55</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix \\\n",
"0 740150 Barton LLC B1-20000 86.69 \n",
"1 714466 Trantow-Barrows S2-77896 63.16 \n",
"2 218895 Kulas Inc B1-69924 90.70 \n",
"3 307599 Kassulke, Ondricka and Metz S1-65481 21.05 \n",
"4 412290 Jerde-Hilpert S2-34077 83.21 \n",
"\n",
" date Produit \n",
"0 2018-01-01 07:21:51 Logiciel \n",
"1 2018-01-01 10:00:47 Ordinateur \n",
"2 2018-01-01 13:24:58 Logiciel \n",
"3 2018-01-01 15:05:22 Impression \n",
"4 2018-01-01 23:26:55 Ordinateur "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sales dated strictly before 2018-01-11: the subset meant to be processed by hand.\n",
"hand_df = df.loc[lambda sales: sales[\"date\"] < \"2018-01-11\"]\n",
"hand_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"hand_df.to_excel(\"201801_sales.xlsx\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Quelques tableau d'effectifs et de fréquences"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"def count(x):\n",
"    \"\"\"Return the number of non-null entries of ``x`` (delegates to ``x.count()``).\"\"\"\n",
"    return x.count()\n",
"\n",
"\n",
"# The aggregator's __name__ is what shows up as the column label in the pivot tables.\n",
"count.__name__ = \"Effectif\"\n",
"\n",
"\n",
"def freq(total):\n",
"    \"\"\"Build an aggregator expressing a group's size as a share of ``total``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    total : number\n",
"        Denominator of the frequency, fixed once for every group.\n",
"\n",
"    Returns\n",
"    -------\n",
"    callable\n",
"        Function taking a group ``x`` and returning ``x.count() / total``\n",
"        formatted as a percentage with two decimals, e.g. ``\"19.51%\"``.\n",
"    \"\"\"\n",
"\n",
"    def func(x):\n",
"        return f\"{x.count() / total:.2%}\"\n",
"\n",
"    # Same labelling trick as for count: the name becomes the column header.\n",
"    func.__name__ = \"Fréquence marginale\"\n",
"    return func"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Par rapport aux vendeurs"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Effectif</th>\n",
" <th>Fréquence marginale</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>8</td>\n",
" <td>19.51%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>7</td>\n",
" <td>17.07%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>5</td>\n",
" <td>12.20%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>6</td>\n",
" <td>14.63%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>15</td>\n",
" <td>36.59%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>41</td>\n",
" <td>100.00%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Effectif Fréquence marginale\n",
"Vendeur \n",
"Barton LLC 8 19.51%\n",
"Jerde-Hilpert 7 17.07%\n",
"Kassulke, Ondricka and Metz 5 12.20%\n",
"Kulas Inc 6 14.63%\n",
"Trantow-Barrows 15 36.59%\n",
"Total 41 100.00%"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counts and frequencies per seller. The denominator is the number of non-null\n",
"# \"Code\" values, i.e. the very column being counted, so the \"Total\" row is 100%\n",
"# whatever the other columns contain.\n",
"pt_sellers = pd.pivot_table(\n",
"    hand_df,\n",
"    index=\"Vendeur\",\n",
"    values=\"Code\",\n",
"    aggfunc=[count, freq(hand_df[\"Code\"].count())],\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")\n",
"# Drop the inner column level so that only the aggregator names remain as headers.\n",
"pt_sellers.columns = pt_sellers.columns.droplevel(1)\n",
"pt_sellers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Par rapport aux Produits"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Effectif</th>\n",
" <th>Fréquence marginale</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Produit</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Impression</th>\n",
" <td>12</td>\n",
" <td>29.27%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Logiciel</th>\n",
" <td>13</td>\n",
" <td>31.71%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ordinateur</th>\n",
" <td>16</td>\n",
" <td>39.02%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>41</td>\n",
" <td>100.00%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Effectif Fréquence marginale\n",
"Produit \n",
"Impression 12 29.27%\n",
"Logiciel 13 31.71%\n",
"Ordinateur 16 39.02%\n",
"Total 41 100.00%"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counts and frequencies per product. The denominator is the number of non-null\n",
"# \"Code\" values, i.e. the very column being counted, so the \"Total\" row is 100%\n",
"# whatever the other columns contain.\n",
"# NOTE(review): the name pt_sellers is reused here although the table is indexed\n",
"# by product; it is kept unchanged in case later cells rely on it.\n",
"pt_sellers = pd.pivot_table(\n",
"    hand_df,\n",
"    index=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=[count, freq(hand_df[\"Code\"].count())],\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")\n",
"# Drop the inner column level so that only the aggregator names remain as headers.\n",
"pt_sellers.columns = pt_sellers.columns.droplevel(1)\n",
"pt_sellers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Croisement de ventes entre vendeurs et produits"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>12.0</td>\n",
" <td>13.0</td>\n",
" <td>16.0</td>\n",
" <td>41</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 3.0 4.0 1.0 8\n",
"Jerde-Hilpert 2.0 1.0 4.0 7\n",
"Kassulke, Ondricka and Metz 1.0 NaN 4.0 5\n",
"Kulas Inc 2.0 2.0 2.0 6\n",
"Trantow-Barrows 4.0 6.0 5.0 15\n",
"Total 12.0 13.0 16.0 41"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Contingency table of the number of sales per seller and product.\n",
"# fill_value=0 reports a seller/product pair without any sale as 0 instead of NaN.\n",
"pd.pivot_table(\n",
"    hand_df,\n",
"    index=\"Vendeur\",\n",
"    columns=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=\"count\",\n",
"    fill_value=0,\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrrr}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Total \\\\\n",
"\\textbf{Vendeur } & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 3 & 4 & 1 & 8 \\\\\n",
"\\textbf{Jerde-Hilpert } & 2 & 1 & 4 & 7 \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 1 & nan & 4 & 5 \\\\\n",
"\\textbf{Kulas Inc } & 2 & 2 & 2 & 6 \\\\\n",
"\\textbf{Trantow-Barrows } & 4 & 6 & 5 & 15 \\\\\n",
"\\textbf{Total } & 12 & 13 & 16 & 41 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# LaTeX export of the contingency table (sales per seller and product).\n",
"# fill_value=0 prevents a pair without any sale from being printed as \"nan\".\n",
"print(\n",
"    pd.pivot_table(\n",
"        hand_df,\n",
"        index=\"Vendeur\",\n",
"        columns=\"Produit\",\n",
"        values=\"Code\",\n",
"        aggfunc=\"count\",\n",
"        fill_value=0,\n",
"        margins=True,\n",
"        margins_name=\"Total\",\n",
"    ).to_latex(\n",
"        # Counts that come back as floats are printed without decimals.\n",
"        float_format=\"{:0.0f}\".format,\n",
"        bold_rows=True,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fréquences marginales"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>7.32%</td>\n",
" <td>9.76%</td>\n",
" <td>2.44%</td>\n",
" <td>19.51%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>4.88%</td>\n",
" <td>2.44%</td>\n",
" <td>9.76%</td>\n",
" <td>17.07%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>2.44%</td>\n",
" <td>NaN</td>\n",
" <td>9.76%</td>\n",
" <td>12.20%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>4.88%</td>\n",
" <td>4.88%</td>\n",
" <td>4.88%</td>\n",
" <td>14.63%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>9.76%</td>\n",
" <td>14.63%</td>\n",
" <td>12.20%</td>\n",
" <td>36.59%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>29.27%</td>\n",
" <td>31.71%</td>\n",
" <td>39.02%</td>\n",
" <td>100.00%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 7.32% 9.76% 2.44% 19.51%\n",
"Jerde-Hilpert 4.88% 2.44% 9.76% 17.07%\n",
"Kassulke, Ondricka and Metz 2.44% NaN 9.76% 12.20%\n",
"Kulas Inc 4.88% 4.88% 4.88% 14.63%\n",
"Trantow-Barrows 9.76% 14.63% 12.20% 36.59%\n",
"Total 29.27% 31.71% 39.02% 100.00%"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of the sales for every seller/product pair, relative to the number of\n",
"# non-null \"Code\" values (the column being counted). A pair without any sale is\n",
"# reported as \"0.00%\", the text freq would produce for an empty group, not NaN.\n",
"pd.pivot_table(\n",
"    hand_df,\n",
"    index=\"Vendeur\",\n",
"    columns=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=freq(hand_df[\"Code\"].count()),\n",
"    fill_value=\"0.00%\",\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ventes sur le mois de Février"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>383080</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>B1-20000</td>\n",
" <td>33.69</td>\n",
" <td>2018-02-01 09:04:59</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S1-27722</td>\n",
" <td>21.12</td>\n",
" <td>2018-02-01 11:51:46</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>B1-86481</td>\n",
" <td>35.99</td>\n",
" <td>2018-02-01 17:24:32</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>B1-20000</td>\n",
" <td>78.90</td>\n",
" <td>2018-02-01 19:56:48</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>672390</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S1-06532</td>\n",
" <td>55.82</td>\n",
" <td>2018-02-02 03:45:20</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix date \\\n",
"134 383080 Trantow-Barrows B1-20000 33.69 2018-02-01 09:04:59 \n",
"135 412290 Jerde-Hilpert S1-27722 21.12 2018-02-01 11:51:46 \n",
"136 412290 Jerde-Hilpert B1-86481 35.99 2018-02-01 17:24:32 \n",
"137 412290 Jerde-Hilpert B1-20000 78.90 2018-02-01 19:56:48 \n",
"138 672390 Trantow-Barrows S1-06532 55.82 2018-02-02 03:45:20 \n",
"\n",
" Produit \n",
"134 Logiciel \n",
"135 Impression \n",
"136 Logiciel \n",
"137 Logiciel \n",
"138 Impression "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sales dated from 2018-02-01 (included) up to 2018-03-01 (excluded).\n",
"feb_df = df.loc[\n",
"    lambda sales: (sales[\"date\"] >= \"2018-02-01\") & (sales[\"date\"] < \"2018-03-01\")\n",
"]\n",
"feb_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>2</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>6</td>\n",
" <td>15</td>\n",
" <td>3</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>13</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>36</td>\n",
" <td>49</td>\n",
" <td>23</td>\n",
" <td>108</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 5 12 2 19\n",
"Jerde-Hilpert 6 5 6 17\n",
"Kassulke, Ondricka and Metz 6 9 5 20\n",
"Kulas Inc 6 15 3 24\n",
"Trantow-Barrows 13 8 7 28\n",
"Total 36 49 23 108"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Contingency table of the number of February sales per seller and product.\n",
"# fill_value=0 reports a seller/product pair without any sale as 0 instead of NaN.\n",
"pd.pivot_table(\n",
"    feb_df,\n",
"    index=\"Vendeur\",\n",
"    columns=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=\"count\",\n",
"    fill_value=0,\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrrr}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Total \\\\\n",
"\\textbf{Vendeur } & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 5 & 12 & 2 & 19 \\\\\n",
"\\textbf{Jerde-Hilpert } & 6 & 5 & 6 & 17 \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 6 & 9 & 5 & 20 \\\\\n",
"\\textbf{Kulas Inc } & 6 & 15 & 3 & 24 \\\\\n",
"\\textbf{Trantow-Barrows } & 13 & 8 & 7 & 28 \\\\\n",
"\\textbf{Total } & 36 & 49 & 23 & 108 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# LaTeX export of the February contingency table (sales per seller and product).\n",
"# fill_value=0 prevents a pair without any sale from being printed as \"nan\".\n",
"print(\n",
"    pd.pivot_table(\n",
"        feb_df,\n",
"        index=\"Vendeur\",\n",
"        columns=\"Produit\",\n",
"        values=\"Code\",\n",
"        aggfunc=\"count\",\n",
"        fill_value=0,\n",
"        margins=True,\n",
"        margins_name=\"Total\",\n",
"    ).to_latex(\n",
"        # Counts that come back as floats are printed without decimals.\n",
"        float_format=\"{:0.0f}\".format,\n",
"        bold_rows=True,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tableau des fréquences marginales"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>4.63%</td>\n",
" <td>11.11%</td>\n",
" <td>1.85%</td>\n",
" <td>17.59%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>5.56%</td>\n",
" <td>4.63%</td>\n",
" <td>5.56%</td>\n",
" <td>15.74%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>5.56%</td>\n",
" <td>8.33%</td>\n",
" <td>4.63%</td>\n",
" <td>18.52%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>5.56%</td>\n",
" <td>13.89%</td>\n",
" <td>2.78%</td>\n",
" <td>22.22%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>12.04%</td>\n",
" <td>7.41%</td>\n",
" <td>6.48%</td>\n",
" <td>25.93%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>33.33%</td>\n",
" <td>45.37%</td>\n",
" <td>21.30%</td>\n",
" <td>100.00%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 4.63% 11.11% 1.85% 17.59%\n",
"Jerde-Hilpert 5.56% 4.63% 5.56% 15.74%\n",
"Kassulke, Ondricka and Metz 5.56% 8.33% 4.63% 18.52%\n",
"Kulas Inc 5.56% 13.89% 2.78% 22.22%\n",
"Trantow-Barrows 12.04% 7.41% 6.48% 25.93%\n",
"Total 33.33% 45.37% 21.30% 100.00%"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of the February sales for every seller/product pair, relative to the\n",
"# number of non-null \"Code\" values (the column being counted). A pair without any\n",
"# sale is reported as \"0.00%\", the text freq would produce for an empty group.\n",
"pd.pivot_table(\n",
"    feb_df,\n",
"    index=\"Vendeur\",\n",
"    columns=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=freq(feb_df[\"Code\"].count()),\n",
"    fill_value=\"0.00%\",\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lllll}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Total \\\\\n",
"\\textbf{Vendeur } & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 4.63\\% & 11.11\\% & 1.85\\% & 17.59\\% \\\\\n",
"\\textbf{Jerde-Hilpert } & 5.56\\% & 4.63\\% & 5.56\\% & 15.74\\% \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 5.56\\% & 8.33\\% & 4.63\\% & 18.52\\% \\\\\n",
"\\textbf{Kulas Inc } & 5.56\\% & 13.89\\% & 2.78\\% & 22.22\\% \\\\\n",
"\\textbf{Trantow-Barrows } & 12.04\\% & 7.41\\% & 6.48\\% & 25.93\\% \\\\\n",
"\\textbf{Total } & 33.33\\% & 45.37\\% & 21.30\\% & 100.00\\% \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# LaTeX export of the February frequency table. Frequencies are relative to the\n",
"# number of non-null \"Code\" values (the column being counted); a pair without any\n",
"# sale is reported as \"0.00%\" rather than NaN.\n",
"print(\n",
"    pd.pivot_table(\n",
"        feb_df,\n",
"        index=\"Vendeur\",\n",
"        columns=\"Produit\",\n",
"        values=\"Code\",\n",
"        aggfunc=freq(feb_df[\"Code\"].count()),\n",
"        fill_value=\"0.00%\",\n",
"        margins=True,\n",
"        margins_name=\"Total\",\n",
"    ).to_latex(\n",
"        # The cells are already formatted text; float_format only matters if a\n",
"        # float ever ends up in the table.\n",
"        float_format=\"{:0.0f}\".format,\n",
"        bold_rows=True,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tableau des fréquences conditionnelles au vendeur"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"</style><table id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11d\" ><thead> <tr> <th class=\"index_name level0\" >Produit</th> <th class=\"col_heading level0 col0\" >Impression</th> <th class=\"col_heading level0 col1\" >Logiciel</th> <th class=\"col_heading level0 col2\" >Ordinateur</th> <th class=\"col_heading level0 col3\" >Total</th> </tr> <tr> <th class=\"index_name level0\" >Vendeur</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n",
" <tr>\n",
" <th id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11dlevel0_row0\" class=\"row_heading level0 row0\" >Barton LLC</th>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow0_col0\" class=\"data row0 col0\" >26.32%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow0_col1\" class=\"data row0 col1\" >63.16%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow0_col2\" class=\"data row0 col2\" >10.53%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow0_col3\" class=\"data row0 col3\" >100.00%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11dlevel0_row1\" class=\"row_heading level0 row1\" >Jerde-Hilpert</th>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow1_col0\" class=\"data row1 col0\" >35.29%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow1_col1\" class=\"data row1 col1\" >29.41%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow1_col2\" class=\"data row1 col2\" >35.29%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow1_col3\" class=\"data row1 col3\" >100.00%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11dlevel0_row2\" class=\"row_heading level0 row2\" >Kassulke, Ondricka and Metz</th>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow2_col0\" class=\"data row2 col0\" >30.00%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow2_col1\" class=\"data row2 col1\" >45.00%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow2_col2\" class=\"data row2 col2\" >25.00%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow2_col3\" class=\"data row2 col3\" >100.00%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11dlevel0_row3\" class=\"row_heading level0 row3\" >Kulas Inc</th>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow3_col0\" class=\"data row3 col0\" >25.00%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow3_col1\" class=\"data row3 col1\" >62.50%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow3_col2\" class=\"data row3 col2\" >12.50%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow3_col3\" class=\"data row3 col3\" >100.00%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11dlevel0_row4\" class=\"row_heading level0 row4\" >Trantow-Barrows</th>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow4_col0\" class=\"data row4 col0\" >46.43%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow4_col1\" class=\"data row4 col1\" >28.57%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow4_col2\" class=\"data row4 col2\" >25.00%</td>\n",
" <td id=\"T_83340c58_c4ac_11e9_a18a_902b34d9e11drow4_col3\" class=\"data row4 col3\" >100.00%</td>\n",
" </tr>\n",
" </tbody></table>"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7f0b80feb4d0>"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Frequencies conditional on the Vendeur: count rows per (Vendeur, Produit),\n",
"# spread Produit over the columns, append a per-row Total, then divide every\n",
"# row by its own Total.\n",
"vp_df = (\n",
"    feb_df.groupby([\"Vendeur\", \"Produit\"])[\"Code\"]\n",
"    .agg(len)\n",
"    .unstack()\n",
"    .assign(Total=lambda counts: counts.T.sum())\n",
"    .apply(lambda row: row / row[\"Total\"], axis=1)\n",
")\n",
"vp_df.style.format(\"{:.2%}\")"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrrr}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Total \\\\\n",
"\\textbf{Vendeur } & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 26.32\\% & 63.16\\% & 10.53\\% & 100.00\\% \\\\\n",
"\\textbf{Jerde-Hilpert } & 35.29\\% & 29.41\\% & 35.29\\% & 100.00\\% \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 30.00\\% & 45.00\\% & 25.00\\% & 100.00\\% \\\\\n",
"\\textbf{Kulas Inc } & 25.00\\% & 62.50\\% & 12.50\\% & 100.00\\% \\\\\n",
"\\textbf{Trantow-Barrows } & 46.43\\% & 28.57\\% & 25.00\\% & 100.00\\% \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Print the current vp_df as LaTeX, floats rendered as percentages.\n",
"print(\n",
"    vp_df.to_latex(bold_rows=True, float_format=\"{:.2%}\".format)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tableau des fréquences conditionnelles au produit"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"</style><table id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11d\" ><thead> <tr> <th class=\"index_name level0\" >Produit</th> <th class=\"col_heading level0 col0\" >Impression</th> <th class=\"col_heading level0 col1\" >Logiciel</th> <th class=\"col_heading level0 col2\" >Ordinateur</th> </tr> <tr> <th class=\"index_name level0\" >Vendeur</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row0\" class=\"row_heading level0 row0\" >Barton LLC</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow0_col0\" class=\"data row0 col0\" >13.89%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow0_col1\" class=\"data row0 col1\" >24.49%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow0_col2\" class=\"data row0 col2\" >8.70%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row1\" class=\"row_heading level0 row1\" >Jerde-Hilpert</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow1_col0\" class=\"data row1 col0\" >16.67%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow1_col1\" class=\"data row1 col1\" >10.20%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow1_col2\" class=\"data row1 col2\" >26.09%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row2\" class=\"row_heading level0 row2\" >Kassulke, Ondricka and Metz</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow2_col0\" class=\"data row2 col0\" >16.67%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow2_col1\" class=\"data row2 col1\" >18.37%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow2_col2\" class=\"data row2 col2\" >21.74%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row3\" class=\"row_heading level0 row3\" >Kulas Inc</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow3_col0\" class=\"data row3 col0\" >16.67%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow3_col1\" class=\"data row3 col1\" >30.61%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow3_col2\" class=\"data row3 col2\" >13.04%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row4\" class=\"row_heading level0 row4\" >Trantow-Barrows</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow4_col0\" class=\"data row4 col0\" >36.11%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow4_col1\" class=\"data row4 col1\" >16.33%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow4_col2\" class=\"data row4 col2\" >30.43%</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11dlevel0_row5\" class=\"row_heading level0 row5\" >Total</th>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow5_col0\" class=\"data row5 col0\" >100.00%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow5_col1\" class=\"data row5 col1\" >100.00%</td>\n",
" <td id=\"T_8a002ec2_c4ac_11e9_a18a_902b34d9e11drow5_col2\" class=\"data row5 col2\" >100.00%</td>\n",
" </tr>\n",
" </tbody></table>"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7f0b80ff8bd0>"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Frequencies conditional on the Produit: here the rows are the Produit\n",
"# values, each divided by its own Total; the transpose is displayed so that\n",
"# Vendeur ends up on the rows.\n",
"vp_df = (\n",
"    feb_df.groupby([\"Produit\", \"Vendeur\"])[\"Code\"]\n",
"    .agg(len)\n",
"    .unstack()\n",
"    .assign(Total=lambda counts: counts.T.sum())\n",
"    .apply(lambda row: row / row[\"Total\"], axis=1)\n",
")\n",
"vp_df.T.style.format(\"{:.2%}\")"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrr}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur \\\\\n",
"\\textbf{Vendeur } & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 13.89\\% & 24.49\\% & 8.70\\% \\\\\n",
"\\textbf{Jerde-Hilpert } & 16.67\\% & 10.20\\% & 26.09\\% \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 16.67\\% & 18.37\\% & 21.74\\% \\\\\n",
"\\textbf{Kulas Inc } & 16.67\\% & 30.61\\% & 13.04\\% \\\\\n",
"\\textbf{Trantow-Barrows } & 36.11\\% & 16.33\\% & 30.43\\% \\\\\n",
"\\textbf{Total } & 100.00\\% & 100.00\\% & 100.00\\% \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Print the transposed vp_df as LaTeX, floats rendered as percentages.\n",
"print(\n",
"    vp_df.T.to_latex(bold_rows=True, float_format=\"{:.2%}\".format)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ventes sur le mois de mars"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>163416</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S1-30248</td>\n",
" <td>65.03</td>\n",
" <td>2018-03-01 16:07:40</td>\n",
" <td>Impression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>243</th>\n",
" <td>527099</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S2-82423</td>\n",
" <td>76.21</td>\n",
" <td>2018-03-01 17:18:01</td>\n",
" <td>Ordinateur</td>\n",
" </tr>\n",
" <tr>\n",
" <th>244</th>\n",
" <td>527099</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>B1-50809</td>\n",
" <td>70.78</td>\n",
" <td>2018-03-01 18:53:09</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>737550</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>B1-50809</td>\n",
" <td>50.11</td>\n",
" <td>2018-03-01 23:47:17</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>688981</td>\n",
" <td>Kassulke, Ondricka and Metz</td>\n",
" <td>B1-86481</td>\n",
" <td>97.16</td>\n",
" <td>2018-03-02 01:46:44</td>\n",
" <td>Logiciel</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix \\\n",
"242 163416 Jerde-Hilpert S1-30248 65.03 \n",
"243 527099 Jerde-Hilpert S2-82423 76.21 \n",
"244 527099 Jerde-Hilpert B1-50809 70.78 \n",
"245 737550 Trantow-Barrows B1-50809 50.11 \n",
"246 688981 Kassulke, Ondricka and Metz B1-86481 97.16 \n",
"\n",
" date Produit \n",
"242 2018-03-01 16:07:40 Impression \n",
"243 2018-03-01 17:18:01 Ordinateur \n",
"244 2018-03-01 18:53:09 Logiciel \n",
"245 2018-03-01 23:47:17 Logiciel \n",
"246 2018-03-02 01:46:44 Logiciel "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep the rows dated from 2018-03-01 (included) up to 2018-04-01 (excluded).\n",
"march_df = df.loc[\n",
"    lambda sales: (sales.date >= \"2018-03-01\") & (sales.date < \"2018-04-01\")\n",
"]\n",
"march_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Croisement de ventes entre vendeurs et produits"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>9</td>\n",
" <td>13</td>\n",
" <td>9</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>6</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>13</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>6</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>46</td>\n",
" <td>48</td>\n",
" <td>48</td>\n",
" <td>142</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 7 7 8 22\n",
"Jerde-Hilpert 9 13 9 31\n",
"Kassulke, Ondricka and Metz 6 12 12 30\n",
"Kulas Inc 13 5 13 31\n",
"Trantow-Barrows 11 11 6 28\n",
"Total 46 48 48 142"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Contingency table: number of \"Code\" entries per Vendeur and Produit in\n",
"# march_df, with a \"Total\" margin on both axes.\n",
"march_df.pivot_table(\n",
"    index=\"Vendeur\",\n",
"    columns=\"Produit\",\n",
"    values=\"Code\",\n",
"    aggfunc=\"count\",\n",
"    margins=True,\n",
"    margins_name=\"Total\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrrr}\n",
"\\toprule\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Total \\\\\n",
"\\textbf{Vendeur } & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 7 & 7 & 8 & 22 \\\\\n",
"\\textbf{Jerde-Hilpert } & 9 & 13 & 9 & 31 \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 6 & 12 & 12 & 30 \\\\\n",
"\\textbf{Kulas Inc } & 13 & 5 & 13 & 31 \\\\\n",
"\\textbf{Trantow-Barrows } & 11 & 11 & 6 & 28 \\\\\n",
"\\textbf{Total } & 46 & 48 & 48 & 142 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Print the march_df contingency table (counts with totals) as LaTeX.\n",
"print(\n",
"    march_df.pivot_table(\n",
"        index=\"Vendeur\",\n",
"        columns=\"Produit\",\n",
"        values=\"Code\",\n",
"        aggfunc=\"count\",\n",
"        margins=True,\n",
"        margins_name=\"Total\",\n",
"    ).to_latex(\n",
"        float_format=\"{:0.0f}\".format,\n",
"        bold_rows=True,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fréquences marginales"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Total</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>17.07%</td>\n",
" <td>17.07%</td>\n",
" <td>19.51%</td>\n",
" <td>53.66%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>21.95%</td>\n",
" <td>31.71%</td>\n",
" <td>21.95%</td>\n",
" <td>75.61%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>14.63%</td>\n",
" <td>29.27%</td>\n",
" <td>29.27%</td>\n",
" <td>73.17%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>31.71%</td>\n",
" <td>12.20%</td>\n",
" <td>31.71%</td>\n",
" <td>75.61%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>26.83%</td>\n",
" <td>26.83%</td>\n",
" <td>14.63%</td>\n",
" <td>68.29%</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total</th>\n",
" <td>112.20%</td>\n",
" <td>117.07%</td>\n",
" <td>117.07%</td>\n",
" <td>346.34%</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Produit Impression Logiciel Ordinateur Total\n",
"Vendeur \n",
"Barton LLC 17.07% 17.07% 19.51% 53.66%\n",
"Jerde-Hilpert 21.95% 31.71% 21.95% 75.61%\n",
"Kassulke, Ondricka and Metz 14.63% 29.27% 29.27% 73.17%\n",
"Kulas Inc 31.71% 12.20% 31.71% 75.61%\n",
"Trantow-Barrows 26.83% 26.83% 14.63% 68.29%\n",
"Total 112.20% 117.07% 117.07% 346.34%"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Frequency table Vendeur x Produit for march_df.\n",
"# As in the February cells, freq is called with the number of sales so that\n",
"# it returns the aggregator; passing the bare `freq` produced margins well\n",
"# above 100 % in the previously stored output.\n",
"pd.pivot_table(march_df,\n",
"               index=\"Vendeur\",\n",
"               columns=\"Produit\",\n",
"               values=\"Code\",\n",
"               aggfunc=freq(march_df[\"Prix\"].count()),\n",
"               margins=True,\n",
"               margins_name=\"Total\"\n",
"              )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Et les 2 mois en même temps ?"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Numero de compte</th>\n",
" <th>Vendeur</th>\n",
" <th>Code</th>\n",
" <th>Prix</th>\n",
" <th>date</th>\n",
" <th>Produit</th>\n",
" <th>Mois</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>383080</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>B1-20000</td>\n",
" <td>33.69</td>\n",
" <td>2018-02-01 09:04:59</td>\n",
" <td>Logiciel</td>\n",
" <td>février</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>S1-27722</td>\n",
" <td>21.12</td>\n",
" <td>2018-02-01 11:51:46</td>\n",
" <td>Impression</td>\n",
" <td>février</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>B1-86481</td>\n",
" <td>35.99</td>\n",
" <td>2018-02-01 17:24:32</td>\n",
" <td>Logiciel</td>\n",
" <td>février</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>412290</td>\n",
" <td>Jerde-Hilpert</td>\n",
" <td>B1-20000</td>\n",
" <td>78.90</td>\n",
" <td>2018-02-01 19:56:48</td>\n",
" <td>Logiciel</td>\n",
" <td>février</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>672390</td>\n",
" <td>Trantow-Barrows</td>\n",
" <td>S1-06532</td>\n",
" <td>55.82</td>\n",
" <td>2018-02-02 03:45:20</td>\n",
" <td>Impression</td>\n",
" <td>février</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Numero de compte Vendeur Code Prix date \\\n",
"134 383080 Trantow-Barrows B1-20000 33.69 2018-02-01 09:04:59 \n",
"135 412290 Jerde-Hilpert S1-27722 21.12 2018-02-01 11:51:46 \n",
"136 412290 Jerde-Hilpert B1-86481 35.99 2018-02-01 17:24:32 \n",
"137 412290 Jerde-Hilpert B1-20000 78.90 2018-02-01 19:56:48 \n",
"138 672390 Trantow-Barrows S1-06532 55.82 2018-02-02 03:45:20 \n",
"\n",
" Produit Mois \n",
"134 Logiciel février \n",
"135 Impression février \n",
"136 Logiciel février \n",
"137 Logiciel février \n",
"138 Impression février "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows dated from 2018-02-01 (included) up to 2018-04-01 (excluded), with a\n",
"# Mois column holding the month name produced by strftime %B\n",
"# (locale-dependent).\n",
"both_df = (\n",
"    df.loc[lambda sales: (sales.date >= \"2018-02-01\") & (sales.date < \"2018-04-01\")]\n",
"    .assign(Mois=lambda sales: sales.date.dt.strftime(\"%B\"))\n",
")\n",
"both_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th>Mois</th>\n",
" <th colspan=\"3\" halign=\"left\">février</th>\n",
" <th colspan=\"3\" halign=\"left\">mars</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Produit</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" <th>Impression</th>\n",
" <th>Logiciel</th>\n",
" <th>Ordinateur</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Vendeur</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barton LLC</th>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerde-Hilpert</th>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>13</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kassulke, Ondricka and Metz</th>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kulas Inc</th>\n",
" <td>6</td>\n",
" <td>15</td>\n",
" <td>3</td>\n",
" <td>13</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trantow-Barrows</th>\n",
" <td>13</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Mois février mars \\\n",
"Produit Impression Logiciel Ordinateur Impression \n",
"Vendeur \n",
"Barton LLC 5 12 2 7 \n",
"Jerde-Hilpert 6 5 6 9 \n",
"Kassulke, Ondricka and Metz 6 9 5 6 \n",
"Kulas Inc 6 15 3 13 \n",
"Trantow-Barrows 13 8 7 11 \n",
"\n",
"Mois \n",
"Produit Logiciel Ordinateur \n",
"Vendeur \n",
"Barton LLC 7 8 \n",
"Jerde-Hilpert 13 9 \n",
"Kassulke, Ondricka and Metz 12 12 \n",
"Kulas Inc 5 13 \n",
"Trantow-Barrows 11 6 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counts per Vendeur (rows) and per (Mois, Produit) (two-level columns).\n",
"# No margins are requested for this table.\n",
"both_df.pivot_table(\n",
"    index=\"Vendeur\",\n",
"    columns=[\"Mois\", \"Produit\"],\n",
"    values=\"Code\",\n",
"    aggfunc=\"count\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{lrrrrrr}\n",
"\\toprule\n",
"\\textbf{Mois} & \\multicolumn{3}{l}{février} & \\multicolumn{3}{l}{mars} \\\\\n",
"\\textbf{Produit} & Impression & Logiciel & Ordinateur & Impression & Logiciel & Ordinateur \\\\\n",
"\\textbf{Vendeur } & & & & & & \\\\\n",
"\\midrule\n",
"\\textbf{Barton LLC } & 5 & 12 & 2 & 7 & 7 & 8 \\\\\n",
"\\textbf{Jerde-Hilpert } & 6 & 5 & 6 & 9 & 13 & 9 \\\\\n",
"\\textbf{Kassulke, Ondricka and Metz} & 6 & 9 & 5 & 6 & 12 & 12 \\\\\n",
"\\textbf{Kulas Inc } & 6 & 15 & 3 & 13 & 5 & 13 \\\\\n",
"\\textbf{Trantow-Barrows } & 13 & 8 & 7 & 11 & 11 & 6 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Print the (Mois, Produit) count table as LaTeX; no margins are requested.\n",
"print(\n",
"    both_df.pivot_table(\n",
"        index=\"Vendeur\",\n",
"        columns=[\"Mois\", \"Produit\"],\n",
"        values=\"Code\",\n",
"        aggfunc=\"count\",\n",
"    ).to_latex(\n",
"        float_format=\"{:0.0f}\".format,\n",
"        bold_rows=True,\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}