alnoda-workspaces/workspaces/notebook-workspace/tutorials/pandas.ipynb
2022-06-20 18:24:28 +00:00

2106 lines
67 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "f9b64f7b-33b0-4b79-af2c-0c0fe65c0cf6",
"metadata": {},
"source": [
"# Pandas examples"
]
},
{
"cell_type": "markdown",
"id": "251ece9a-7e7f-4cd3-8618-a1db063524e4",
"metadata": {},
"source": [
"This notebook contains selected Pandas examples"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "db6bec36-b7ef-4a27-b13b-e5ae2cb68453",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "8997c867-8e08-4d82-b796-eb0f4adb1012",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8a3a0ab7-a80a-4b39-aeeb-9a7cccb8ebae",
"metadata": {},
"outputs": [],
"source": [
"# Load the CSV file into a pandas DataFrame\n",
"df = pd.read_csv('housing.csv') "
]
},
{
"cell_type": "markdown",
"id": "66faa79d-0dbd-4233-a48b-c717122e6877",
"metadata": {},
"source": [
"## Explore dataframe"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f70a1cd4-a66d-479f-967b-eb07e3d88a35",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.03237</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>6.998</td>\n",
" <td>45.8</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>394.63</td>\n",
" <td>2.94</td>\n",
" <td>33.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.06905</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>7.147</td>\n",
" <td>54.2</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>396.90</td>\n",
" <td>5.33</td>\n",
" <td>36.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>501</th>\n",
" <td>0.06263</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.593</td>\n",
" <td>69.1</td>\n",
" <td>2.4786</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>391.99</td>\n",
" <td>9.67</td>\n",
" <td>22.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>502</th>\n",
" <td>0.04527</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.120</td>\n",
" <td>76.7</td>\n",
" <td>2.2875</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>9.08</td>\n",
" <td>20.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>503</th>\n",
" <td>0.06076</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.976</td>\n",
" <td>91.0</td>\n",
" <td>2.1675</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>5.64</td>\n",
" <td>23.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>504</th>\n",
" <td>0.10959</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.794</td>\n",
" <td>89.3</td>\n",
" <td>2.3889</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>393.45</td>\n",
" <td>6.48</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>505</th>\n",
" <td>0.04741</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.030</td>\n",
" <td>80.8</td>\n",
" <td>2.5050</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>7.88</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>506 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis rad tax \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 \n",
"3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 \n",
"4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 \n",
".. ... ... ... ... ... ... ... ... ... ... \n",
"501 0.06263 0.0 11.93 0 0.573 6.593 69.1 2.4786 1 273 \n",
"502 0.04527 0.0 11.93 0 0.573 6.120 76.7 2.2875 1 273 \n",
"503 0.06076 0.0 11.93 0 0.573 6.976 91.0 2.1675 1 273 \n",
"504 0.10959 0.0 11.93 0 0.573 6.794 89.3 2.3889 1 273 \n",
"505 0.04741 0.0 11.93 0 0.573 6.030 80.8 2.5050 1 273 \n",
"\n",
" ptratio b lstat medv \n",
"0 15.3 396.90 4.98 24.0 \n",
"1 17.8 396.90 9.14 21.6 \n",
"2 17.8 392.83 4.03 34.7 \n",
"3 18.7 394.63 2.94 33.4 \n",
"4 18.7 396.90 5.33 36.2 \n",
".. ... ... ... ... \n",
"501 21.0 391.99 9.67 22.4 \n",
"502 21.0 396.90 9.08 20.6 \n",
"503 21.0 396.90 5.64 23.9 \n",
"504 21.0 393.45 6.48 22.0 \n",
"505 21.0 396.90 7.88 11.9 \n",
"\n",
"[506 rows x 14 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display pandas dataframe (standard visualization)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2497a29a-66fa-4a48-87f6-f9ea681ab89c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>3.613524</td>\n",
" <td>11.363636</td>\n",
" <td>11.136779</td>\n",
" <td>0.069170</td>\n",
" <td>0.554695</td>\n",
" <td>6.284634</td>\n",
" <td>68.574901</td>\n",
" <td>3.795043</td>\n",
" <td>9.549407</td>\n",
" <td>408.237154</td>\n",
" <td>18.455534</td>\n",
" <td>356.674032</td>\n",
" <td>12.653063</td>\n",
" <td>22.532806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>8.601545</td>\n",
" <td>23.322453</td>\n",
" <td>6.860353</td>\n",
" <td>0.253994</td>\n",
" <td>0.115878</td>\n",
" <td>0.702617</td>\n",
" <td>28.148861</td>\n",
" <td>2.105710</td>\n",
" <td>8.707259</td>\n",
" <td>168.537116</td>\n",
" <td>2.164946</td>\n",
" <td>91.294864</td>\n",
" <td>7.141062</td>\n",
" <td>9.197104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.006320</td>\n",
" <td>0.000000</td>\n",
" <td>0.460000</td>\n",
" <td>0.000000</td>\n",
" <td>0.385000</td>\n",
" <td>3.561000</td>\n",
" <td>2.900000</td>\n",
" <td>1.129600</td>\n",
" <td>1.000000</td>\n",
" <td>187.000000</td>\n",
" <td>12.600000</td>\n",
" <td>0.320000</td>\n",
" <td>1.730000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.082045</td>\n",
" <td>0.000000</td>\n",
" <td>5.190000</td>\n",
" <td>0.000000</td>\n",
" <td>0.449000</td>\n",
" <td>5.885500</td>\n",
" <td>45.025000</td>\n",
" <td>2.100175</td>\n",
" <td>4.000000</td>\n",
" <td>279.000000</td>\n",
" <td>17.400000</td>\n",
" <td>375.377500</td>\n",
" <td>6.950000</td>\n",
" <td>17.025000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.256510</td>\n",
" <td>0.000000</td>\n",
" <td>9.690000</td>\n",
" <td>0.000000</td>\n",
" <td>0.538000</td>\n",
" <td>6.208500</td>\n",
" <td>77.500000</td>\n",
" <td>3.207450</td>\n",
" <td>5.000000</td>\n",
" <td>330.000000</td>\n",
" <td>19.050000</td>\n",
" <td>391.440000</td>\n",
" <td>11.360000</td>\n",
" <td>21.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3.677083</td>\n",
" <td>12.500000</td>\n",
" <td>18.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.624000</td>\n",
" <td>6.623500</td>\n",
" <td>94.075000</td>\n",
" <td>5.188425</td>\n",
" <td>24.000000</td>\n",
" <td>666.000000</td>\n",
" <td>20.200000</td>\n",
" <td>396.225000</td>\n",
" <td>16.955000</td>\n",
" <td>25.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>88.976200</td>\n",
" <td>100.000000</td>\n",
" <td>27.740000</td>\n",
" <td>1.000000</td>\n",
" <td>0.871000</td>\n",
" <td>8.780000</td>\n",
" <td>100.000000</td>\n",
" <td>12.126500</td>\n",
" <td>24.000000</td>\n",
" <td>711.000000</td>\n",
" <td>22.000000</td>\n",
" <td>396.900000</td>\n",
" <td>37.970000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \n",
"std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \n",
"min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n",
"25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n",
"50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n",
"75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.623500 \n",
"max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n",
"\n",
" age dis rad tax ptratio b \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \n",
"std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \n",
"min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n",
"25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n",
"50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n",
"75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \n",
"max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n",
"\n",
" lstat medv \n",
"count 506.000000 506.000000 \n",
"mean 12.653063 22.532806 \n",
"std 7.141062 9.197104 \n",
"min 1.730000 5.000000 \n",
"25% 6.950000 17.025000 \n",
"50% 11.360000 21.200000 \n",
"75% 16.955000 25.000000 \n",
"max 37.970000 50.000000 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show a quick statistical summary of the data\n",
"df.describe() "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a2cb9a43-c6e0-4989-a508-d600a8718bac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 65.2\n",
"1 78.9\n",
"2 61.1\n",
"3 45.8\n",
"4 54.2\n",
" ... \n",
"501 69.1\n",
"502 76.7\n",
"503 91.0\n",
"504 89.3\n",
"505 80.8\n",
"Name: age, Length: 506, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select a single column (creates Series)\n",
"df[\"age\"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "710f04a4-5663-46f2-ad80-f4467003da31",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis rad tax ptratio \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 \n",
"\n",
" b lstat medv \n",
"0 396.90 4.98 24.0 \n",
"1 396.90 9.14 21.6 \n",
"2 392.83 4.03 34.7 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select the first 3 rows (positional slice; the end index is excluded)\n",
"df[0:3]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a0bc33fd-b58c-4d08-b57d-f1b7dd4fb422",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>lstat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>85.7</td>\n",
" <td>4.4546</td>\n",
" <td>16.51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>90.3</td>\n",
" <td>4.6820</td>\n",
" <td>14.81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>88.8</td>\n",
" <td>4.4534</td>\n",
" <td>17.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>94.4</td>\n",
" <td>4.4547</td>\n",
" <td>12.80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>87.3</td>\n",
" <td>4.2390</td>\n",
" <td>11.98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>94.1</td>\n",
" <td>4.2330</td>\n",
" <td>22.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age dis lstat\n",
"25 85.7 4.4546 16.51\n",
"26 90.3 4.6820 14.81\n",
"27 88.8 4.4534 17.28\n",
"28 94.4 4.4547 12.80\n",
"29 87.3 4.2390 11.98\n",
"30 94.1 4.2330 22.60"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select a slice of the data frame: rows labeled 25 through 30 (.loc includes both endpoints) and the specified columns\n",
"df.loc[25:30, [\"age\", \"dis\", \"lstat\"]]"
]
},
{
"cell_type": "markdown",
"id": "50c8de29-7e80-4404-8668-6fc4c8a898a0",
"metadata": {},
"source": [
"## \"Querying\" Pandas"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ec185a3b-be15-4725-9e6e-a524fd604f1b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>0.38735</td>\n",
" <td>0.0</td>\n",
" <td>25.65</td>\n",
" <td>0</td>\n",
" <td>0.581</td>\n",
" <td>5.613</td>\n",
" <td>95.6</td>\n",
" <td>1.7572</td>\n",
" <td>2</td>\n",
" <td>188</td>\n",
" <td>19.1</td>\n",
" <td>359.29</td>\n",
" <td>27.26</td>\n",
" <td>15.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>0.32543</td>\n",
" <td>0.0</td>\n",
" <td>21.89</td>\n",
" <td>0</td>\n",
" <td>0.624</td>\n",
" <td>6.431</td>\n",
" <td>98.8</td>\n",
" <td>1.8125</td>\n",
" <td>4</td>\n",
" <td>437</td>\n",
" <td>21.2</td>\n",
" <td>396.90</td>\n",
" <td>15.39</td>\n",
" <td>18.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>0.34006</td>\n",
" <td>0.0</td>\n",
" <td>21.89</td>\n",
" <td>0</td>\n",
" <td>0.624</td>\n",
" <td>6.458</td>\n",
" <td>98.9</td>\n",
" <td>2.1185</td>\n",
" <td>4</td>\n",
" <td>437</td>\n",
" <td>21.2</td>\n",
" <td>395.04</td>\n",
" <td>12.60</td>\n",
" <td>19.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0.32982</td>\n",
" <td>0.0</td>\n",
" <td>21.89</td>\n",
" <td>0</td>\n",
" <td>0.624</td>\n",
" <td>5.822</td>\n",
" <td>95.4</td>\n",
" <td>2.4699</td>\n",
" <td>4</td>\n",
" <td>437</td>\n",
" <td>21.2</td>\n",
" <td>388.69</td>\n",
" <td>15.03</td>\n",
" <td>18.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>0.32264</td>\n",
" <td>0.0</td>\n",
" <td>21.89</td>\n",
" <td>0</td>\n",
" <td>0.624</td>\n",
" <td>5.942</td>\n",
" <td>93.5</td>\n",
" <td>1.9669</td>\n",
" <td>4</td>\n",
" <td>437</td>\n",
" <td>21.2</td>\n",
" <td>378.25</td>\n",
" <td>16.90</td>\n",
" <td>17.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>0.35233</td>\n",
" <td>0.0</td>\n",
" <td>21.89</td>\n",
" <td>0</td>\n",
" <td>0.624</td>\n",
" <td>6.454</td>\n",
" <td>98.4</td>\n",
" <td>1.8498</td>\n",
" <td>4</td>\n",
" <td>437</td>\n",
" <td>21.2</td>\n",
" <td>394.08</td>\n",
" <td>14.59</td>\n",
" <td>17.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211</th>\n",
" <td>0.37578</td>\n",
" <td>0.0</td>\n",
" <td>10.59</td>\n",
" <td>1</td>\n",
" <td>0.489</td>\n",
" <td>5.404</td>\n",
" <td>88.6</td>\n",
" <td>3.6650</td>\n",
" <td>4</td>\n",
" <td>277</td>\n",
" <td>18.6</td>\n",
" <td>395.24</td>\n",
" <td>23.98</td>\n",
" <td>19.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>0.35809</td>\n",
" <td>0.0</td>\n",
" <td>6.20</td>\n",
" <td>1</td>\n",
" <td>0.507</td>\n",
" <td>6.951</td>\n",
" <td>88.5</td>\n",
" <td>2.8617</td>\n",
" <td>8</td>\n",
" <td>307</td>\n",
" <td>17.4</td>\n",
" <td>391.70</td>\n",
" <td>9.71</td>\n",
" <td>26.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224</th>\n",
" <td>0.31533</td>\n",
" <td>0.0</td>\n",
" <td>6.20</td>\n",
" <td>0</td>\n",
" <td>0.504</td>\n",
" <td>8.266</td>\n",
" <td>78.3</td>\n",
" <td>2.8944</td>\n",
" <td>8</td>\n",
" <td>307</td>\n",
" <td>17.4</td>\n",
" <td>385.05</td>\n",
" <td>4.14</td>\n",
" <td>44.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>0.38214</td>\n",
" <td>0.0</td>\n",
" <td>6.20</td>\n",
" <td>0</td>\n",
" <td>0.504</td>\n",
" <td>8.040</td>\n",
" <td>86.5</td>\n",
" <td>3.2157</td>\n",
" <td>8</td>\n",
" <td>307</td>\n",
" <td>17.4</td>\n",
" <td>387.38</td>\n",
" <td>3.13</td>\n",
" <td>37.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>233</th>\n",
" <td>0.33147</td>\n",
" <td>0.0</td>\n",
" <td>6.20</td>\n",
" <td>0</td>\n",
" <td>0.507</td>\n",
" <td>8.247</td>\n",
" <td>70.4</td>\n",
" <td>3.6519</td>\n",
" <td>8</td>\n",
" <td>307</td>\n",
" <td>17.4</td>\n",
" <td>378.95</td>\n",
" <td>3.95</td>\n",
" <td>48.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235</th>\n",
" <td>0.33045</td>\n",
" <td>0.0</td>\n",
" <td>6.20</td>\n",
" <td>0</td>\n",
" <td>0.507</td>\n",
" <td>6.086</td>\n",
" <td>61.5</td>\n",
" <td>3.6519</td>\n",
" <td>8</td>\n",
" <td>307</td>\n",
" <td>17.4</td>\n",
" <td>376.75</td>\n",
" <td>10.88</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>0.33983</td>\n",
" <td>22.0</td>\n",
" <td>5.86</td>\n",
" <td>0</td>\n",
" <td>0.431</td>\n",
" <td>6.108</td>\n",
" <td>34.9</td>\n",
" <td>8.0555</td>\n",
" <td>7</td>\n",
" <td>330</td>\n",
" <td>19.1</td>\n",
" <td>390.18</td>\n",
" <td>9.16</td>\n",
" <td>24.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>253</th>\n",
" <td>0.36894</td>\n",
" <td>22.0</td>\n",
" <td>5.86</td>\n",
" <td>0</td>\n",
" <td>0.431</td>\n",
" <td>8.259</td>\n",
" <td>8.4</td>\n",
" <td>8.9067</td>\n",
" <td>7</td>\n",
" <td>330</td>\n",
" <td>19.1</td>\n",
" <td>396.90</td>\n",
" <td>3.54</td>\n",
" <td>42.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>309</th>\n",
" <td>0.34940</td>\n",
" <td>0.0</td>\n",
" <td>9.90</td>\n",
" <td>0</td>\n",
" <td>0.544</td>\n",
" <td>5.972</td>\n",
" <td>76.7</td>\n",
" <td>3.1025</td>\n",
" <td>4</td>\n",
" <td>304</td>\n",
" <td>18.4</td>\n",
" <td>396.24</td>\n",
" <td>9.97</td>\n",
" <td>20.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>314</th>\n",
" <td>0.36920</td>\n",
" <td>0.0</td>\n",
" <td>9.90</td>\n",
" <td>0</td>\n",
" <td>0.544</td>\n",
" <td>6.567</td>\n",
" <td>87.3</td>\n",
" <td>3.6023</td>\n",
" <td>4</td>\n",
" <td>304</td>\n",
" <td>18.4</td>\n",
" <td>395.69</td>\n",
" <td>9.28</td>\n",
" <td>23.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>316</th>\n",
" <td>0.31827</td>\n",
" <td>0.0</td>\n",
" <td>9.90</td>\n",
" <td>0</td>\n",
" <td>0.544</td>\n",
" <td>5.914</td>\n",
" <td>83.2</td>\n",
" <td>3.9986</td>\n",
" <td>4</td>\n",
" <td>304</td>\n",
" <td>18.4</td>\n",
" <td>390.70</td>\n",
" <td>18.33</td>\n",
" <td>17.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>322</th>\n",
" <td>0.35114</td>\n",
" <td>0.0</td>\n",
" <td>7.38</td>\n",
" <td>0</td>\n",
" <td>0.493</td>\n",
" <td>6.041</td>\n",
" <td>49.9</td>\n",
" <td>4.7211</td>\n",
" <td>5</td>\n",
" <td>287</td>\n",
" <td>19.6</td>\n",
" <td>396.90</td>\n",
" <td>7.70</td>\n",
" <td>20.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>324</th>\n",
" <td>0.34109</td>\n",
" <td>0.0</td>\n",
" <td>7.38</td>\n",
" <td>0</td>\n",
" <td>0.493</td>\n",
" <td>6.415</td>\n",
" <td>40.1</td>\n",
" <td>4.7211</td>\n",
" <td>5</td>\n",
" <td>287</td>\n",
" <td>19.6</td>\n",
" <td>396.90</td>\n",
" <td>6.12</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>326</th>\n",
" <td>0.30347</td>\n",
" <td>0.0</td>\n",
" <td>7.38</td>\n",
" <td>0</td>\n",
" <td>0.493</td>\n",
" <td>6.312</td>\n",
" <td>28.9</td>\n",
" <td>5.4159</td>\n",
" <td>5</td>\n",
" <td>287</td>\n",
" <td>19.6</td>\n",
" <td>396.90</td>\n",
" <td>6.15</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis rad tax \\\n",
"126 0.38735 0.0 25.65 0 0.581 5.613 95.6 1.7572 2 188 \n",
"128 0.32543 0.0 21.89 0 0.624 6.431 98.8 1.8125 4 437 \n",
"130 0.34006 0.0 21.89 0 0.624 6.458 98.9 2.1185 4 437 \n",
"133 0.32982 0.0 21.89 0 0.624 5.822 95.4 2.4699 4 437 \n",
"136 0.32264 0.0 21.89 0 0.624 5.942 93.5 1.9669 4 437 \n",
"137 0.35233 0.0 21.89 0 0.624 6.454 98.4 1.8498 4 437 \n",
"211 0.37578 0.0 10.59 1 0.489 5.404 88.6 3.6650 4 277 \n",
"220 0.35809 0.0 6.20 1 0.507 6.951 88.5 2.8617 8 307 \n",
"224 0.31533 0.0 6.20 0 0.504 8.266 78.3 2.8944 8 307 \n",
"226 0.38214 0.0 6.20 0 0.504 8.040 86.5 3.2157 8 307 \n",
"233 0.33147 0.0 6.20 0 0.507 8.247 70.4 3.6519 8 307 \n",
"235 0.33045 0.0 6.20 0 0.507 6.086 61.5 3.6519 8 307 \n",
"246 0.33983 22.0 5.86 0 0.431 6.108 34.9 8.0555 7 330 \n",
"253 0.36894 22.0 5.86 0 0.431 8.259 8.4 8.9067 7 330 \n",
"309 0.34940 0.0 9.90 0 0.544 5.972 76.7 3.1025 4 304 \n",
"314 0.36920 0.0 9.90 0 0.544 6.567 87.3 3.6023 4 304 \n",
"316 0.31827 0.0 9.90 0 0.544 5.914 83.2 3.9986 4 304 \n",
"322 0.35114 0.0 7.38 0 0.493 6.041 49.9 4.7211 5 287 \n",
"324 0.34109 0.0 7.38 0 0.493 6.415 40.1 4.7211 5 287 \n",
"326 0.30347 0.0 7.38 0 0.493 6.312 28.9 5.4159 5 287 \n",
"\n",
" ptratio b lstat medv \n",
"126 19.1 359.29 27.26 15.7 \n",
"128 21.2 396.90 15.39 18.0 \n",
"130 21.2 395.04 12.60 19.2 \n",
"133 21.2 388.69 15.03 18.4 \n",
"136 21.2 378.25 16.90 17.4 \n",
"137 21.2 394.08 14.59 17.1 \n",
"211 18.6 395.24 23.98 19.3 \n",
"220 17.4 391.70 9.71 26.7 \n",
"224 17.4 385.05 4.14 44.8 \n",
"226 17.4 387.38 3.13 37.6 \n",
"233 17.4 378.95 3.95 48.3 \n",
"235 17.4 376.75 10.88 24.0 \n",
"246 19.1 390.18 9.16 24.3 \n",
"253 19.1 396.90 3.54 42.8 \n",
"309 18.4 396.24 9.97 20.3 \n",
"314 18.4 395.69 9.28 23.8 \n",
"316 18.4 390.70 18.33 17.8 \n",
"322 19.6 396.90 7.70 20.4 \n",
"324 19.6 396.90 6.12 25.0 \n",
"326 19.6 396.90 6.15 23.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Conditional slicing, both conditions must apply\n",
"df.loc[(df[\"crim\"] > 0.3) & (df[\"crim\"] < 0.4)]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2dbf4316-0ade-4933-8898-a4708ac30d28",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.08829</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.012</td>\n",
" <td>66.6</td>\n",
" <td>5.5605</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>395.60</td>\n",
" <td>12.43</td>\n",
" <td>22.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.14455</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.172</td>\n",
" <td>96.1</td>\n",
" <td>5.9505</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>396.90</td>\n",
" <td>19.15</td>\n",
" <td>27.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.21124</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>5.631</td>\n",
" <td>100.0</td>\n",
" <td>6.0821</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>386.63</td>\n",
" <td>29.93</td>\n",
" <td>16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.17004</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.004</td>\n",
" <td>85.9</td>\n",
" <td>6.5921</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>386.71</td>\n",
" <td>17.10</td>\n",
" <td>18.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>501</th>\n",
" <td>0.06263</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.593</td>\n",
" <td>69.1</td>\n",
" <td>2.4786</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>391.99</td>\n",
" <td>9.67</td>\n",
" <td>22.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>502</th>\n",
" <td>0.04527</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.120</td>\n",
" <td>76.7</td>\n",
" <td>2.2875</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>9.08</td>\n",
" <td>20.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>503</th>\n",
" <td>0.06076</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.976</td>\n",
" <td>91.0</td>\n",
" <td>2.1675</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>5.64</td>\n",
" <td>23.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>504</th>\n",
" <td>0.10959</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.794</td>\n",
" <td>89.3</td>\n",
" <td>2.3889</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>393.45</td>\n",
" <td>6.48</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>505</th>\n",
" <td>0.04741</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.030</td>\n",
" <td>80.8</td>\n",
" <td>2.5050</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>7.88</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>140 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis rad tax \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 \n",
"6 0.08829 12.5 7.87 0 0.524 6.012 66.6 5.5605 5 311 \n",
"7 0.14455 12.5 7.87 0 0.524 6.172 96.1 5.9505 5 311 \n",
"8 0.21124 12.5 7.87 0 0.524 5.631 100.0 6.0821 5 311 \n",
"9 0.17004 12.5 7.87 0 0.524 6.004 85.9 6.5921 5 311 \n",
".. ... ... ... ... ... ... ... ... ... ... \n",
"501 0.06263 0.0 11.93 0 0.573 6.593 69.1 2.4786 1 273 \n",
"502 0.04527 0.0 11.93 0 0.573 6.120 76.7 2.2875 1 273 \n",
"503 0.06076 0.0 11.93 0 0.573 6.976 91.0 2.1675 1 273 \n",
"504 0.10959 0.0 11.93 0 0.573 6.794 89.3 2.3889 1 273 \n",
"505 0.04741 0.0 11.93 0 0.573 6.030 80.8 2.5050 1 273 \n",
"\n",
" ptratio b lstat medv \n",
"0 15.3 396.90 4.98 24.0 \n",
"6 15.2 395.60 12.43 22.9 \n",
"7 15.2 396.90 19.15 27.1 \n",
"8 15.2 386.63 29.93 16.5 \n",
"9 15.2 386.71 17.10 18.9 \n",
".. ... ... ... ... \n",
"501 21.0 391.99 9.67 22.4 \n",
"502 21.0 396.90 9.08 20.6 \n",
"503 21.0 396.90 5.64 23.9 \n",
"504 21.0 393.45 6.48 22.0 \n",
"505 21.0 396.90 7.88 11.9 \n",
"\n",
"[140 rows x 14 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Conditional slicing, either condition apply\n",
"df.loc[(df[\"rad\"] == 1) | (df[\"zn\"] != 0)]"
]
},
{
"cell_type": "markdown",
"id": "25f5747a-bce9-4242-953b-8c251f201670",
"metadata": {},
"source": [
"## Transform"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "1a3f996f-bef1-4f1d-b68e-c55638f748b6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" <th>double_tax</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" <td>592</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" <td>484</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" <td>484</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.03237</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>6.998</td>\n",
" <td>45.8</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>394.63</td>\n",
" <td>2.94</td>\n",
" <td>33.4</td>\n",
" <td>444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.06905</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>7.147</td>\n",
" <td>54.2</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>396.90</td>\n",
" <td>5.33</td>\n",
" <td>36.2</td>\n",
" <td>444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>501</th>\n",
" <td>0.06263</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.593</td>\n",
" <td>69.1</td>\n",
" <td>2.4786</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>391.99</td>\n",
" <td>9.67</td>\n",
" <td>22.4</td>\n",
" <td>546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>502</th>\n",
" <td>0.04527</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.120</td>\n",
" <td>76.7</td>\n",
" <td>2.2875</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>9.08</td>\n",
" <td>20.6</td>\n",
" <td>546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>503</th>\n",
" <td>0.06076</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.976</td>\n",
" <td>91.0</td>\n",
" <td>2.1675</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>5.64</td>\n",
" <td>23.9</td>\n",
" <td>546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>504</th>\n",
" <td>0.10959</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.794</td>\n",
" <td>89.3</td>\n",
" <td>2.3889</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>393.45</td>\n",
" <td>6.48</td>\n",
" <td>22.0</td>\n",
" <td>546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>505</th>\n",
" <td>0.04741</td>\n",
" <td>0.0</td>\n",
" <td>11.93</td>\n",
" <td>0</td>\n",
" <td>0.573</td>\n",
" <td>6.030</td>\n",
" <td>80.8</td>\n",
" <td>2.5050</td>\n",
" <td>1</td>\n",
" <td>273</td>\n",
" <td>21.0</td>\n",
" <td>396.90</td>\n",
" <td>7.88</td>\n",
" <td>11.9</td>\n",
" <td>546</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>506 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis rad tax \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 \n",
"3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 \n",
"4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 \n",
".. ... ... ... ... ... ... ... ... ... ... \n",
"501 0.06263 0.0 11.93 0 0.573 6.593 69.1 2.4786 1 273 \n",
"502 0.04527 0.0 11.93 0 0.573 6.120 76.7 2.2875 1 273 \n",
"503 0.06076 0.0 11.93 0 0.573 6.976 91.0 2.1675 1 273 \n",
"504 0.10959 0.0 11.93 0 0.573 6.794 89.3 2.3889 1 273 \n",
"505 0.04741 0.0 11.93 0 0.573 6.030 80.8 2.5050 1 273 \n",
"\n",
" ptratio b lstat medv double_tax \n",
"0 15.3 396.90 4.98 24.0 592 \n",
"1 17.8 396.90 9.14 21.6 484 \n",
"2 17.8 392.83 4.03 34.7 484 \n",
"3 18.7 394.63 2.94 33.4 444 \n",
"4 18.7 396.90 5.33 36.2 444 \n",
".. ... ... ... ... ... \n",
"501 21.0 391.99 9.67 22.4 546 \n",
"502 21.0 396.90 9.08 20.6 546 \n",
"503 21.0 396.90 5.64 23.9 546 \n",
"504 21.0 393.45 6.48 22.0 546 \n",
"505 21.0 396.90 7.88 11.9 546 \n",
"\n",
"[506 rows x 15 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create column from another column multiplied by 2\n",
"df[\"double_tax\"] = df[\"tax\"] * 2\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "86515934-a439-4d2b-b924-5e7725de25a5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>b</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" <th>double_tax</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rad</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.72057</td>\n",
" <td>798.0</td>\n",
" <td>101.32</td>\n",
" <td>1</td>\n",
" <td>9.2578</td>\n",
" <td>131.117</td>\n",
" <td>900.5</td>\n",
" <td>120.5565</td>\n",
" <td>5829</td>\n",
" <td>351.3</td>\n",
" <td>7785.46</td>\n",
" <td>147.40</td>\n",
" <td>487.3</td>\n",
" <td>11658</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.99885</td>\n",
" <td>490.0</td>\n",
" <td>231.14</td>\n",
" <td>0</td>\n",
" <td>11.6380</td>\n",
" <td>159.599</td>\n",
" <td>1554.5</td>\n",
" <td>98.3282</td>\n",
" <td>6256</td>\n",
" <td>415.0</td>\n",
" <td>9273.93</td>\n",
" <td>240.59</td>\n",
" <td>644.0</td>\n",
" <td>12512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3.69966</td>\n",
" <td>622.5</td>\n",
" <td>167.92</td>\n",
" <td>2</td>\n",
" <td>17.1921</td>\n",
" <td>245.975</td>\n",
" <td>1873.8</td>\n",
" <td>195.5667</td>\n",
" <td>9351</td>\n",
" <td>690.4</td>\n",
" <td>14911.88</td>\n",
" <td>344.89</td>\n",
" <td>1061.3</td>\n",
" <td>18702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>43.32938</td>\n",
" <td>1620.5</td>\n",
" <td>1182.13</td>\n",
" <td>8</td>\n",
" <td>55.4742</td>\n",
" <td>674.643</td>\n",
" <td>6692.6</td>\n",
" <td>487.6300</td>\n",
" <td>36958</td>\n",
" <td>2105.0</td>\n",
" <td>42099.34</td>\n",
" <td>1341.90</td>\n",
" <td>2352.6</td>\n",
" <td>73916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>79.09604</td>\n",
" <td>1277.5</td>\n",
" <td>1122.38</td>\n",
" <td>11</td>\n",
" <td>65.6516</td>\n",
" <td>735.213</td>\n",
" <td>7960.7</td>\n",
" <td>425.1928</td>\n",
" <td>38162</td>\n",
" <td>1902.9</td>\n",
" <td>42457.13</td>\n",
" <td>1225.58</td>\n",
" <td>2956.3</td>\n",
" <td>76324</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3.90140</td>\n",
" <td>337.5</td>\n",
" <td>213.15</td>\n",
" <td>0</td>\n",
" <td>13.3860</td>\n",
" <td>158.723</td>\n",
" <td>1563.7</td>\n",
" <td>104.6478</td>\n",
" <td>9695</td>\n",
" <td>463.2</td>\n",
" <td>10071.53</td>\n",
" <td>319.95</td>\n",
" <td>545.4</td>\n",
" <td>19390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2.55679</td>\n",
" <td>454.0</td>\n",
" <td>85.59</td>\n",
" <td>0</td>\n",
" <td>7.4970</td>\n",
" <td>113.012</td>\n",
" <td>682.4</td>\n",
" <td>110.4296</td>\n",
" <td>5175</td>\n",
" <td>312.9</td>\n",
" <td>6603.36</td>\n",
" <td>135.80</td>\n",
" <td>460.8</td>\n",
" <td>10350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8.91383</td>\n",
" <td>150.0</td>\n",
" <td>142.38</td>\n",
" <td>5</td>\n",
" <td>11.8200</td>\n",
" <td>166.819</td>\n",
" <td>1616.4</td>\n",
" <td>105.8545</td>\n",
" <td>7230</td>\n",
" <td>431.4</td>\n",
" <td>9246.66</td>\n",
" <td>191.06</td>\n",
" <td>728.6</td>\n",
" <td>14460</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>1684.22640</td>\n",
" <td>0.0</td>\n",
" <td>2389.20</td>\n",
" <td>8</td>\n",
" <td>88.7590</td>\n",
" <td>794.924</td>\n",
" <td>11854.3</td>\n",
" <td>272.0855</td>\n",
" <td>87912</td>\n",
" <td>2666.4</td>\n",
" <td>38027.77</td>\n",
" <td>2455.28</td>\n",
" <td>2165.3</td>\n",
" <td>175824</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" crim zn indus chas nox rm age dis \\\n",
"rad \n",
"1 0.72057 798.0 101.32 1 9.2578 131.117 900.5 120.5565 \n",
"2 1.99885 490.0 231.14 0 11.6380 159.599 1554.5 98.3282 \n",
"3 3.69966 622.5 167.92 2 17.1921 245.975 1873.8 195.5667 \n",
"4 43.32938 1620.5 1182.13 8 55.4742 674.643 6692.6 487.6300 \n",
"5 79.09604 1277.5 1122.38 11 65.6516 735.213 7960.7 425.1928 \n",
"6 3.90140 337.5 213.15 0 13.3860 158.723 1563.7 104.6478 \n",
"7 2.55679 454.0 85.59 0 7.4970 113.012 682.4 110.4296 \n",
"8 8.91383 150.0 142.38 5 11.8200 166.819 1616.4 105.8545 \n",
"24 1684.22640 0.0 2389.20 8 88.7590 794.924 11854.3 272.0855 \n",
"\n",
" tax ptratio b lstat medv double_tax \n",
"rad \n",
"1 5829 351.3 7785.46 147.40 487.3 11658 \n",
"2 6256 415.0 9273.93 240.59 644.0 12512 \n",
"3 9351 690.4 14911.88 344.89 1061.3 18702 \n",
"4 36958 2105.0 42099.34 1341.90 2352.6 73916 \n",
"5 38162 1902.9 42457.13 1225.58 2956.3 76324 \n",
"6 9695 463.2 10071.53 319.95 545.4 19390 \n",
"7 5175 312.9 6603.36 135.80 460.8 10350 \n",
"8 7230 431.4 9246.66 191.06 728.6 14460 \n",
"24 87912 2666.4 38027.77 2455.28 2165.3 175824 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Aggregate\n",
"df.groupby(\"rad\").sum()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}