from siuba import _, group_by, summarize
from siuba.data import mtcars20 Siub - Intro
(mtcars
>> group_by(_.cyl)
>> summarize(avg_hp = _.hp.mean())
)| cyl | avg_hp | |
|---|---|---|
| 0 | 4 | 82.636364 |
| 1 | 6 | 122.285714 |
| 2 | 8 | 209.214286 |
20.1 Filter Rows
from siuba import _, filter, group_by
from siuba.data import mtcars
mtcars >> filter(_.cyl == 4, _.gear == 5)| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.7 | 0 | 1 | 5 | 2 |
| 27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.9 | 1 | 1 | 5 | 2 |
mtcars >> filter((_.cyl == 4) | (_.gear == 5))| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| 7 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| 8 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
| 17 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 |
| 18 | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 |
| 19 | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 |
| 20 | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
| 25 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 |
| 26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 |
| 27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 |
| 28 | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 |
| 29 | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 |
| 30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 |
| 31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
20.2 Select
import pandas as pd
pd.set_option("display.max_rows", 5)
from siuba import _, select
from siuba.data import penguins
penguins| species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year | |
|---|---|---|---|---|---|---|---|---|
| 0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | male | 2007 |
| 1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | female | 2007 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 342 | Chinstrap | Dream | 50.8 | 19.0 | 210.0 | 4100.0 | male | 2009 |
| 343 | Chinstrap | Dream | 50.2 | 18.7 | 198.0 | 3775.0 | female | 2009 |
344 rows × 8 columns
# select a column to keep is to refer to it by name or position.
select(penguins, _.species, _.island, 6, -1)| species | island | sex | year | |
|---|---|---|---|---|
| 0 | Adelie | Torgersen | male | 2007 |
| 1 | Adelie | Torgersen | female | 2007 |
| ... | ... | ... | ... | ... |
| 342 | Chinstrap | Dream | male | 2009 |
| 343 | Chinstrap | Dream | female | 2009 |
344 rows × 4 columns