20  Siuba - Intro

from siuba import _, group_by, summarize
from siuba.data import mtcars
(mtcars
  >> group_by(_.cyl)
  >> summarize(avg_hp = _.hp.mean())
  )
cyl avg_hp
0 4 82.636364
1 6 122.285714
2 8 209.214286

20.1 Filter Rows

from siuba import _, filter, group_by
from siuba.data import mtcars

mtcars >> filter(_.cyl == 4, _.gear == 5)
mpg cyl disp hp drat wt qsec vs am gear carb
26 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
27 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
mtcars >> filter((_.cyl == 4) | (_.gear == 5))
mpg cyl disp hp drat wt qsec vs am gear carb
2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
8 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
17 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
18 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
19 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
20 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
25 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
26 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
27 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
28 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
29 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
30 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
31 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2

20.2 Select

import pandas as pd

pd.set_option("display.max_rows", 5)

from siuba import _, select
from siuba.data import penguins

penguins
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 male 2007
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 female 2007
... ... ... ... ... ... ... ... ...
342 Chinstrap Dream 50.8 19.0 210.0 4100.0 male 2009
343 Chinstrap Dream 50.2 18.7 198.0 3775.0 female 2009

344 rows × 8 columns

# The simplest way to select a column to keep is to refer to it by name or position.
select(penguins, _.species, _.island, 6, -1)
species island sex year
0 Adelie Torgersen male 2007
1 Adelie Torgersen female 2007
... ... ... ... ...
342 Chinstrap Dream male 2009
343 Chinstrap Dream female 2009

344 rows × 4 columns