import pandas as pd
12 Pandas Functional Programming
12.1 Iterrows
import pandas as pd
# Create sample DataFrame
df = pd.DataFrame({
'A': [1, 2, 3],
'B': [4, 5, 6],
'C': [7, 8, 9]
})
# Iterate through each row
for index, row in df.iterrows():
# 'index' is the row index
# 'row' is a Series object containing the row data
print(f"Row index: {index}")
print(f"Column A value: {row['A']}")
print(f"Column B value: {row['B']}")
print("---")
Row index: 0
Column A value: 1
Column B value: 4
---
Row index: 1
Column A value: 2
Column B value: 5
---
Row index: 2
Column A value: 3
Column B value: 6
---
12.2 Map-Like
12.2.1 Map1 (one col)
Applying a function to each element in a column/series
# Create a pandas Series
s = pd.Series([1, 2, 3, 4])
# Apply a function to each element (similar to purrr::map)
s_squared = s.map(lambda x: f"{x}^2 = {x**2}")
print(s_squared)
0 1^2 = 1
1 2^2 = 4
2 3^2 = 9
3 4^2 = 16
dtype: object
df = pd.DataFrame({
'col1': [1, 2, 3, 4],
'col2': [5, 6, 7, 8]
})
# Apply a function to a column (equivalent to map in R's purrr)
df['col1_squared'] = df['col1'].map(lambda x: x**2)
df
| | col1 | col2 | col1_squared |
|---|---|---|---|
| 0 | 1 | 5 | 1 |
| 1 | 2 | 6 | 4 |
| 2 | 3 | 7 | 9 |
| 3 | 4 | 8 | 16 |
12.2.2 Map2 (two cols)
# Apply a function that takes multiple columns as input
df['sum_col1_col2'] = df.apply(lambda row: row['col1'] + row['col2'], axis=1)
df
| | col1 | col2 | col1_squared | sum_col1_col2 |
|---|---|---|---|---|
| 0 | 1 | 5 | 1 | 6 |
| 1 | 2 | 6 | 4 | 8 |
| 2 | 3 | 7 | 9 | 10 |
| 3 | 4 | 8 | 16 | 12 |
# Built-in map() walks both columns in lockstep, passing one value from each to the lambda
df['col1_col2_diff'] = list(map(lambda x, y: x - y, df['col1'], df['col2']))
df
| | col1 | col2 | col1_squared | sum_col1_col2 | col1_col2_diff |
|---|---|---|---|---|---|
| 0 | 1 | 5 | 1 | 6 | -4 |
| 1 | 2 | 6 | 4 | 8 | -4 |
| 2 | 3 | 7 | 9 | 10 | -4 |
| 3 | 4 | 8 | 16 | 12 | -4 |
12.2.3 Map > 2
# Create a DataFrame with three columns
df2 = pd.DataFrame({
'col1': [1, 2, 3, 4],
'col2': [5, 6, 7, 8],
'col3': [9, 10, 11, 12]
})
# Apply a function to rows (similar to purrr::pmap)
df2['sum_all'] = df2.apply(lambda row: row['col1'] + row['col2'] + row['col3'], axis=1)
df2
| | col1 | col2 | col3 | sum_all |
|---|---|---|---|---|
| 0 | 1 | 5 | 9 | 15 |
| 1 | 2 | 6 | 10 | 18 |
| 2 | 3 | 7 | 11 | 21 |
| 3 | 4 | 8 | 12 | 24 |
12.2.4 Map All
# Apply a function to each element in the DataFrame (similar to map_df)
# DataFrame.map is the element-wise replacement for the deprecated
# DataFrame.applymap (renamed in pandas 2.1); behavior is otherwise the same.
df_squared = df.map(lambda x: x**2)
df_squared
/var/folders/70/7wmmf6t55cb84bfx9g1c1k1m0000gn/T/ipykernel_75817/80867529.py:2: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
df_squared = df.applymap(lambda x: x**2)
| | col1 | col2 | col1_squared | sum_col1_col2 | col1_col2_diff |
|---|---|---|---|---|---|
| 0 | 1 | 25 | 1 | 36 | 16 |
| 1 | 4 | 36 | 16 | 64 | 16 |
| 2 | 9 | 49 | 81 | 100 | 16 |
| 3 | 16 | 64 | 256 | 144 | 16 |
12.3 Pipe
# Define functions for chaining
def multiply_by_two(df):
    """Return a new DataFrame whose ``col1`` values are doubled.

    Intended for use with ``DataFrame.pipe``. The input frame is left
    untouched: ``assign`` builds a copy, so re-running the pipeline always
    starts from the same data instead of doubling ``col1`` a second time.

    Args:
        df: DataFrame containing a ``col1`` column.

    Returns:
        A copy of ``df`` with ``col1`` replaced by ``col1 * 2``.
    """
    return df.assign(col1=df['col1'] * 2)
def subtract_five(df):
    """Return a new DataFrame whose ``col2`` values are reduced by five.

    Intended for use with ``DataFrame.pipe``. The input frame is left
    untouched: ``assign`` builds a copy, so re-running the pipeline does not
    subtract from already-modified values.

    Args:
        df: DataFrame containing a ``col2`` column.

    Returns:
        A copy of ``df`` with ``col2`` replaced by ``col2 - 5``.
    """
    return df.assign(col2=df['col2'] - 5)
# Chain the operations
df_transformed = df.pipe(multiply_by_two).pipe(subtract_five)
df_transformed
| | col1 | col2 | col1_squared | sum_col1_col2 | col1_col2_diff |
|---|---|---|---|---|---|
| 0 | 2 | 0 | 1 | 6 | -4 |
| 1 | 4 | 1 | 4 | 8 | -4 |
| 2 | 6 | 2 | 9 | 10 | -4 |
| 3 | 8 | 3 | 16 | 12 | -4 |
12.4 .where & .mask
s = pd.Series(range(5))
s
0 0
1 1
2 2
3 3
4 4
dtype: int64
s.where(s > 2)
0 NaN
1 NaN
2 NaN
3 3.0
4 4.0
dtype: float64
s.where(s > 2, other="x")
0 x
1 x
2 x
3 3
4 4
dtype: object
# Inverse
s.where(~(s > 2), other="x")
0 0
1 1
2 2
3 x
4 x
dtype: object
# Or
s.mask(s > 2, other="x")
0 0
1 1
2 2
3 x
4 x
dtype: object