12  Pandas Functional Programming

import pandas as pd

12.1 Iterrows

import pandas as pd

# Build a small 3x3 frame to loop over
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    }
)

# iterrows() yields one (row label, row data) pair per row;
# the row data arrives as a Series keyed by column name
for row_label, row_values in df.iterrows():
    print(f"Row index: {row_label}")
    print(f"Column A value: {row_values['A']}")
    print(f"Column B value: {row_values['B']}")
    print("---")
Row index: 0
Column A value: 1
Column B value: 4
---
Row index: 1
Column A value: 2
Column B value: 5
---
Row index: 2
Column A value: 3
Column B value: 6
---

12.2 Map-Like

12.2.1 Map1 (one col)

Applying a function to each element in a column/series

# Start from a Series of four integers
s = pd.Series([1, 2, 3, 4])


def describe_square(x):
    """Return a string showing ``x`` next to its square."""
    return f"{x}^2 = {x**2}"


# Series.map calls the function once per element (similar to purrr::map)
s_squared = s.map(describe_square)

print(s_squared)
0     1^2 = 1
1     2^2 = 4
2     3^2 = 9
3    4^2 = 16
dtype: object
# Two-column frame of integers
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [5, 6, 7, 8],
    }
)

# Map a function over one column and store the result as a new column
# (equivalent to map in R's purrr)
col1_values = df['col1']
df['col1_squared'] = col1_values.map(lambda value: value**2)
df
col1 col2 col1_squared
0 1 5 1
1 2 6 4
2 3 7 9
3 4 8 16

12.2.2 Map2 (two cols)

# Row-wise apply: the function is handed one row at a time, so it can
# combine values from several columns
def add_col1_col2(row):
    """Return the sum of the row's 'col1' and 'col2' entries."""
    return row['col1'] + row['col2']


# axis="columns" is the named form of axis=1 (apply the function to each row)
df['sum_col1_col2'] = df.apply(add_col1_col2, axis="columns")
df
col1 col2 col1_squared sum_col1_col2
0 1 5 1 6
1 2 6 4 8
2 3 7 9 10
3 4 8 16 12
# Walk the two columns in lockstep and subtract element by element
df['col1_col2_diff'] = [left - right for left, right in zip(df['col1'], df['col2'])]
df
col1 col2 col1_squared sum_col1_col2 col1_col2_diff
0 1 5 1 6 -4
1 2 6 4 8 -4
2 3 7 9 10 -4
3 4 8 16 12 -4

12.2.3 Map > 2

# Three-column frame of integers
df2 = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [5, 6, 7, 8],
        'col3': [9, 10, 11, 12],
    }
)


def add_three_columns(row):
    """Return the sum of the row's 'col1', 'col2' and 'col3' entries."""
    return row['col1'] + row['col2'] + row['col3']


# Row-wise apply over more than two columns (similar to purrr::pmap)
df2['sum_all'] = df2.apply(add_three_columns, axis=1)
df2
col1 col2 col3 sum_all
0 1 5 9 15
1 2 6 10 18
2 3 7 11 21
3 4 8 12 24

12.2.4 Map All

# Apply a function to each element in the DataFrame (similar to map_df).
# DataFrame.map is the element-wise method; DataFrame.applymap is deprecated
# in favor of it and emits a FutureWarning when called.
# NOTE: requires a pandas version that provides DataFrame.map.
df_squared = df.map(lambda x: x**2)
df_squared
col1 col2 col1_squared sum_col1_col2 col1_col2_diff
0 1 25 1 36 16
1 4 36 16 64 16
2 9 49 81 100 16
3 16 64 256 144 16

12.3 Pipe

# Define functions for chaining
def multiply_by_two(df):
    """Return a new DataFrame with ``col1`` doubled.

    The input frame is not modified: ``assign`` builds a new object, so
    running the pipeline again on the same input gives the same result
    instead of compounding the change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``col1`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` whose ``col1`` holds the doubled values; all other
        columns and the column order are unchanged.
    """
    return df.assign(col1=df['col1'] * 2)

def subtract_five(df):
    """Return a new DataFrame with 5 subtracted from ``col2``.

    The input frame is not modified: ``assign`` builds a new object, so
    running the pipeline again on the same input gives the same result
    instead of compounding the change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``col2`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` whose ``col2`` holds the reduced values; all other
        columns and the column order are unchanged.
    """
    return df.assign(col2=df['col2'] - 5)

# Chain the operations: each .pipe() passes the frame it receives to the
# given function and hands that function's return value to the next step
df_transformed = (
    df
    .pipe(multiply_by_two)
    .pipe(subtract_five)
)
df_transformed
col1 col2 col1_squared sum_col1_col2 col1_col2_diff
0 2 0 1 6 -4
1 4 1 4 8 -4
2 6 2 9 10 -4
3 8 3 16 12 -4

12.4 .where & .mask

# Series holding the integers 0 through 4
s = pd.Series(data=range(5))
s
0    0
1    1
2    2
3    3
4    4
dtype: int64
# Keep entries where the condition is True; the rest are replaced with NaN
keep = s > 2
s.where(keep)
0    NaN
1    NaN
2    NaN
3    3.0
4    4.0
dtype: float64
# Entries that fail the condition are replaced with the value given as `other`
s.where(cond=s > 2, other="x")
0    x
1    x
2    x
3    3
4    4
dtype: object
# Inverse: keep the entries that are NOT greater than 2, replace the others
s.where(s <= 2, other="x")
0    0
1    1
2    2
3    x
4    x
dtype: object
# Or: mask() replaces the entries where the condition is True
replace_these = s > 2
s.mask(replace_these, other="x")
0    0
1    1
2    2
3    x
4    x
dtype: object