import pandas as pd
from pandas import Series, DataFrame
3 Pandas Series
3.1 Series from Scratch
# A minimal dict-based stand-in for a pandas Series: parallel 'index' and
# 'data' lists plus a 'name'.
series = {
    'index': [0, 1, 2, 3],
    'data': [145, 142, 38, 13],
    'name': 'songs',
}


def get(series, idx):
    """Return the value stored under label ``idx`` in a dict-based series.

    Args:
        series: dict with parallel ``'index'`` and ``'data'`` lists.
        idx: label to look up in ``series['index']``.

    Returns:
        The element of ``series['data']`` at the position where ``idx``
        occurs in ``series['index']``.

    Raises:
        ValueError: if ``idx`` is not present in ``series['index']``
            (propagated from ``list.index``).
    """
    # Translate the label into a position, then read the value at that position.
    value_idx = series['index'].index(idx)
    return series['data'][value_idx]


get(series, 0)
145
series['data'].index(142)
1
3.2 Basic
3.2.1 Series Creation
import pandas as pd
songs2 = pd.Series([145, 142, 38, 13], name='counts')
songs2
0 145
1 142
2 38
3 13
Name: counts, dtype: int64
print(songs2.name)
print(songs2.index)
counts
RangeIndex(start=0, stop=4, step=1)
obj = pd.Series([4, 7, -5, 3])
obj
0 4
1 7
2 -5
3 3
dtype: int64
# Array Representation
obj.array
type(obj.array)
# Index
obj.index
type(obj.index)
pandas.core.indexes.range.RangeIndex
3.2.2 Series with index
obj2 = pd.Series([4, 7, -5, 3], index=["d", "b", "a", "c"])
obj2
d 4
b 7
a -5
c 3
dtype: int64
# Index
obj2.index
From Dict
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
obj3 = pd.Series(sdata)
obj3
Ohio 35000
Texas 71000
Oregon 16000
Utah 5000
dtype: int64
Back to Dict, List, etc.
obj3.to_dict()
obj3.to_list()
[35000, 71000, 16000, 5000]
3.2.3 Subset Series
# Position
# Use .iloc for positional access: on a label-indexed Series, obj2[0] emits
# "FutureWarning: Series.__getitem__ treating keys as positions is deprecated",
# and in a future pandas version integer keys will always be treated as labels.
obj2.iloc[0]
# Index (label-based access)
obj2["d"]
# list of indices
obj2[["d", "a"]]
# Logical (boolean mask)
obj2[obj2 > 0]
# Assign
obj2["c"] = 10
obj2
d 4
b 7
a -5
c 10
dtype: int64
3.2.4 Vectorized Operation
obj2 * 2
d 8
b 14
a -10
c 20
dtype: int64
# A Series behaves like a fixed-length, ordered dictionary
"b" in obj2
True
3.2.5 Missing Value
import numpy as np
nan_series2 = pd.Series([1, 2, 2, np.nan], index=['Ono', 'Clapton', 'Clapton2', "gte"])
# Check NA
nan_series2.isna()
Ono False
Clapton False
Clapton2 False
gte True
dtype: bool
# Count number of values (exclude NA)
nan_series2.count()
np.int64(3)
# Count number of entries (include NA)
nan_series2.size
4
Replace missing values
nan_series2.fillna(0)
Ono 1.0
Clapton 2.0
Clapton2 2.0
gte 0.0
dtype: float64
3.2.6 Auto-align arithmetic
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index=states)
obj4
# Data alignment features
obj3 + obj4
California NaN
Ohio 70000.0
Oregon 32000.0
Texas 142000.0
Utah NaN
dtype: float64
3.2.7 name Attribute
obj4.name = "population"
obj4.index.name = "state"
obj4
state
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
Name: population, dtype: float64
3.3 Series to DF
s1 = pd.Series({"A": 1, "B": 2})
s2 = pd.Series({"A": 3, "B": 4})
df = pd.DataFrame([s1, s2])
df
|   | A | B |
|---|---|---|
| 0 | 1 | 2 |
| 1 | 3 | 4 |