3 Pandas Series

import pandas as pd
from pandas import Series, DataFrame

3.1 Series from Scratch

# A hand-rolled "series": parallel lists of labels and values, plus a name.
series = {
    'index': [0, 1, 2, 3],
    'data': [145, 142, 38, 13],
    'name': 'songs',
}

def get(series, idx):
    """Return the value stored under label ``idx`` in a dict-based series.

    ``series`` is a mapping holding parallel lists under ``'index'``
    (labels) and ``'data'`` (values).  The first occurrence of ``idx`` among
    the labels decides which value is returned; ``list.index`` raises
    ``ValueError`` when the label is absent.
    """
    position = series['index'].index(idx)
    values = series['data']
    return values[position]

# Look up the value stored under label 0 (145 in the dict-based series).
get(series, 0)

# list.index goes the other way: the position of the value 142 in the data list.
series['data'].index(142)

3.2 Basic

3.2.1 Series Creation

import pandas as pd

# Only values and a name are supplied; the index is left for pandas to generate.
songs2 = pd.Series(data=[145, 142, 38, 13], name='counts')
songs2

0    145
1    142
2     38
3     13
Name: counts, dtype: int64

# name is the label passed at construction time.
print(songs2.name)
# index was not passed, so this prints the automatically generated RangeIndex.
print(songs2.index)

counts
RangeIndex(start=0, stop=4, step=1)

# No index is given, so the values are labelled 0..3 by default.
obj = pd.Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

# Array Representation
# .array exposes the values of the Series as a pandas array object.
obj.array
type(obj.array)

# Index
# .index holds the labels; for `obj` this is the default RangeIndex.
obj.index
type(obj.index)

pandas.core.indexes.range.RangeIndex

3.2.2 Series with index

# index= attaches one explicit label per value, in the order given.
obj2 = pd.Series([4, 7, -5, 3], index=["d", "b", "a", "c"])
obj2

d    4
b    7
a   -5
c    3
dtype: int64


# Index
# Holds the string labels passed via index= when obj2 was created.
obj2.index

From Dict

# Dict keys become the index labels and dict values become the data.
sdata = {
    "Ohio": 35000,
    "Texas": 71000,
    "Oregon": 16000,
    "Utah": 5000,
}
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

Back to Dict, List, etc.

# to_dict() maps each index label to its value.
obj3.to_dict()
# to_list() keeps only the values; the labels are dropped.
obj3.to_list()

[35000, 71000, 16000, 5000]

3.2.3 Subset Series

# Position
# Use .iloc for positional access. Plain `obj2[0]` on a label-indexed Series
# is deprecated (it emits a FutureWarning) because integer keys will be
# treated as labels in a future pandas version.
obj2.iloc[0]

# Index
# A single label returns the scalar stored under it.
obj2["d"]

# list of indices
# A list of labels returns a sub-Series in the requested order.
obj2[["d", "a"]]

# Logical
# A boolean mask keeps only the entries where the condition is True.
obj2[obj2 > 0]

# Assign
# Assignment by label modifies obj2 in place.
obj2["c"] = 10
obj2

/var/folders/70/7wmmf6t55cb84bfx9g1c1k1m0000gn/T/ipykernel_2646/748481342.py:2: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  obj2[0]

d     4
b     7
a    -5
c    10
dtype: int64

3.2.4 Vectorized Operation

# Arithmetic is applied element-wise; the index labels are preserved.
obj2 * 2

d     8
b    14
a   -10
c    20
dtype: int64

# A Series can be treated like a fixed-length, ordered dictionary:
# `in` tests membership among the index labels, not the values.
"b" in obj2

True

3.2.5 Missing Value

import numpy as np

# np.nan marks the one missing entry (label "gte").
nan_series2 = pd.Series(
    [1, 2, 2, np.nan],
    index=['Ono', 'Clapton', 'Clapton2', "gte"],
)
# Check NA
nan_series2.isna()

Ono         False
Clapton     False
Clapton2    False
gte          True
dtype: bool

# Count the non-missing values (NA excluded): 3 of the 4 entries here.
nan_series2.count()

np.int64(3)

# Count all entries (NA included): the total length of the Series.
nan_series2.size

Replace missing values

# Returns a Series with each NaN replaced by 0. The result is not assigned
# back, so nan_series2 itself still contains the NaN.
nan_series2.fillna(0)

Ono         1.0
Clapton     2.0
Clapton2    2.0
gte         0.0
dtype: float64

3.2.6 Auto-align arithmetic

# Build a Series from sdata using an explicit label list: "California" has no
# entry in sdata and becomes NaN, while "Utah" is not in states and is left out.
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index=states)
obj4

# Data alignment features
# Addition matches entries by index label, not by position. California is NaN
# in obj4 and Utah is absent from it, so both results are NaN.
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

3.2.7 `name` Attribute

# .name labels the Series itself (shown as "Name: population" in the repr).
obj4.name = "population"
# .index.name labels the index (shown as the "state" header above the labels).
obj4.index.name = "state"
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

3.3 Series to DF

s1 = pd.Series({"A": 1, "B": 2})
s2 = pd.Series({"A": 3, "B": 4})

# Each Series becomes one row; the Series labels (A, B) become the columns.
df = pd.DataFrame([s1, s2])
df

	A	B
0	1	2
1	3	4