from pyprojroot.here import here
import pandas as pd
10 Pandas by CMS (StackOF)
Pandas Tutorial by Corey Schafer
10.1 Read
df = pd.read_csv(here("data/stackoverflow2023/survey_results_public.csv"))
10.2 Explore
df.head(3) # df.tail(3)
| | ResponseId | Q120 | MainBranch | Age | Employment | RemoteWork | CodingActivities | EdLevel | LearnCode | LearnCodeOnline | ... | Frequency_1 | Frequency_2 | Frequency_3 | TimeSearching | TimeAnswering | ProfessionalTech | Industry | SurveyLength | SurveyEase | ConvertedCompYearly |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | I agree | None of these | 18-24 years old | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 2 | I agree | I am a developer by profession | 25-34 years old | Employed, full-time | Remote | Hobby;Contribute to open-source projects;Boots... | Bachelor’s degree (B.A., B.S., B.Eng., etc.) | Books / Physical media;Colleague;Friend or fam... | Formal documentation provided by the owner of ... | ... | 1-2 times a week | 10+ times a week | Never | 15-30 minutes a day | 15-30 minutes a day | DevOps function;Microservices;Automated testin... | Information Services, IT, Software Development... | Appropriate in length | Easy | 285000.0 |
| 2 | 3 | I agree | I am a developer by profession | 45-54 years old | Employed, full-time | Hybrid (some remote, some in-person) | Hobby;Professional development or self-paced l... | Bachelor’s degree (B.A., B.S., B.Eng., etc.) | Books / Physical media;Colleague;On the job tr... | Formal documentation provided by the owner of ... | ... | 6-10 times a week | 6-10 times a week | 3-5 times a week | 30-60 minutes a day | 30-60 minutes a day | DevOps function;Microservices;Automated testin... | Information Services, IT, Software Development... | Appropriate in length | Easy | 250000.0 |
3 rows × 84 columns
df.shape
(89184, 84)
df.columns
Index(['ResponseId', 'Q120', 'MainBranch', 'Age', 'Employment', 'RemoteWork',
'CodingActivities', 'EdLevel', 'LearnCode', 'LearnCodeOnline',
'LearnCodeCoursesCert', 'YearsCode', 'YearsCodePro', 'DevType',
'OrgSize', 'PurchaseInfluence', 'TechList', 'BuyNewTool', 'Country',
'Currency', 'CompTotal', 'LanguageHaveWorkedWith',
'LanguageWantToWorkWith', 'DatabaseHaveWorkedWith',
'DatabaseWantToWorkWith', 'PlatformHaveWorkedWith',
'PlatformWantToWorkWith', 'WebframeHaveWorkedWith',
'WebframeWantToWorkWith', 'MiscTechHaveWorkedWith',
'MiscTechWantToWorkWith', 'ToolsTechHaveWorkedWith',
'ToolsTechWantToWorkWith', 'NEWCollabToolsHaveWorkedWith',
'NEWCollabToolsWantToWorkWith', 'OpSysPersonal use',
'OpSysProfessional use', 'OfficeStackAsyncHaveWorkedWith',
'OfficeStackAsyncWantToWorkWith', 'OfficeStackSyncHaveWorkedWith',
'OfficeStackSyncWantToWorkWith', 'AISearchHaveWorkedWith',
'AISearchWantToWorkWith', 'AIDevHaveWorkedWith', 'AIDevWantToWorkWith',
'NEWSOSites', 'SOVisitFreq', 'SOAccount', 'SOPartFreq', 'SOComm',
'SOAI', 'AISelect', 'AISent', 'AIAcc', 'AIBen',
'AIToolInterested in Using', 'AIToolCurrently Using',
'AIToolNot interested in Using', 'AINextVery different',
'AINextNeither different nor similar', 'AINextSomewhat similar',
'AINextVery similar', 'AINextSomewhat different', 'TBranch', 'ICorPM',
'WorkExp', 'Knowledge_1', 'Knowledge_2', 'Knowledge_3', 'Knowledge_4',
'Knowledge_5', 'Knowledge_6', 'Knowledge_7', 'Knowledge_8',
'Frequency_1', 'Frequency_2', 'Frequency_3', 'TimeSearching',
'TimeAnswering', 'ProfessionalTech', 'Industry', 'SurveyLength',
'SurveyEase', 'ConvertedCompYearly'],
dtype='object')
10.3 Subset
df.loc[0:2, 'ResponseId':'Age']
| | ResponseId | Q120 | MainBranch | Age |
|---|---|---|---|---|
| 0 | 1 | I agree | None of these | 18-24 years old |
| 1 | 2 | I agree | I am a developer by profession | 25-34 years old |
| 2 | 3 | I agree | I am a developer by profession | 45-54 years old |
10.4 Filter
10.4.1 Filter set
df["Country"].isin(["Brazil", "Romania"])
0 False
1 False
2 False
3 False
4 False
...
89179 True
89180 True
89181 False
89182 False
89183 False
Name: Country, Length: 89184, dtype: bool
10.4.2 Filter using String Methods
filt_lang = df["LanguageHaveWorkedWith"].str.contains("Python", na=False)
df.loc[filt_lang, "LanguageHaveWorkedWith"]
1 HTML/CSS;JavaScript;Python
7 Go;HTML/CSS;JavaScript;Python;Rust;SQL;TypeScript
9 HTML/CSS;Java;JavaScript;Python;SQL;TypeScript
10 C#;C++;HTML/CSS;JavaScript;Python
11 C#;HTML/CSS;JavaScript;Kotlin;PowerShell;Pytho...
...
89172 Bash/Shell (all shells);HTML/CSS;JavaScript;Pe...
89176 HTML/CSS;Java;JavaScript;PHP;Python;SQL;TypeSc...
89180 Dart;Java;Python;SQL
89181 Assembly;Bash/Shell (all shells);C;C#;Python;R...
89182 Bash/Shell (all shells);C#;HTML/CSS;Java;JavaS...
Name: LanguageHaveWorkedWith, Length: 43158, dtype: object