from pyprojroot.here import here
import pandas as pd
10 Pandas by CMS (StackOF)
Pandas Tutorial by Corey Schafer
10.1 Read
df = pd.read_csv(here("data/stackoverflow2023/survey_results_public.csv"))
10.2 Explore
df.head(3) # df.tail(3)
| | ResponseId | Q120 | MainBranch | Age | Employment | RemoteWork | CodingActivities | EdLevel | LearnCode | LearnCodeOnline | ... | Frequency_1 | Frequency_2 | Frequency_3 | TimeSearching | TimeAnswering | ProfessionalTech | Industry | SurveyLength | SurveyEase | ConvertedCompYearly |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | I agree | None of these | 18-24 years old | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 2 | I agree | I am a developer by profession | 25-34 years old | Employed, full-time | Remote | Hobby;Contribute to open-source projects;Boots... | Bachelor’s degree (B.A., B.S., B.Eng., etc.) | Books / Physical media;Colleague;Friend or fam... | Formal documentation provided by the owner of ... | ... | 1-2 times a week | 10+ times a week | Never | 15-30 minutes a day | 15-30 minutes a day | DevOps function;Microservices;Automated testin... | Information Services, IT, Software Development... | Appropriate in length | Easy | 285000.0 |
2 | 3 | I agree | I am a developer by profession | 45-54 years old | Employed, full-time | Hybrid (some remote, some in-person) | Hobby;Professional development or self-paced l... | Bachelor’s degree (B.A., B.S., B.Eng., etc.) | Books / Physical media;Colleague;On the job tr... | Formal documentation provided by the owner of ... | ... | 6-10 times a week | 6-10 times a week | 3-5 times a week | 30-60 minutes a day | 30-60 minutes a day | DevOps function;Microservices;Automated testin... | Information Services, IT, Software Development... | Appropriate in length | Easy | 250000.0 |
3 rows × 84 columns
df.shape
(89184, 84)
df.columns
Index(['ResponseId', 'Q120', 'MainBranch', 'Age', 'Employment', 'RemoteWork',
'CodingActivities', 'EdLevel', 'LearnCode', 'LearnCodeOnline',
'LearnCodeCoursesCert', 'YearsCode', 'YearsCodePro', 'DevType',
'OrgSize', 'PurchaseInfluence', 'TechList', 'BuyNewTool', 'Country',
'Currency', 'CompTotal', 'LanguageHaveWorkedWith',
'LanguageWantToWorkWith', 'DatabaseHaveWorkedWith',
'DatabaseWantToWorkWith', 'PlatformHaveWorkedWith',
'PlatformWantToWorkWith', 'WebframeHaveWorkedWith',
'WebframeWantToWorkWith', 'MiscTechHaveWorkedWith',
'MiscTechWantToWorkWith', 'ToolsTechHaveWorkedWith',
'ToolsTechWantToWorkWith', 'NEWCollabToolsHaveWorkedWith',
'NEWCollabToolsWantToWorkWith', 'OpSysPersonal use',
'OpSysProfessional use', 'OfficeStackAsyncHaveWorkedWith',
'OfficeStackAsyncWantToWorkWith', 'OfficeStackSyncHaveWorkedWith',
'OfficeStackSyncWantToWorkWith', 'AISearchHaveWorkedWith',
'AISearchWantToWorkWith', 'AIDevHaveWorkedWith', 'AIDevWantToWorkWith',
'NEWSOSites', 'SOVisitFreq', 'SOAccount', 'SOPartFreq', 'SOComm',
'SOAI', 'AISelect', 'AISent', 'AIAcc', 'AIBen',
'AIToolInterested in Using', 'AIToolCurrently Using',
'AIToolNot interested in Using', 'AINextVery different',
'AINextNeither different nor similar', 'AINextSomewhat similar',
'AINextVery similar', 'AINextSomewhat different', 'TBranch', 'ICorPM',
'WorkExp', 'Knowledge_1', 'Knowledge_2', 'Knowledge_3', 'Knowledge_4',
'Knowledge_5', 'Knowledge_6', 'Knowledge_7', 'Knowledge_8',
'Frequency_1', 'Frequency_2', 'Frequency_3', 'TimeSearching',
'TimeAnswering', 'ProfessionalTech', 'Industry', 'SurveyLength',
'SurveyEase', 'ConvertedCompYearly'],
dtype='object')
10.3 Subset
df.loc[0:2, 'ResponseId':'Age']
| | ResponseId | Q120 | MainBranch | Age |
---|---|---|---|---|
0 | 1 | I agree | None of these | 18-24 years old |
1 | 2 | I agree | I am a developer by profession | 25-34 years old |
2 | 3 | I agree | I am a developer by profession | 45-54 years old |
10.4 Filter
10.4.1 Filter set
"Country"].isin(["Brazil", "Romania"]) df[
0 False
1 False
2 False
3 False
4 False
...
89179 True
89180 True
89181 False
89182 False
89183 False
Name: Country, Length: 89184, dtype: bool
10.4.2 Filter using String Methods
filt_lang = df["LanguageHaveWorkedWith"].str.contains("Python", na=False)
df.loc[filt_lang, "LanguageHaveWorkedWith"]
1 HTML/CSS;JavaScript;Python
7 Go;HTML/CSS;JavaScript;Python;Rust;SQL;TypeScript
9 HTML/CSS;Java;JavaScript;Python;SQL;TypeScript
10 C#;C++;HTML/CSS;JavaScript;Python
11 C#;HTML/CSS;JavaScript;Kotlin;PowerShell;Pytho...
...
89172 Bash/Shell (all shells);HTML/CSS;JavaScript;Pe...
89176 HTML/CSS;Java;JavaScript;PHP;Python;SQL;TypeSc...
89180 Dart;Java;Python;SQL
89181 Assembly;Bash/Shell (all shells);C;C#;Python;R...
89182 Bash/Shell (all shells);C#;HTML/CSS;Java;JavaS...
Name: LanguageHaveWorkedWith, Length: 43158, dtype: object