Posted Updated 4 minutes read (About 665 words)
Pandas Pivot
- 데이터 프레임의 컬럼 데이터에서 index, column, value를 선택해서 데이터 프레임을 만드는 방법
df.pivot(index, columns, values)
df.pivot_table(values, index, columns, aggfunc)
pandas io
1 2 3
| titanic = pd.read_csv("datas/train.csv") titanic.tail(2)
|
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
| 889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.00 |
C148 |
C |
| 890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.75 |
NaN |
Q |
1 2
| titanic.to_csv("datas/titanic.tsv", sep="\t", index=False)
|
1 2 3
| df = pd.read_csv("datas/2014_p.csv", encoding="euc_kr") df.tail(2)
|
|
ID |
RCTRCK |
RACE_DE |
RACE_NO |
PARTCPT_NO |
RANK |
RCHOSE_NM |
HRSMN |
RCORD |
ARVL_DFFRNC |
EACH_SCTN_PASAGE_RANK |
A_WIN_SYTM_EXPECT_ALOT |
WIN_STA_EXPECT_ALOT |
| 27216 |
27217 |
제주 |
2014-11-29 |
9 |
7 |
6.0 |
미주여행 |
김경휴 |
0:01:31.1 |
13 |
2 - - - 2 - 3 - 6 |
6.2 |
9.4 |
| 27217 |
27218 |
제주 |
2014-11-29 |
9 |
6 |
1.0 |
철옹성 |
장우성 |
0:01:26.6 |
NaN |
1 - - - 1 - 1 - 1 |
3.9 |
2.9 |
kaggle
1. 성별, 좌석등급에 따른 데이터의 수
1 2
| df1 = titanic.groupby(["Sex", "Pclass"]).size().reset_index(name="counts") df1
|
|
Sex |
Pclass |
counts |
| 0 |
female |
1 |
94 |
| 1 |
female |
2 |
76 |
| 2 |
female |
3 |
144 |
| 3 |
male |
1 |
122 |
| 4 |
male |
2 |
108 |
| 5 |
male |
3 |
347 |
1 2 3
| result = df1.pivot("Sex", "Pclass", "counts") result
|
| Pclass |
1 |
2 |
3 |
| Sex |
|
|
|
| female |
94 |
76 |
144 |
| male |
122 |
108 |
347 |
1 2 3
| titanic["counts"] = 1 titanic.tail(2)
|
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
counts |
| 889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.00 |
C148 |
C |
1 |
| 890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.75 |
NaN |
Q |
1 |
1 2
| result = titanic.pivot_table("counts", ["Pclass"], ["Survived"], aggfunc=np.sum) result
|
| Survived |
0 |
1 |
| Pclass |
|
|
| 1 |
80 |
136 |
| 2 |
97 |
87 |
| 3 |
372 |
119 |
1 2
| result["total"] = result[0]+result[1] result
|
| Survived |
0 |
1 |
total |
| Pclass |
|
|
|
| 1 |
80 |
136 |
216 |
| 2 |
97 |
87 |
184 |
| 3 |
372 |
119 |
491 |
1 2
| result.loc["total"] = result.loc[1] + result.loc[2] + result.loc[3] result
|
| Survived |
0 |
1 |
total |
| Pclass |
|
|
|
| 1 |
80 |
136 |
216 |
| 2 |
97 |
87 |
184 |
| 3 |
372 |
119 |
491 |
| total |
549 |
342 |
891 |
1 2
| df1 = pd.read_csv("datas/2014_p.csv", encoding="euc-kr") df1.tail(2)
|
|
ID |
RCTRCK |
RACE_DE |
RACE_NO |
PARTCPT_NO |
RANK |
RCHOSE_NM |
HRSMN |
RCORD |
ARVL_DFFRNC |
EACH_SCTN_PASAGE_RANK |
A_WIN_SYTM_EXPECT_ALOT |
WIN_STA_EXPECT_ALOT |
| 27216 |
27217 |
제주 |
2014-11-29 |
9 |
7 |
6.0 |
미주여행 |
김경휴 |
0:01:31.1 |
13 |
2 - - - 2 - 3 - 6 |
6.2 |
9.4 |
| 27217 |
27218 |
제주 |
2014-11-29 |
9 |
6 |
1.0 |
철옹성 |
장우성 |
0:01:26.6 |
NaN |
1 - - - 1 - 1 - 1 |
3.9 |
2.9 |
1 2
| df2 = pd.read_csv("datas/2014_s.csv", encoding="euc-kr") df2.tail(2)
|
|
ID |
RCTRCK |
RACE_DE |
PRDCTN_NATION_NM |
SEX |
AGE |
BND_WT |
TRNER |
RCHOSE_OWNR_NM |
RCHOSE_BDWGH |
| 27216 |
27217 |
제주 |
2014-11-29 |
한 |
거 |
NaN |
53.0 |
강대은 |
김기준 |
281 |
| 27217 |
27218 |
제주 |
2014-11-29 |
한 |
거 |
NaN |
57.5 |
박병진 |
강상우 |
314 |
You need to set install_url to use ShareThis. Please set it in _config.yml.