■ DataFrame 클래스에서 그룹당 상위 N개 행 데이터를 구하는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
"""Show how to pick the top N rows per group from a pandas DataFrame.

The script ranks every row inside its ``day`` group by ``total_bill``
(largest first) and keeps the two highest-ranked rows of each group.
The commented tables are the outputs recorded alongside the original script.
"""
import pandas as pd

TIPS_CSV_URL = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/tips.csv"

# Step 1: load the sample data set.
tips = pd.read_csv(TIPS_CSV_URL)
print(tips, end="\n\n")
# Recorded output:
#      total_bill   tip     sex smoker   day    time  size
# 0         16.99  1.01  Female     No   Sun  Dinner     2
# 1         10.34  1.66    Male     No   Sun  Dinner     3
# 2         21.01  3.50    Male     No   Sun  Dinner     3
# 3         23.68  3.31    Male     No   Sun  Dinner     2
# 4         24.59  3.61  Female     No   Sun  Dinner     4
# ..          ...   ...     ...    ...   ...     ...   ...
# 239       29.03  5.92    Male     No   Sat  Dinner     3
# 240       27.18  2.00  Female    Yes   Sat  Dinner     2
# 241       22.67  2.00    Male    Yes   Sat  Dinner     2
# 242       17.82  1.75    Male     No   Sat  Dinner     2
# 243       18.78  3.00  Female     No  Thur  Dinner     2
#
# [244 rows x 7 columns]

# Step 2: order all rows by bill amount, largest first.
tips_by_bill = tips.sort_values(by="total_bill", ascending=False)
print(tips_by_bill, end="\n\n")
# Recorded output:
#      total_bill    tip     sex smoker   day    time  size
# 170       50.81  10.00    Male    Yes   Sat  Dinner     3
# 212       48.33   9.00    Male     No   Sat  Dinner     4
# 59        48.27   6.73    Male     No   Sat  Dinner     4
# 156       48.17   5.00    Male     No   Sun  Dinner     6
# 182       45.35   3.50    Male    Yes   Sun  Dinner     3
# ..          ...    ...     ...    ...   ...     ...   ...
# 149        7.51   2.00    Male     No  Thur   Lunch     2
# 111        7.25   1.00  Female     No   Sat  Dinner     1
# 172        7.25   5.15    Male    Yes   Sun  Dinner     2
# 92         5.75   1.00  Female    Yes   Fri  Dinner     2
# 67         3.07   1.00  Female    Yes   Sat  Dinner     1
#
# [244 rows x 7 columns]

# Step 3: number the rows inside each day group.
# cumcount() returns the cumulative count (order) of each item within its
# group, starting at 0; adding 1 turns it into a 1-based rank.
rank_in_day = tips_by_bill.groupby("day").cumcount().add(1)
print(rank_in_day, end="\n\n")
# Recorded output:
# 170     1
# 212     2
# 59      3
# 156     1
# 182     2
#        ..
# 149    62
# 111    86
# 172    76
# 92     19
# 67     87
# Length: 244, dtype: int64

# Step 4: attach the rank to the unsorted frame; assign() aligns the Series
# on the row index, so each row receives its own rank.
tips_ranked = tips.assign(rank=rank_in_day)
print(tips_ranked, end="\n\n")
# Recorded output:
#      total_bill   tip     sex smoker   day    time  size  rank
# 0         16.99  1.01  Female     No   Sun  Dinner     2    49
# 1         10.34  1.66    Male     No   Sun  Dinner     3    68
# 2         21.01  3.50    Male     No   Sun  Dinner     3    34
# 3         23.68  3.31    Male     No   Sun  Dinner     2    27
# 4         24.59  3.61  Female     No   Sun  Dinner     4    23
# ..          ...   ...     ...    ...   ...     ...   ...   ...
# 239       29.03  5.92    Male     No   Sat  Dinner     3    13
# 240       27.18  2.00  Female    Yes   Sat  Dinner     2    16
# 241       22.67  2.00    Male    Yes   Sat  Dinner     2    26
# 242       17.82  1.75    Male     No   Sat  Dinner     2    47
# 243       18.78  3.00  Female     No  Thur  Dinner     2    20
#
# [244 rows x 8 columns]

# Step 5: keep ranks 1 and 2, i.e. the top two rows of every day.
top_two = tips_ranked.query("rank < 3")
print(top_two, end="\n\n")
# Recorded output:
#      total_bill    tip     sex smoker   day    time  size  rank
# 90        28.97   3.00    Male    Yes   Fri  Dinner     2     2
# 95        40.17   4.73    Male    Yes   Fri  Dinner     4     1
# 142       41.19   5.00    Male     No  Thur   Lunch     5     2
# 156       48.17   5.00    Male     No   Sun  Dinner     6     1
# 170       50.81  10.00    Male    Yes   Sat  Dinner     3     1
# 182       45.35   3.50    Male    Yes   Sun  Dinner     3     2
# 197       43.11   5.00  Female    Yes  Thur   Lunch     4     1
# 212       48.33   9.00    Male     No   Sat  Dinner     4     2

# Step 6: present the result grouped by day, best row first.
top_two_sorted = top_two.sort_values(by=["day", "rank"])
print(top_two_sorted)
# Recorded output:
#      total_bill    tip     sex smoker   day    time  size  rank
# 95        40.17   4.73    Male    Yes   Fri  Dinner     4     1
# 90        28.97   3.00    Male    Yes   Fri  Dinner     2     2
# 170       50.81  10.00    Male    Yes   Sat  Dinner     3     1
# 212       48.33   9.00    Male     No   Sat  Dinner     4     2
# 156       48.17   5.00    Male     No   Sun  Dinner     6     1
# 182       45.35   3.50    Male    Yes   Sun  Dinner     3     2
# 197       43.11   5.00  Female    Yes  Thur   Lunch     4     1
# 142       41.19   5.00    Male     No  Thur   Lunch     5     2
▶ requirements.txt
1 2 3 4 5 6 7 8 |
numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2
※ pip install pandas 명령을 실행했다.