■ DataFrame 클래스에서 그룹당 상위 N개 행 데이터를 구하는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
"""Show how to select the top N rows per group from a pandas DataFrame.

The script ranks ``total_bill`` within each ``day`` group of the public
"tips" sample dataset, attaches the rank as a new column, keeps the rows
whose rank is within the top N, and finally sorts them by group and rank.
"""

import pandas as pd

# Number of rows to keep per group (the "N" in "top N per group").
TOP_N = 2

url = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/tips.csv"

dataFrame1 = pd.read_csv(url)

print(dataFrame1)

"""
     total_bill   tip     sex smoker   day    time  size
0         16.99  1.01  Female     No   Sun  Dinner     2
1         10.34  1.66    Male     No   Sun  Dinner     3
2         21.01  3.50    Male     No   Sun  Dinner     3
3         23.68  3.31    Male     No   Sun  Dinner     2
4         24.59  3.61  Female     No   Sun  Dinner     4
..          ...   ...     ...    ...   ...     ...   ...
239       29.03  5.92    Male     No   Sat  Dinner     3
240       27.18  2.00  Female    Yes   Sat  Dinner     2
241       22.67  2.00    Male    Yes   Sat  Dinner     2
242       17.82  1.75    Male     No   Sat  Dinner     2
243       18.78  3.00  Female     No  Thur  Dinner     2

[244 rows x 7 columns]
"""

print()

# Rank total_bill inside each day group, largest bill first.
# method="first" breaks ties by order of appearance, so every row in a group
# gets a distinct whole-number rank (1.0, 2.0, ...), which makes the top-N
# cut-off below unambiguous.
dataFrameGroupBy = dataFrame1.groupby(["day"])
seriesGroupBy = dataFrameGroupBy["total_bill"]
series = seriesGroupBy.rank(method="first", ascending=False)

print(series)

"""
0      49.0
1      68.0
2      34.0
3      27.0
4      23.0
       ...
239    13.0
240    16.0
241    26.0
242    47.0
243    20.0
Name: total_bill, Length: 244, dtype: float64
"""

print()

# The rank Series shares dataFrame1's index, so assign() aligns it row by row.
dataFrame2 = dataFrame1.assign(rank=series)

print(dataFrame2)

"""
     total_bill   tip     sex smoker   day    time  size  rank
0         16.99  1.01  Female     No   Sun  Dinner     2  49.0
1         10.34  1.66    Male     No   Sun  Dinner     3  68.0
2         21.01  3.50    Male     No   Sun  Dinner     3  34.0
3         23.68  3.31    Male     No   Sun  Dinner     2  27.0
4         24.59  3.61  Female     No   Sun  Dinner     4  23.0
..          ...   ...     ...    ...   ...     ...   ...   ...
239       29.03  5.92    Male     No   Sat  Dinner     3  13.0
240       27.18  2.00  Female    Yes   Sat  Dinner     2  16.0
241       22.67  2.00    Male    Yes   Sat  Dinner     2  26.0
242       17.82  1.75    Male     No   Sat  Dinner     2  47.0
243       18.78  3.00  Female     No  Thur  Dinner     2  20.0

[244 rows x 8 columns]
"""

print()

# Keep only the TOP_N best-ranked rows of every group.
# "@TOP_N" makes query() read the Python variable instead of a column.
dataFrame3 = dataFrame2.query("rank <= @TOP_N")

print(dataFrame3)

"""
     total_bill    tip     sex smoker   day    time  size  rank
90        28.97   3.00    Male    Yes   Fri  Dinner     2   2.0
95        40.17   4.73    Male    Yes   Fri  Dinner     4   1.0
142       41.19   5.00    Male     No  Thur   Lunch     5   2.0
156       48.17   5.00    Male     No   Sun  Dinner     6   1.0
170       50.81  10.00    Male    Yes   Sat  Dinner     3   1.0
182       45.35   3.50    Male    Yes   Sun  Dinner     3   2.0
197       43.11   5.00  Female    Yes  Thur   Lunch     4   1.0
212       48.33   9.00    Male     No   Sat  Dinner     4   2.0
"""

print()

# Order the result by group, then by rank within the group, for readability.
dataFrame4 = dataFrame3.sort_values(["day", "rank"])

print(dataFrame4)

"""
     total_bill    tip     sex smoker   day    time  size  rank
95        40.17   4.73    Male    Yes   Fri  Dinner     4   1.0
90        28.97   3.00    Male    Yes   Fri  Dinner     2   2.0
170       50.81  10.00    Male    Yes   Sat  Dinner     3   1.0
212       48.33   9.00    Male     No   Sat  Dinner     4   2.0
156       48.17   5.00    Male     No   Sun  Dinner     6   1.0
182       45.35   3.50    Male    Yes   Sun  Dinner     3   2.0
197       43.11   5.00  Female    Yes  Thur   Lunch     4   1.0
142       41.19   5.00    Male     No  Thur   Lunch     5   2.0
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 |
numpy==2.1.2 pandas==2.2.3 python-dateutil==2.9.0.post0 pytz==2024.2 six==1.16.0 tzdata==2024.2 |
※ pip install pandas 명령을 실행했다.