■ DataFrame 클래스에서 groupby/rank 메소드를 사용해 그룹당 상위 N개(그룹 내 순위 기준) 행 데이터를 구하는 방법을 보여준다.
▶ main.py
|
import pandas as pd url = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/tips.csv" dataFrame1 = pd.read_csv(url) print(dataFrame1) """ total_bill tip sex smoker day time size 0 16.99 1.01 Female No Sun Dinner 2 1 10.34 1.66 Male No Sun Dinner 3 2 21.01 3.50 Male No Sun Dinner 3 3 23.68 3.31 Male No Sun Dinner 2 4 24.59 3.61 Female No Sun Dinner 4 .. ... ... ... ... ... ... ... 239 29.03 5.92 Male No Sat Dinner 3 240 27.18 2.00 Female Yes Sat Dinner 2 241 22.67 2.00 Male Yes Sat Dinner 2 242 17.82 1.75 Male No Sat Dinner 2 243 18.78 3.00 Female No Thur Dinner 2 [244 rows x 7 columns] """ print() dataFrameGroupBy = dataFrame1.groupby(["sex"]) seriesGroupBy = dataFrameGroupBy["tip"] series = seriesGroupBy.rank(method = "min") print(series) """ 0 4.0 1 20.0 2 109.0 3 103.0 4 70.0 ... 239 150.0 240 18.0 241 29.0 242 23.0 243 49.0 Name: tip, Length: 244, dtype: float64 """ print() dataFrame2 = dataFrame1[dataFrame1["tip"] < 2] print(dataFrame2) """ total_bill tip sex smoker day time size 0 16.99 1.01 Female No Sun Dinner 2 1 10.34 1.66 Male No Sun Dinner 3 8 15.04 1.96 Male No Sun Dinner 2 10 10.27 1.71 Male No Sun Dinner 2 12 15.42 1.57 Male No Sun Dinner 2 16 10.33 1.67 Female No Sun Dinner 3 30 9.55 1.45 Male No Sat Dinner 2 43 9.68 1.32 Male No Sun Dinner 2 53 9.94 1.56 Male No Sun Dinner 2 57 26.41 1.50 Female No Sat Dinner 2 58 11.24 1.76 Male Yes Sat Dinner 2 62 11.02 1.98 Male Yes Sat Dinner 2 67 3.07 1.00 Female Yes Sat Dinner 1 70 12.02 1.97 Male No Sat Dinner 2 75 10.51 1.25 Male No Sat Dinner 2 82 10.07 1.83 Female No Thur Lunch 1 92 5.75 1.00 Female Yes Fri Dinner 2 97 12.03 1.50 Male Yes Fri Dinner 2 99 12.46 1.50 Male No Fri Dinner 2 105 15.36 1.64 Male Yes Sat Dinner 2 111 7.25 1.00 Female No Sat Dinner 1 117 10.65 1.50 Female No Thur Lunch 2 118 12.43 1.80 Female No Thur Lunch 2 121 13.42 1.68 Female No Thur Lunch 2 126 8.52 1.48 Male No Thur Lunch 2 130 19.08 1.50 Male No Thur Lunch 2 132 11.17 1.50 Female No Thur 
Lunch 2 135 8.51 1.25 Female No Thur Lunch 2 145 8.35 1.50 Female No Thur Lunch 2 146 18.64 1.36 Female No Thur Lunch 3 147 11.87 1.63 Female No Thur Lunch 2 148 9.78 1.73 Male No Thur Lunch 2 168 10.59 1.61 Female Yes Sat Dinner 2 190 15.69 1.50 Male Yes Sun Dinner 2 195 7.56 1.44 Male No Thur Lunch 2 215 12.90 1.10 Female Yes Sat Dinner 2 217 11.59 1.50 Male Yes Sat Dinner 2 218 7.74 1.44 Male Yes Sat Dinner 2 222 8.58 1.92 Male Yes Fri Lunch 1 224 13.42 1.58 Male Yes Fri Lunch 2 233 10.77 1.47 Male No Sat Dinner 2 235 10.07 1.25 Male No Sat Dinner 2 236 12.60 1.00 Male Yes Sat Dinner 2 237 32.83 1.17 Male Yes Sat Dinner 2 242 17.82 1.75 Male No Sat Dinner 2 """ print() dataFrame3 = dataFrame2.assign(rnk_min = series) print(dataFrame3) """ total_bill tip sex smoker day time size rnk_min 0 16.99 1.01 Female No Sun Dinner 2 4.0 1 10.34 1.66 Male No Sun Dinner 3 20.0 8 15.04 1.96 Male No Sun Dinner 2 26.0 10 10.27 1.71 Male No Sun Dinner 2 21.0 12 15.42 1.57 Male No Sun Dinner 2 17.0 16 10.33 1.67 Female No Sun Dinner 3 14.0 30 9.55 1.45 Male No Sat Dinner 2 8.0 43 9.68 1.32 Male No Sun Dinner 2 5.0 53 9.94 1.56 Male No Sun Dinner 2 16.0 57 26.41 1.50 Female No Sat Dinner 2 8.0 58 11.24 1.76 Male Yes Sat Dinner 2 24.0 62 11.02 1.98 Male Yes Sat Dinner 2 28.0 67 3.07 1.00 Female Yes Sat Dinner 1 1.0 70 12.02 1.97 Male No Sat Dinner 2 27.0 75 10.51 1.25 Male No Sat Dinner 2 3.0 82 10.07 1.83 Female No Thur Lunch 1 17.0 92 5.75 1.00 Female Yes Fri Dinner 2 1.0 97 12.03 1.50 Male Yes Fri Dinner 2 11.0 99 12.46 1.50 Male No Fri Dinner 2 11.0 105 15.36 1.64 Male Yes Sat Dinner 2 19.0 111 7.25 1.00 Female No Sat Dinner 1 1.0 117 10.65 1.50 Female No Thur Lunch 2 8.0 118 12.43 1.80 Female No Thur Lunch 2 16.0 121 13.42 1.68 Female No Thur Lunch 2 15.0 126 8.52 1.48 Male No Thur Lunch 2 10.0 130 19.08 1.50 Male No Thur Lunch 2 11.0 132 11.17 1.50 Female No Thur Lunch 2 8.0 135 8.51 1.25 Female No Thur Lunch 2 6.0 145 8.35 1.50 Female No Thur Lunch 2 8.0 146 18.64 1.36 Female 
No Thur Lunch 3 7.0 147 11.87 1.63 Female No Thur Lunch 2 13.0 148 9.78 1.73 Male No Thur Lunch 2 22.0 168 10.59 1.61 Female Yes Sat Dinner 2 12.0 190 15.69 1.50 Male Yes Sun Dinner 2 11.0 195 7.56 1.44 Male No Thur Lunch 2 6.0 215 12.90 1.10 Female Yes Sat Dinner 2 5.0 217 11.59 1.50 Male Yes Sat Dinner 2 11.0 218 7.74 1.44 Male Yes Sat Dinner 2 6.0 222 8.58 1.92 Male Yes Fri Lunch 1 25.0 224 13.42 1.58 Male Yes Fri Lunch 2 18.0 233 10.77 1.47 Male No Sat Dinner 2 9.0 235 10.07 1.25 Male No Sat Dinner 2 3.0 236 12.60 1.00 Male Yes Sat Dinner 2 1.0 237 32.83 1.17 Male Yes Sat Dinner 2 2.0 242 17.82 1.75 Male No Sat Dinner 2 23.0 """ print() dataFrame4 = dataFrame3.query("rnk_min < 3") print(dataFrame4) """ total_bill tip sex smoker day time size rnk_min 67 3.07 1.00 Female Yes Sat Dinner 1 1.0 92 5.75 1.00 Female Yes Fri Dinner 2 1.0 111 7.25 1.00 Female No Sat Dinner 1 1.0 236 12.60 1.00 Male Yes Sat Dinner 2 1.0 237 32.83 1.17 Male Yes Sat Dinner 2 2.0 """ print() dataFrame5 = dataFrame4.sort_values(["sex", "rnk_min"]) print(dataFrame5) """ total_bill tip sex smoker day time size rnk_min 67 3.07 1.00 Female Yes Sat Dinner 1 1.0 92 5.75 1.00 Female Yes Fri Dinner 2 1.0 111 7.25 1.00 Female No Sat Dinner 1 1.0 236 12.60 1.00 Male Yes Sat Dinner 2 1.0 237 32.83 1.17 Male Yes Sat Dinner 2 2.0 """ |
▶ requirements.txt
1 2 3 4 5 6 7 8 |
numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2
※ pip install pandas 명령을 실행했다.