■ DataFrame 클래스에서 그룹당 상위 N개 행 데이터를 구하는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
import pandas as pd

# Walk-through: select the lowest-ranked rows of each group in a DataFrame by
# ranking a column inside every group and then filtering on that rank.

# Load the "tips" sample data set straight from the pandas repository.
TIPS_CSV_URL = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/tips.csv"
tips = pd.read_csv(TIPS_CSV_URL)
print(tips)
# Expected output:
#      total_bill   tip     sex  smoker   day    time  size
# 0         16.99  1.01  Female      No   Sun  Dinner     2
# 1         10.34  1.66    Male      No   Sun  Dinner     3
# 2         21.01  3.50    Male      No   Sun  Dinner     3
# 3         23.68  3.31    Male      No   Sun  Dinner     2
# 4         24.59  3.61  Female      No   Sun  Dinner     4
# ..          ...   ...     ...     ...   ...     ...   ...
# 239       29.03  5.92    Male      No   Sat  Dinner     3
# 240       27.18  2.00  Female     Yes   Sat  Dinner     2
# 241       22.67  2.00    Male     Yes   Sat  Dinner     2
# 242       17.82  1.75    Male      No   Sat  Dinner     2
# 243       18.78  3.00  Female      No  Thur  Dinner     2
#
# [244 rows x 7 columns]

print()

# Rank every tip inside its own "sex" group. With method="min", tied tips all
# receive the lowest rank of the tie (see the three Female rows ranked 1.0
# further down). The rank is computed on the full frame, before any filtering.
tip_rank_within_sex = tips.groupby(["sex"])["tip"].rank(method="min")
print(tip_rank_within_sex)
# Expected output:
# 0        4.0
# 1       20.0
# 2      109.0
# 3      103.0
# 4       70.0
#        ...
# 239    150.0
# 240     18.0
# 241     29.0
# 242     23.0
# 243     49.0
# Name: tip, Length: 244, dtype: float64

print()

# Keep only the rows whose tip is below 2.
small_tips = tips.loc[tips["tip"] < 2]
print(small_tips)
# Expected output:
#      total_bill   tip     sex  smoker   day    time  size
# 0         16.99  1.01  Female      No   Sun  Dinner     2
# 1         10.34  1.66    Male      No   Sun  Dinner     3
# 8         15.04  1.96    Male      No   Sun  Dinner     2
# 10        10.27  1.71    Male      No   Sun  Dinner     2
# 12        15.42  1.57    Male      No   Sun  Dinner     2
# 16        10.33  1.67  Female      No   Sun  Dinner     3
# 30         9.55  1.45    Male      No   Sat  Dinner     2
# 43         9.68  1.32    Male      No   Sun  Dinner     2
# 53         9.94  1.56    Male      No   Sun  Dinner     2
# 57        26.41  1.50  Female      No   Sat  Dinner     2
# 58        11.24  1.76    Male     Yes   Sat  Dinner     2
# 62        11.02  1.98    Male     Yes   Sat  Dinner     2
# 67         3.07  1.00  Female     Yes   Sat  Dinner     1
# 70        12.02  1.97    Male      No   Sat  Dinner     2
# 75        10.51  1.25    Male      No   Sat  Dinner     2
# 82        10.07  1.83  Female      No  Thur   Lunch     1
# 92         5.75  1.00  Female     Yes   Fri  Dinner     2
# 97        12.03  1.50    Male     Yes   Fri  Dinner     2
# 99        12.46  1.50    Male      No   Fri  Dinner     2
# 105       15.36  1.64    Male     Yes   Sat  Dinner     2
# 111        7.25  1.00  Female      No   Sat  Dinner     1
# 117       10.65  1.50  Female      No  Thur   Lunch     2
# 118       12.43  1.80  Female      No  Thur   Lunch     2
# 121       13.42  1.68  Female      No  Thur   Lunch     2
# 126        8.52  1.48    Male      No  Thur   Lunch     2
# 130       19.08  1.50    Male      No  Thur   Lunch     2
# 132       11.17  1.50  Female      No  Thur   Lunch     2
# 135        8.51  1.25  Female      No  Thur   Lunch     2
# 145        8.35  1.50  Female      No  Thur   Lunch     2
# 146       18.64  1.36  Female      No  Thur   Lunch     3
# 147       11.87  1.63  Female      No  Thur   Lunch     2
# 148        9.78  1.73    Male      No  Thur   Lunch     2
# 168       10.59  1.61  Female     Yes   Sat  Dinner     2
# 190       15.69  1.50    Male     Yes   Sun  Dinner     2
# 195        7.56  1.44    Male      No  Thur   Lunch     2
# 215       12.90  1.10  Female     Yes   Sat  Dinner     2
# 217       11.59  1.50    Male     Yes   Sat  Dinner     2
# 218        7.74  1.44    Male     Yes   Sat  Dinner     2
# 222        8.58  1.92    Male     Yes   Fri   Lunch     1
# 224       13.42  1.58    Male     Yes   Fri   Lunch     2
# 233       10.77  1.47    Male      No   Sat  Dinner     2
# 235       10.07  1.25    Male      No   Sat  Dinner     2
# 236       12.60  1.00    Male     Yes   Sat  Dinner     2
# 237       32.83  1.17    Male     Yes   Sat  Dinner     2
# 242       17.82  1.75    Male      No   Sat  Dinner     2

print()

# Attach the per-group rank as a new "rnk_min" column; assign() aligns the
# rank Series to the filtered rows by index label.
ranked_small_tips = small_tips.assign(rnk_min=tip_rank_within_sex)
print(ranked_small_tips)
# Expected output:
#      total_bill   tip     sex  smoker   day    time  size  rnk_min
# 0         16.99  1.01  Female      No   Sun  Dinner     2      4.0
# 1         10.34  1.66    Male      No   Sun  Dinner     3     20.0
# 8         15.04  1.96    Male      No   Sun  Dinner     2     26.0
# 10        10.27  1.71    Male      No   Sun  Dinner     2     21.0
# 12        15.42  1.57    Male      No   Sun  Dinner     2     17.0
# 16        10.33  1.67  Female      No   Sun  Dinner     3     14.0
# 30         9.55  1.45    Male      No   Sat  Dinner     2      8.0
# 43         9.68  1.32    Male      No   Sun  Dinner     2      5.0
# 53         9.94  1.56    Male      No   Sun  Dinner     2     16.0
# 57        26.41  1.50  Female      No   Sat  Dinner     2      8.0
# 58        11.24  1.76    Male     Yes   Sat  Dinner     2     24.0
# 62        11.02  1.98    Male     Yes   Sat  Dinner     2     28.0
# 67         3.07  1.00  Female     Yes   Sat  Dinner     1      1.0
# 70        12.02  1.97    Male      No   Sat  Dinner     2     27.0
# 75        10.51  1.25    Male      No   Sat  Dinner     2      3.0
# 82        10.07  1.83  Female      No  Thur   Lunch     1     17.0
# 92         5.75  1.00  Female     Yes   Fri  Dinner     2      1.0
# 97        12.03  1.50    Male     Yes   Fri  Dinner     2     11.0
# 99        12.46  1.50    Male      No   Fri  Dinner     2     11.0
# 105       15.36  1.64    Male     Yes   Sat  Dinner     2     19.0
# 111        7.25  1.00  Female      No   Sat  Dinner     1      1.0
# 117       10.65  1.50  Female      No  Thur   Lunch     2      8.0
# 118       12.43  1.80  Female      No  Thur   Lunch     2     16.0
# 121       13.42  1.68  Female      No  Thur   Lunch     2     15.0
# 126        8.52  1.48    Male      No  Thur   Lunch     2     10.0
# 130       19.08  1.50    Male      No  Thur   Lunch     2     11.0
# 132       11.17  1.50  Female      No  Thur   Lunch     2      8.0
# 135        8.51  1.25  Female      No  Thur   Lunch     2      6.0
# 145        8.35  1.50  Female      No  Thur   Lunch     2      8.0
# 146       18.64  1.36  Female      No  Thur   Lunch     3      7.0
# 147       11.87  1.63  Female      No  Thur   Lunch     2     13.0
# 148        9.78  1.73    Male      No  Thur   Lunch     2     22.0
# 168       10.59  1.61  Female     Yes   Sat  Dinner     2     12.0
# 190       15.69  1.50    Male     Yes   Sun  Dinner     2     11.0
# 195        7.56  1.44    Male      No  Thur   Lunch     2      6.0
# 215       12.90  1.10  Female     Yes   Sat  Dinner     2      5.0
# 217       11.59  1.50    Male     Yes   Sat  Dinner     2     11.0
# 218        7.74  1.44    Male     Yes   Sat  Dinner     2      6.0
# 222        8.58  1.92    Male     Yes   Fri   Lunch     1     25.0
# 224       13.42  1.58    Male     Yes   Fri   Lunch     2     18.0
# 233       10.77  1.47    Male      No   Sat  Dinner     2      9.0
# 235       10.07  1.25    Male      No   Sat  Dinner     2      3.0
# 236       12.60  1.00    Male     Yes   Sat  Dinner     2      1.0
# 237       32.83  1.17    Male     Yes   Sat  Dinner     2      2.0
# 242       17.82  1.75    Male      No   Sat  Dinner     2     23.0

print()

# Keep the rows ranked below 3 within their group.
lowest_ranked = ranked_small_tips.query("rnk_min < 3")
print(lowest_ranked)
# Expected output:
#      total_bill   tip     sex  smoker   day    time  size  rnk_min
# 67         3.07  1.00  Female     Yes   Sat  Dinner     1      1.0
# 92         5.75  1.00  Female     Yes   Fri  Dinner     2      1.0
# 111        7.25  1.00  Female      No   Sat  Dinner     1      1.0
# 236       12.60  1.00    Male     Yes   Sat  Dinner     2      1.0
# 237       32.83  1.17    Male     Yes   Sat  Dinner     2      2.0

print()

# Order the result by group first, then by rank inside the group.
lowest_ranked_sorted = lowest_ranked.sort_values(by=["sex", "rnk_min"])
print(lowest_ranked_sorted)
# Expected output:
#      total_bill   tip     sex  smoker   day    time  size  rnk_min
# 67         3.07  1.00  Female     Yes   Sat  Dinner     1      1.0
# 92         5.75  1.00  Female     Yes   Fri  Dinner     2      1.0
# 111        7.25  1.00  Female      No   Sat  Dinner     1      1.0
# 236       12.60  1.00    Male     Yes   Sat  Dinner     2      1.0
# 237       32.83  1.17    Male     Yes   Sat  Dinner     2      2.0
▶ requirements.txt
1 2 3 4 5 6 7 8 |
numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2
※ pip install pandas 명령을 실행했다.