■ DataFrame 클래스의 read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일을 변환하는 방법을 보여준다.
[source.xlsx]
[target.xlsx]
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import pandas as pd def expandCategoryData(sourceDataFrame): targetRowValueDictionaryList = [] for _, rowSeries in sourceDataFrame.iterrows(): currentRowValueDictionary = {} for i in range(1, 6): columnName = f"구분{i}" if pd.notna(rowSeries[columnName]) and rowSeries[columnName] != "": currentRowValueDictionary[columnName] = rowSeries[columnName] newRowValueDictionary = { "구분1" : currentRowValueDictionary.get("구분1", ""), "구분2" : currentRowValueDictionary.get("구분2", ""), "구분3" : currentRowValueDictionary.get("구분3", ""), "구분4" : currentRowValueDictionary.get("구분4", ""), "구분5" : currentRowValueDictionary.get("구분5", ""), "비용코드" : "" } targetRowValueDictionaryList.append(newRowValueDictionary) targetRowValueDictionary = currentRowValueDictionary.copy() for i in range(1, 6): columnName = f"구분{i}" if columnName not in targetRowValueDictionary: targetRowValueDictionary[columnName] = "" targetRowValueDictionary["비용코드"] = rowSeries["비용코드"] targetRowValueDictionaryList.append(targetRowValueDictionary) targetDataFrame = pd.DataFrame(targetRowValueDictionaryList) targetDataFrame = targetDataFrame.drop_duplicates() sortColumnList = ["구분1", "구분2", "구분3", "구분4", "구분5", "비용코드"] targetDataFrame = targetDataFrame.sort_values(by = sortColumnList) return targetDataFrame sourceDataFrame = pd.read_excel("source.xlsx", keep_default_na = False) targetDataFrame = expandCategoryData(sourceDataFrame) targetDataFrame.to_excel("target.xlsx", index = False) |
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
defusedxml==0.7.1 et_xmlfile==2.0.0 numpy==2.2.0 odfpy==1.4.1 openpyxl==3.1.5 packaging==24.2 pandas==2.2.3 python-calamine==0.3.1 python-dateutil==2.9.0.post0 pytz==2024.2 pyxlsb==1.0.10 six==1.17.0 tzdata==2024.2 xlrd==2.0.1 XlsxWriter==3.2.0 |
※ pip install pandas[excel] 명령을 실행했다.