■ DataFrame 클래스의 read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일을 변환하는 방법을 보여준다.
[source.xlsx]
[target.xlsx]
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import pandas as pd def transformCategoryData(sourceDataFrame): targetRowValueListList = [] currentRowValueDictionary = { "구분1" : "", "구분2" : "", "구분3" : "", "구분4" : "", "구분5" : "" } for _, rowSeries in sourceDataFrame.iterrows(): for columnName in ["구분1", "구분2", "구분3", "구분4", "구분5"]: if pd.notna(rowSeries[columnName]) and rowSeries[columnName] != "": currentRowValueDictionary[columnName] = rowSeries[columnName] for lowerColumnName in list(currentRowValueDictionary.keys())[list(currentRowValueDictionary.keys()).index(columnName) + 1:]: currentRowValueDictionary[lowerColumnName] = "" if pd.notna(rowSeries["비용코드"]) and rowSeries["비용코드"] != "": newRowValueDictionary = currentRowValueDictionary.copy() newRowValueDictionary["비용코드"] = rowSeries["비용코드"] targetRowValueListList.append(newRowValueDictionary) else: if any(currentRowValueDictionary.values()): newRowValueDictionary = currentRowValueDictionary.copy() newRowValueDictionary["비용코드"] = "" targetRowValueListList.append(newRowValueDictionary) targetDataFrame = pd.DataFrame(targetRowValueListList) return targetDataFrame def removeNoCostCodeData(sourceDataFrame): targetDataFrame1 = sourceDataFrame.dropna(subset = ["비용코드"]) targetDataFrame2 = targetDataFrame1[targetDataFrame1['비용코드'] != ""] return targetDataFrame2 def transformCategoryFile(sourceFilePath, targetFilePath): sourceDataFrame = pd.read_excel(sourceFilePath, keep_default_na = False) targetDataFrame1 = transformCategoryData(sourceDataFrame) targetDataFrame2 = removeNoCostCodeData(targetDataFrame1) targetDataFrame2.to_excel(targetFilePath, index = False) transformCategoryFile("source.xlsx", "target.xlsx") |
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
defusedxml==0.7.1 et_xmlfile==2.0.0 numpy==2.2.0 odfpy==1.4.1 openpyxl==3.1.5 packaging==24.2 pandas==2.2.3 python-calamine==0.3.1 python-dateutil==2.9.0.post0 pytz==2024.2 pyxlsb==1.0.10 six==1.17.0 tzdata==2024.2 xlrd==2.0.1 XlsxWriter==3.2.0 |
※ pip install pandas[excel] 명령을 실행했다.