[PYTHON/PANDAS] DataFrame 클래스 : read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일 변환하기 3

■ DataFrame 클래스의 read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일을 변환하는 방법을 보여준다.

[source.xlsx]

[target.xlsx]

▶ main.py


import pandas as pd

def expandCategoryData(sourceDataFrame):
    targetRowValueDictionaryList = []
    for _, rowSeries in sourceDataFrame.iterrows():
        currentRowValueDictionary = {}
        for i in range(1, 6):
            columnName = f"구분{i}"
            if pd.notna(rowSeries[columnName]) and rowSeries[columnName] != "":
                currentRowValueDictionary[columnName] = rowSeries[columnName]
                newRowValueDictionary = {
                    "구분1"    : currentRowValueDictionary.get("구분1", ""),
                    "구분2"    : currentRowValueDictionary.get("구분2", ""),
                    "구분3"    : currentRowValueDictionary.get("구분3", ""),
                    "구분4"    : currentRowValueDictionary.get("구분4", ""),
                    "구분5"    : currentRowValueDictionary.get("구분5", ""),
                    "비용코드" : ""
                }
                targetRowValueDictionaryList.append(newRowValueDictionary)
        targetRowValueDictionary = currentRowValueDictionary.copy()
        for i in range(1, 6):
            columnName = f"구분{i}"
            if columnName not in targetRowValueDictionary:
                targetRowValueDictionary[columnName] = ""
        targetRowValueDictionary["비용코드"] = rowSeries["비용코드"]
        targetRowValueDictionaryList.append(targetRowValueDictionary)
    targetDataFrame = pd.DataFrame(targetRowValueDictionaryList)
    targetDataFrame = targetDataFrame.drop_duplicates()
    sortColumnList  = ["구분1", "구분2", "구분3", "구분4", "구분5", "비용코드"]
    targetDataFrame = targetDataFrame.sort_values(by = sortColumnList)
    return targetDataFrame

sourceDataFrame = pd.read_excel("source.xlsx", keep_default_na = False)
targetDataFrame = expandCategoryData(sourceDataFrame)
targetDataFrame.to_excel("target.xlsx", index = False)

import pandas as pd

def expandCategoryData(sourceDataFrame):

targetRowValueDictionaryList = []

for _, rowSeries in sourceDataFrame.iterrows():

currentRowValueDictionary = {}

for i in range(1, 6):

columnName = f"구분{i}"

if pd.notna(rowSeries[columnName]) and rowSeries[columnName] != "":

currentRowValueDictionary[columnName] = rowSeries[columnName]

newRowValueDictionary = {

"구분1" : currentRowValueDictionary.get("구분1", ""),

"구분2" : currentRowValueDictionary.get("구분2", ""),

"구분3" : currentRowValueDictionary.get("구분3", ""),

"구분4" : currentRowValueDictionary.get("구분4", ""),

"구분5" : currentRowValueDictionary.get("구분5", ""),

"비용코드" : ""

}

targetRowValueDictionaryList.append(newRowValueDictionary)

targetRowValueDictionary = currentRowValueDictionary.copy()

for i in range(1, 6):

columnName = f"구분{i}"

if columnName not in targetRowValueDictionary:

targetRowValueDictionary[columnName] = ""

targetRowValueDictionary["비용코드"] = rowSeries["비용코드"]

targetRowValueDictionaryList.append(targetRowValueDictionary)

targetDataFrame = pd.DataFrame(targetRowValueDictionaryList)

targetDataFrame = targetDataFrame.drop_duplicates()

sortColumnList = ["구분1", "구분2", "구분3", "구분4", "구분5", "비용코드"]

targetDataFrame = targetDataFrame.sort_values(by = sortColumnList)

return targetDataFrame

sourceDataFrame = pd.read_excel("source.xlsx", keep_default_na = False)

targetDataFrame = expandCategoryData(sourceDataFrame)

targetDataFrame.to_excel("target.xlsx", index = False)

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.2.0
odfpy==1.4.1
openpyxl==3.1.5
packaging==24.2
pandas==2.2.3
python-calamine==0.3.1
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.17.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.2.0

odfpy==1.4.1

openpyxl==3.1.5

packaging==24.2

pandas==2.2.3

python-calamine==0.3.1

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.17.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install pandas[excel] 명령을 실행했다.

source.xlsx
target.xlsx