[PYTHON/PANDAS] DataFrame 클래스 : read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일 변환하기 1

■ DataFrame 클래스 : read_excel/to_excel 메소드를 사용해 계층적 데이터 엑셀 파일 변환하기

[source.xlsx]

[target.xlsx]

▶ main.py


import pandas as pd

def transformCategoryData(sourceDataFrame):
    """Flatten a hierarchical category table into fully populated rows.

    The source rows fill in "구분1" ... "구분5" only where a category level
    changes. The most recent value of every level is carried forward so
    that each output row states its complete category path.

    Args:
        sourceDataFrame: DataFrame whose rows provide the columns "구분1" to
            "구분5" and "비용코드". A cell counts as blank when it is NaN or "".

    Returns:
        A DataFrame with the columns "구분1" ... "구분5", "비용코드", in that
        order, even when no row is produced. Every source row yields one
        output row, except rows with a blank cost code that occur while all
        carried category values are still empty. Rows without a cost code
        get "" in "비용코드".

    Raises:
        KeyError: if a row lacks one of the expected columns.
    """
    categoryColumnNameList = ["구분1", "구분2", "구분3", "구분4", "구분5"]
    costCodeColumnName = "비용코드"

    def hasValue(cellValue):
        # Blank cells may arrive either as NaN or as an empty string.
        return pd.notna(cellValue) and cellValue != ""

    targetRowValueListList = []
    currentRowValueDictionary = dict.fromkeys(categoryColumnNameList, "")
    for _, rowSeries in sourceDataFrame.iterrows():
        for levelIndex, columnName in enumerate(categoryColumnNameList):
            cellValue = rowSeries[columnName]
            if hasValue(cellValue):
                currentRowValueDictionary[columnName] = cellValue
                # A new value at this level invalidates every deeper level.
                for lowerColumnName in categoryColumnNameList[levelIndex + 1:]:
                    currentRowValueDictionary[lowerColumnName] = ""
        costCode = rowSeries[costCodeColumnName]
        if hasValue(costCode):
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = costCode
            targetRowValueListList.append(newRowValueDictionary)
        elif any(currentRowValueDictionary.values()):
            # Category-only rows are kept, with an empty cost code.
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = ""
            targetRowValueListList.append(newRowValueDictionary)
    # Explicit columns keep the header intact when no rows were produced.
    return pd.DataFrame(
        targetRowValueListList,
        columns=categoryColumnNameList + [costCodeColumnName],
    )

def transformCategoryFile(sourceFilePath, targetFilePath):
    """Convert a hierarchical category workbook into a flattened workbook.

    Args:
        sourceFilePath: Path of the Excel file to read.
        targetFilePath: Path of the Excel file to write.
    """
    # keep_default_na=False turns off pandas' default NaN markers while reading.
    flattenedDataFrame = transformCategoryData(
        pd.read_excel(sourceFilePath, keep_default_na=False)
    )
    # index=False leaves the DataFrame row index out of the written sheet.
    flattenedDataFrame.to_excel(targetFilePath, index=False)

# Script entry point: flatten source.xlsx and write the result to target.xlsx.
transformCategoryFile(sourceFilePath="source.xlsx", targetFilePath="target.xlsx")

import pandas as pd

def transformCategoryData(sourceDataFrame):
    """Flatten a hierarchical category table into fully populated rows.

    The source rows fill in "구분1" ... "구분5" only where a category level
    changes. The most recent value of every level is carried forward so
    that each output row states its complete category path.

    Args:
        sourceDataFrame: DataFrame whose rows provide the columns "구분1" to
            "구분5" and "비용코드". A cell counts as blank when it is NaN or "".

    Returns:
        A DataFrame with the columns "구분1" ... "구분5", "비용코드", in that
        order, even when no row is produced. Every source row yields one
        output row, except rows with a blank cost code that occur while all
        carried category values are still empty. Rows without a cost code
        get "" in "비용코드".

    Raises:
        KeyError: if a row lacks one of the expected columns.
    """
    categoryColumnNameList = ["구분1", "구분2", "구분3", "구분4", "구분5"]
    costCodeColumnName = "비용코드"

    def hasValue(cellValue):
        # Blank cells may arrive either as NaN or as an empty string.
        return pd.notna(cellValue) and cellValue != ""

    targetRowValueListList = []
    currentRowValueDictionary = dict.fromkeys(categoryColumnNameList, "")
    for _, rowSeries in sourceDataFrame.iterrows():
        for levelIndex, columnName in enumerate(categoryColumnNameList):
            cellValue = rowSeries[columnName]
            if hasValue(cellValue):
                currentRowValueDictionary[columnName] = cellValue
                # A new value at this level invalidates every deeper level.
                for lowerColumnName in categoryColumnNameList[levelIndex + 1:]:
                    currentRowValueDictionary[lowerColumnName] = ""
        costCode = rowSeries[costCodeColumnName]
        if hasValue(costCode):
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = costCode
            targetRowValueListList.append(newRowValueDictionary)
        elif any(currentRowValueDictionary.values()):
            # Category-only rows are kept, with an empty cost code.
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = ""
            targetRowValueListList.append(newRowValueDictionary)
    # Explicit columns keep the header intact when no rows were produced.
    return pd.DataFrame(
        targetRowValueListList,
        columns=categoryColumnNameList + [costCodeColumnName],
    )

def transformCategoryFile(sourceFilePath, targetFilePath):
    """Convert a hierarchical category workbook into a flattened workbook.

    Args:
        sourceFilePath: Path of the Excel file to read.
        targetFilePath: Path of the Excel file to write.
    """
    # keep_default_na=False turns off pandas' default NaN markers while reading.
    sourceDataFrame = pd.read_excel(sourceFilePath, keep_default_na=False)
    targetDataFrame = transformCategoryData(sourceDataFrame)
    # index=False leaves the DataFrame row index out of the written sheet.
    targetDataFrame.to_excel(targetFilePath, index=False)

# Script entry point: flatten source.xlsx and write the result to target.xlsx.
transformCategoryFile(sourceFilePath="source.xlsx", targetFilePath="target.xlsx")

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.2.0
odfpy==1.4.1
openpyxl==3.1.5
packaging==24.2
pandas==2.2.3
python-calamine==0.3.1
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.17.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.2.0

odfpy==1.4.1

openpyxl==3.1.5

packaging==24.2

pandas==2.2.3

python-calamine==0.3.1

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.17.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install pandas[excel] 명령을 실행했다.

source.xlsx
target.xlsx