[PYTHON/PANDAS] DataFrame 클래스 : read_csv/to_csv 메소드를 사용해 계층적 데이터 CSV 파일 변환하기

■ pandas의 read_csv 함수와 DataFrame 클래스의 to_csv 메소드를 사용해 계층적 데이터 CSV 파일을 변환하는 방법을 보여준다.

▶ main.py


import pandas as pd

def transformCategoryData(sourceDataFrame):
    """Expand a hierarchical category table into fully qualified rows.

    Rows are scanned top to bottom while remembering the most recent value of
    each level column ("구분1" .. "구분5"). A non-blank level value replaces
    the remembered one and clears every deeper level. Each source row then
    produces one output row holding the remembered levels plus that row's
    "비용코드" ("" when the cell is blank). Rows that appear before any level
    has been set and that carry no cost code are dropped.

    Args:
        sourceDataFrame: DataFrame with the columns "구분1" .. "구분5" and
            "비용코드". A cell counts as blank when it is NaN/None or "".

    Returns:
        A new DataFrame with the columns "구분1" .. "구분5", "비용코드" in
        that order. The columns are present even when no row is produced.
    """
    categoryColumnNameList = ["구분1", "구분2", "구분3", "구분4", "구분5"]
    costCodeColumnName = "비용코드"

    def hasValue(value):
        # NaN/None (blank CSV cell) and the empty string both mean "no value".
        return pd.notna(value) and value != ""

    currentRowValueDictionary = dict.fromkeys(categoryColumnNameList, "")
    targetRowValueListList = []

    for _, rowSeries in sourceDataFrame.iterrows():
        for columnIndex, columnName in enumerate(categoryColumnNameList):
            value = rowSeries[columnName]
            if not hasValue(value):
                continue
            currentRowValueDictionary[columnName] = value
            # A new value at this level invalidates everything below it.
            for lowerColumnName in categoryColumnNameList[columnIndex + 1:]:
                currentRowValueDictionary[lowerColumnName] = ""

        costCode = rowSeries[costCodeColumnName]
        if hasValue(costCode):
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = costCode
            targetRowValueListList.append(newRowValueDictionary)
        elif any(value != "" for value in currentRowValueDictionary.values()):
            # Compare against "" explicitly so a numeric 0 level is not
            # mistaken for "nothing set yet".
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = ""
            targetRowValueListList.append(newRowValueDictionary)

    # Passing the column list keeps the header even when no row was emitted.
    return pd.DataFrame(
        targetRowValueListList,
        columns=categoryColumnNameList + [costCodeColumnName],
    )

def transformCategoryFile(sourceFilePath, targetFilePath):
    """Read a hierarchical category CSV, flatten it, and write the result.

    Args:
        sourceFilePath: Path of the CSV file to read.
        targetFilePath: Path of the CSV file to write (overwritten if present).
    """
    # Read every cell as text: with type inference a code column that also
    # contains blank cells becomes float (5101 -> 5101.0) and leading zeros
    # are dropped.
    sourceDataFrame = pd.read_csv(sourceFilePath, dtype=str)
    targetDataFrame = transformCategoryData(sourceDataFrame)
    # utf-8-sig is used so that Korean text is not garbled when the file is
    # opened in Excel.
    targetDataFrame.to_csv(targetFilePath, encoding="utf-8-sig", index=False)

# Script entry: convert source.csv in the working directory into target.csv.
transformCategoryFile(
    sourceFilePath="source.csv",
    targetFilePath="target.csv",
)

import pandas as pd

def transformCategoryData(sourceDataFrame):
    """Expand a hierarchical category table into fully qualified rows.

    Rows are scanned top to bottom while remembering the most recent value of
    each level column ("구분1" .. "구분5"). A non-blank level value replaces
    the remembered one and clears every deeper level. Each source row then
    produces one output row holding the remembered levels plus that row's
    "비용코드" ("" when the cell is blank). Rows that appear before any level
    has been set and that carry no cost code are dropped.

    Args:
        sourceDataFrame: DataFrame with the columns "구분1" .. "구분5" and
            "비용코드". A cell counts as blank when it is NaN/None or "".

    Returns:
        A new DataFrame with the columns "구분1" .. "구분5", "비용코드" in
        that order. The columns are present even when no row is produced.
    """
    categoryColumnNameList = ["구분1", "구분2", "구분3", "구분4", "구분5"]
    costCodeColumnName = "비용코드"

    def hasValue(value):
        # NaN/None (blank CSV cell) and the empty string both mean "no value".
        return pd.notna(value) and value != ""

    currentRowValueDictionary = dict.fromkeys(categoryColumnNameList, "")
    targetRowValueListList = []

    for _, rowSeries in sourceDataFrame.iterrows():
        for columnIndex, columnName in enumerate(categoryColumnNameList):
            value = rowSeries[columnName]
            if not hasValue(value):
                continue
            currentRowValueDictionary[columnName] = value
            # A new value at this level invalidates everything below it.
            for lowerColumnName in categoryColumnNameList[columnIndex + 1:]:
                currentRowValueDictionary[lowerColumnName] = ""

        costCode = rowSeries[costCodeColumnName]
        if hasValue(costCode):
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = costCode
            targetRowValueListList.append(newRowValueDictionary)
        elif any(value != "" for value in currentRowValueDictionary.values()):
            # Compare against "" explicitly so a numeric 0 level is not
            # mistaken for "nothing set yet".
            newRowValueDictionary = currentRowValueDictionary.copy()
            newRowValueDictionary[costCodeColumnName] = ""
            targetRowValueListList.append(newRowValueDictionary)

    # Passing the column list keeps the header even when no row was emitted.
    return pd.DataFrame(
        targetRowValueListList,
        columns=categoryColumnNameList + [costCodeColumnName],
    )

def transformCategoryFile(sourceFilePath, targetFilePath):
    """Read a hierarchical category CSV, flatten it, and write the result.

    Args:
        sourceFilePath: Path of the CSV file to read.
        targetFilePath: Path of the CSV file to write (overwritten if present).
    """
    # Read every cell as text: with type inference a code column that also
    # contains blank cells becomes float (5101 -> 5101.0) and leading zeros
    # are dropped.
    sourceDataFrame = pd.read_csv(sourceFilePath, dtype=str)
    targetDataFrame = transformCategoryData(sourceDataFrame)
    # utf-8-sig is used so that Korean text is not garbled when the file is
    # opened in Excel.
    targetDataFrame.to_csv(targetFilePath, encoding="utf-8-sig", index=False)

# Script entry: convert source.csv in the working directory into target.csv.
transformCategoryFile(
    sourceFilePath="source.csv",
    targetFilePath="target.csv",
)

▶ requirements.txt


numpy==2.2.0
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.17.0
tzdata==2024.2

numpy==2.2.0

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.17.0

tzdata==2024.2

※ pip install pandas 명령을 실행한다.

source.csv
target.csv