■ DataFrame 클래스 : pandas.read_excel 함수를 사용해 계층 데이터 로드하기
[source.xlsx]
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# Load hierarchical category data from an Excel sheet into a tree of
# CategoryNode objects and print the resulting tree and cost-code index.

from typing import Dict
from typing import List
from typing import Optional

import pandas as pd


class CategoryNode:
    """One node of the category tree.

    Attributes:
        name: Cell value that names this node.
        level: Zero-based depth of the node (root nodes are level 0).
        parentNode: Parent node, or None for a root node.
        childNodeList: Direct children, in insertion order.
        costCodeList: Distinct cost codes attached to this node, in insertion order.
        nodeCount: Size of the subtree rooted here (this node included);
            stays 0 until Category.setNodeCount() is called.
        nodePathList: Nodes from the root down to and including this node.
    """

    def __init__(self, name : str, level : int, parentNode : Optional["CategoryNode"] = None):
        self.name : str = name
        self.level : int = level
        self.parentNode : Optional[CategoryNode] = parentNode
        self.childNodeList : List[CategoryNode] = []
        self.costCodeList : List[str] = []
        self.nodeCount : int = 0
        self.nodePathList : List[CategoryNode] = self._initializeNodePathList()

    def _initializeNodePathList(self) -> List["CategoryNode"]:
        """Return the path from the root node down to this node (inclusive)."""
        nodePathList : List[CategoryNode] = []
        currentNode : Optional[CategoryNode] = self

        # Walk upwards appending, then reverse once; this avoids the repeated
        # front insertions (each O(n)) that building the list root-first needs.
        while currentNode is not None:
            nodePathList.append(currentNode)
            currentNode = currentNode.parentNode

        nodePathList.reverse()

        return nodePathList

    def addChild(self, childNode : "CategoryNode") -> None:
        """Append childNode to this node's direct children."""
        self.childNodeList.append(childNode)

    def addCostCode(self, costCode : str) -> None:
        """Attach costCode to this node, ignoring falsy values and duplicates."""
        if costCode and costCode not in self.costCodeList:
            self.costCodeList.append(costCode)

    def getAncestorNodeList(self) -> List["CategoryNode"]:
        """Return the root-to-node path; note that it includes this node itself."""
        return self.nodePathList


class Category:
    """Container for the category forest and a cost-code lookup table.

    Attributes:
        rootNodeList: Top-level nodes, in insertion order.
        costCodeDictionary: Maps a cost code to the node it was registered with.
            Registering the same code again overwrites the earlier entry.
    """

    def __init__(self):
        self.rootNodeList : List[CategoryNode] = []
        self.costCodeDictionary : Dict[str, CategoryNode] = {}

    def addRootNode(self, rootNode : CategoryNode) -> None:
        """Append rootNode to the list of top-level nodes."""
        self.rootNodeList.append(rootNode)

    def addCostCode(self, costCode : str, node : CategoryNode) -> None:
        """Register node as the owner of costCode in the lookup table."""
        self.costCodeDictionary[costCode] = node

    def addCodeCode(self, costCode : str, node : CategoryNode) -> None:
        """Misspelled original name of addCostCode; kept so existing callers still work."""
        self.addCostCode(costCode, node)

    def setNodeCount(self) -> None:
        """Compute nodeCount (subtree size, node included) for every node."""

        def calculateNodeCount(node : CategoryNode) -> int:
            count = 1 + sum(calculateNodeCount(child) for child in node.childNodeList)
            node.nodeCount = count
            return count

        for rootNode in self.rootNodeList:
            calculateNodeCount(rootNode)


def createCategoryFromDataFrame(sourceDataFrame : pd.DataFrame, levelCount : int = 5) -> Category:
    """Build a Category tree from an already-loaded data frame.

    Each row describes one path through the hierarchy using the columns
    "구분1" .. "구분{levelCount}" (category levels, read left to right) and
    carries a cost code in the "비용코드" column. The first missing level
    value ends the row's path; the cost code is attached to the deepest node
    reached.

    Args:
        sourceDataFrame: Frame containing the level columns and "비용코드".
        levelCount: Number of "구분N" level columns to read (default 5).

    Returns:
        The populated Category with node counts already computed.

    Raises:
        KeyError: If a required column is missing from the frame.
    """
    category = Category()

    # Nodes are keyed by their full path so equal names under different
    # parents stay distinct nodes.
    nodeDictionary : Dict[tuple, CategoryNode] = {}

    levelColumnNameList = [f"구분{level}" for level in range(1, levelCount + 1)]

    for _, rowSeries in sourceDataFrame.iterrows():
        currentParentNode : Optional[CategoryNode] = None
        currentPathList = []

        for level, columnName in enumerate(levelColumnNameList):
            value = rowSeries[columnName]

            if pd.isna(value):
                break

            currentPathList.append(value)
            pathKeyTuple = tuple(currentPathList)

            currentNode = nodeDictionary.get(pathKeyTuple)

            if currentNode is None:
                currentNode = CategoryNode(value, level, currentParentNode)
                nodeDictionary[pathKeyTuple] = currentNode

                if currentParentNode is not None:
                    currentParentNode.addChild(currentNode)
                else:
                    # A one-element path that was not in nodeDictionary cannot
                    # already be a root, so no extra name scan is needed.
                    category.addRootNode(currentNode)

            currentParentNode = currentNode

        costCode = rowSeries["비용코드"]

        if pd.notna(costCode) and currentParentNode is not None:
            currentParentNode.addCostCode(costCode)
            category.addCostCode(costCode, currentParentNode)

    category.setNodeCount()

    return category


def createCategory(sourceFilePath : str, levelCount : int = 5) -> Category:
    """Read the Excel file at sourceFilePath and build its Category tree.

    Args:
        sourceFilePath: Path of the Excel workbook to load with pandas.read_excel.
        levelCount: Number of "구분N" level columns to read (default 5).

    Returns:
        The populated Category (see createCategoryFromDataFrame).
    """
    sourceDataFrame = pd.read_excel(sourceFilePath)

    return createCategoryFromDataFrame(sourceDataFrame, levelCount)


def printCategoryNode(node : CategoryNode, indent : str = "") -> None:
    """Print node and, recursively, its descendants, one node per line.

    Cost codes are appended to the line only for nodes that have any.
    """
    line = f"{indent}{node.name} (Level : {node.level}, Node Count : {node.nodeCount})"

    if node.costCodeList:
        line += f", Cost Code List : {node.costCodeList}"

    print(line)

    for child in node.childNodeList:
        printCategoryNode(child, indent + " ")


def printNodePathList(node : CategoryNode) -> str:
    """Return (not print) the comma-separated names on the root-to-node path.

    Names are converted with str() so non-string cell values (for example
    numbers read from Excel) do not raise TypeError.
    """
    return ",".join(str(pathNode.name) for pathNode in node.nodePathList)


def printCostCodeDictionary(category : Category) -> None:
    """Print every registered cost code with its owning node and that node's path."""
    for costCode, node in category.costCodeDictionary.items():
        print(f"Cost Code : {costCode} -> Node : {node.name} (Level : {node.level}, Node Path List : {printNodePathList(node)})")


def main() -> None:
    """Load source.xlsx and print the category tree followed by the cost-code index."""
    sourceFilePath = "source.xlsx"

    category = createCategory(sourceFilePath)

    for rootNode in category.rootNodeList:
        printCategoryNode(rootNode)

    print()

    printCostCodeDictionary(category)


# Guarded so that importing this module does not require source.xlsx to exist.
if __name__ == "__main__":
    main()

# Sample output for the accompanying source.xlsx:
"""
A (Level : 0, Node Count : 7)
 A1 (Level : 1, Node Count : 4)
  A2 (Level : 2, Node Count : 3)
   A3 (Level : 3, Node Count : 2)
    A4 (Level : 4, Node Count : 1), Cost Code List : ['CFA01', 'CFA02', 'CFA03']
 A5 (Level : 1, Node Count : 2)
  A6 (Level : 2, Node Count : 1), Cost Code List : ['CFA04', 'CFA05']
B (Level : 0, Node Count : 4)
 B1 (Level : 1, Node Count : 2)
  B2 (Level : 2, Node Count : 1), Cost Code List : ['CFA06', 'CFA07']
 B3 (Level : 1, Node Count : 1), Cost Code List : ['CFA08']
C (Level : 0, Node Count : 7)
 C1 (Level : 1, Node Count : 3)
  C2 (Level : 2, Node Count : 2)
   C3 (Level : 3, Node Count : 1), Cost Code List : ['CFA09', 'CFA10']
 C4 (Level : 1, Node Count : 3)
  C5 (Level : 2, Node Count : 2)
   C6 (Level : 3, Node Count : 1), Cost Code List : ['CFA11', 'CFA12']

Cost Code : CFA01 -> Node : A4 (Level : 4, Node Path List : A,A1,A2,A3,A4)
Cost Code : CFA02 -> Node : A4 (Level : 4, Node Path List : A,A1,A2,A3,A4)
Cost Code : CFA03 -> Node : A4 (Level : 4, Node Path List : A,A1,A2,A3,A4)
Cost Code : CFA04 -> Node : A6 (Level : 2, Node Path List : A,A5,A6)
Cost Code : CFA05 -> Node : A6 (Level : 2, Node Path List : A,A5,A6)
Cost Code : CFA06 -> Node : B2 (Level : 2, Node Path List : B,B1,B2)
Cost Code : CFA07 -> Node : B2 (Level : 2, Node Path List : B,B1,B2)
Cost Code : CFA08 -> Node : B3 (Level : 1, Node Path List : B,B3)
Cost Code : CFA09 -> Node : C3 (Level : 3, Node Path List : C,C1,C2,C3)
Cost Code : CFA10 -> Node : C3 (Level : 3, Node Path List : C,C1,C2,C3)
Cost Code : CFA11 -> Node : C6 (Level : 3, Node Path List : C,C4,C5,C6)
Cost Code : CFA12 -> Node : C6 (Level : 3, Node Path List : C,C4,C5,C6)
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.2.0
odfpy==1.4.1
openpyxl==3.1.5
packaging==24.2
pandas==2.2.3
python-calamine==0.3.1
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.17.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0
※ pip install pandas[excel] 명령을 실행했다.