■ RichEditControl 클래스의 HtmlText 속성에서 구한 HTML 문자열을 파싱하는 방법을 보여준다.
▶ Program.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
using System.Text;
using System.Web;

using HtmlAgilityPack;

namespace TestProject;

/// <summary>
/// Console program that loads "source.html" and prints a flat outline of the
/// paragraphs, images and tables found directly under the document body.
/// </summary>
class Program
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Method
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Private

    #region Start the program - Main()

    /// <summary>
    /// Loads "source.html" as UTF-8, parses the children of its body element and
    /// writes the resulting outline to standard output.
    /// </summary>
    private static void Main()
    {
        HtmlDocument document = new HtmlDocument();

        document.Load("source.html", Encoding.UTF8);

        // The name indexer yields null when the element is missing, so guard the
        // chain instead of failing with a NullReferenceException on HTML fragments.
        var bodyNode = document.DocumentNode.ChildNodes["html"]?.ChildNodes["body"];

        if(bodyNode is null)
        {
            Console.Error.WriteLine("source.html does not contain an <html>/<body> element.");

            return;
        }

        StringBuilder stringBuilder = new StringBuilder();

        Parse(bodyNode, stringBuilder, 0);

        Console.WriteLine(stringBuilder.ToString());
    }

    #endregion
    #region Parse - Parse(parentNode, stringBuilder, nodeLevel)

    /// <summary>
    /// Walks the direct children of <paramref name="parentNode"/> and appends one line
    /// per recognized element: "[이미지 N]" for a paragraph whose first span starts with
    /// an image, "[문단] text" for a paragraph with text, an empty line for a paragraph
    /// whose text is blank, and "[표 N]" for a table. Other nodes are ignored.
    /// </summary>
    /// <param name="parentNode">Node whose direct children are inspected.</param>
    /// <param name="stringBuilder">Builder that receives the output lines.</param>
    /// <param name="nodeLevel">Nesting level; each level indents emitted lines by four spaces.</param>
    private static void Parse(HtmlNode parentNode, StringBuilder stringBuilder, int nodeLevel)
    {
        // Counters are declared outside the loop so numbering continues across
        // siblings; declaring them per iteration made every item number 1.
        int imageCount = 0;
        int tableCount = 0;

        foreach(HtmlNode childNode in parentNode.ChildNodes)
        {
            if(childNode.Name == "table")
            {
                tableCount++;

                AppendIndentedLine(stringBuilder, nodeLevel, $"[표 {tableCount}]");

                continue;
            }

            if(childNode.Name != "p")
            {
                continue;
            }

            // Only paragraphs whose first child is a non-empty span are reported.
            if(childNode.ChildNodes.Count == 0 || childNode.ChildNodes[0].Name != "span")
            {
                continue;
            }

            HtmlNode spanNode = childNode.ChildNodes[0];

            if(spanNode.ChildNodes.Count == 0)
            {
                continue;
            }

            if(spanNode.ChildNodes[0].Name == "img")
            {
                imageCount++;

                AppendIndentedLine(stringBuilder, nodeLevel, $"[이미지 {imageCount}]");

                continue;
            }

            // Decode entities before trimming so that a paragraph holding only
            // "&nbsp;" counts as empty, and turn non-breaking spaces into plain spaces.
            string text = HttpUtility.HtmlDecode(childNode.InnerText).Replace('\u00A0', ' ').Trim();

            if(string.IsNullOrEmpty(text))
            {
                // Blank paragraphs become an empty line without trailing indentation.
                stringBuilder.AppendLine();
            }
            else
            {
                AppendIndentedLine(stringBuilder, nodeLevel, $"[문단] {text}");
            }
        }
    }

    #endregion
    #region Append an indented line - AppendIndentedLine(stringBuilder, nodeLevel, text)

    /// <summary>
    /// Appends <paramref name="text"/> as one line, preceded by four spaces per nesting level.
    /// </summary>
    /// <param name="stringBuilder">Builder that receives the line.</param>
    /// <param name="nodeLevel">Nesting level; values of zero or less add no indentation.</param>
    /// <param name="text">Line content.</param>
    private static void AppendIndentedLine(StringBuilder stringBuilder, int nodeLevel, string text)
    {
        if(nodeLevel > 0)
        {
            stringBuilder.Append(' ', nodeLevel * 4);
        }

        stringBuilder.AppendLine(text);
    }

    #endregion
}