using System.Text;
using System.Web;
using HtmlAgilityPack;
namespace TestProject;
/// <summary>
/// Console program that prints a plain-text outline of the paragraphs, images and
/// tables found directly under the body element of <c>source.html</c>.
/// </summary>
class Program
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Method
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Private

    #region Start program - Main()

    /// <summary>
    /// Loads <c>source.html</c> as UTF-8, builds the outline of its body element
    /// and writes the result to standard output.
    /// </summary>
    private static void Main()
    {
        HtmlDocument document = new HtmlDocument();

        document.Load("source.html", Encoding.UTF8);

        // The name indexer yields null when no child matches, so a document lacking
        // <html> or <body> is reported instead of failing with a NullReferenceException.
        if(document.DocumentNode.ChildNodes["html"]?.ChildNodes["body"] is not HtmlNode bodyNode)
        {
            Console.Error.WriteLine("source.html 파일에서 body 요소를 찾을 수 없습니다.");

            return;
        }

        StringBuilder stringBuilder = new StringBuilder();

        Parse(bodyNode, stringBuilder, 0);

        Console.WriteLine(stringBuilder.ToString());
    }

    #endregion
    #region Parse - Parse(parentNode, stringBuilder, nodeLevel)

    /// <summary>
    /// Appends one outline line per recognised direct child of <paramref name="parentNode"/>:
    /// a paragraph whose first child is a span (image marker, paragraph text or blank line)
    /// and a table (table marker). Other children produce no output.
    /// </summary>
    /// <param name="parentNode">Node whose direct children are inspected.</param>
    /// <param name="stringBuilder">Builder that receives the outline lines.</param>
    /// <param name="nodeLevel">Nesting level; each level indents content lines by four spaces.</param>
    private static void Parse(HtmlNode parentNode, StringBuilder stringBuilder, int nodeLevel)
    {
        // Declared outside the loop so the numbering keeps increasing across siblings
        // instead of restarting at 1 for every node.
        int imageCount = 0;
        int tableCount = 0;

        foreach(HtmlNode childNode in parentNode.ChildNodes)
        {
            if(childNode.Name == "#text")
            {
                continue;
            }

            if(childNode.Name == "p")
            {
                // Only paragraphs whose first child is a non-empty <span> are reported.
                if(childNode.ChildNodes.Count == 0)
                {
                    continue;
                }

                HtmlNode firstChildNode = childNode.ChildNodes[0];

                if(firstChildNode.Name != "span" || firstChildNode.ChildNodes.Count == 0)
                {
                    continue;
                }

                if(firstChildNode.ChildNodes[0].Name == "img")
                {
                    imageCount++;

                    AppendIndent(stringBuilder, nodeLevel);

                    stringBuilder.AppendLine($"[이미지 {imageCount}]");

                    continue;
                }

                string text = childNode.InnerText.Trim();

                if(string.IsNullOrEmpty(text))
                {
                    // An empty paragraph becomes a blank line; no indentation is written
                    // so the line carries no trailing whitespace.
                    stringBuilder.AppendLine();

                    continue;
                }

                AppendIndent(stringBuilder, nodeLevel);

                // &nbsp; decodes to U+00A0; it is written as an escape so the replacement
                // with a regular space stays visible and survives whitespace normalisation.
                stringBuilder.AppendLine($"[문단] {HttpUtility.HtmlDecode(text).Replace('\u00A0', ' ')}");
            }
            else if(childNode.Name == "table")
            {
                tableCount++;

                AppendIndent(stringBuilder, nodeLevel);

                stringBuilder.AppendLine($"[표 {tableCount}]");
            }
        }
    }

    #endregion
    #region Append indentation - AppendIndent(stringBuilder, nodeLevel)

    /// <summary>
    /// Appends four spaces per nesting level; called only immediately before a content
    /// line so that skipped nodes never leave stray indentation behind.
    /// </summary>
    /// <param name="stringBuilder">Builder that receives the indentation.</param>
    /// <param name="nodeLevel">Nesting level; values of zero or less append nothing.</param>
    private static void AppendIndent(StringBuilder stringBuilder, int nodeLevel)
    {
        if(nodeLevel > 0)
        {
            stringBuilder.Append(' ', nodeLevel * 4);
        }
    }

    #endregion
}