[C#/WPF/.NET8] 크롬 브라우저의 활성탭 웹 페이지에서 텍스트 추출하기 1
■ 크롬 브라우저의 활성탭 웹 페이지에서 텍스트를 추출하는 방법을 보여준다. ▶ ChromBrowserProcessHelper.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;

namespace TestProject;

/// <summary>
/// Helper that enumerates visible Chrome browser windows and reads the URL and page source
/// of the active tab by driving the browser with keystrokes and the clipboard.
/// </summary>
public class ChromBrowserProcessHelper
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Structure
    ////////////////////////////////////////////////////////////////////////////////////////// Private

    #region Rectangle - RECT

    /// <summary>
    /// Window rectangle as filled in by <c>GetWindowRect</c>.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct RECT
    {
        /// <summary>
        /// Left edge.
        /// </summary>
        public int Left;

        /// <summary>
        /// Top edge.
        /// </summary>
        public int Top;

        /// <summary>
        /// Right edge.
        /// </summary>
        public int Right;

        /// <summary>
        /// Bottom edge.
        /// </summary>
        public int Bottom;
    }

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Delegate
    ////////////////////////////////////////////////////////////////////////////////////////// Public

    #region Enumerate window delegate - EnumerateWindowDelegate(windowHandle, longParameter)

    /// <summary>
    /// Callback invoked by <c>EnumWindows</c> for each enumerated window.
    /// </summary>
    /// <param name="windowHandle">Handle of the enumerated window.</param>
    /// <param name="longParameter">Value that was passed to <c>EnumWindows</c>.</param>
    /// <returns>Result reported back to <c>EnumWindows</c>; this class always returns <c>true</c>.</returns>
    public delegate bool EnumerateWindowDelegate(IntPtr windowHandle, IntPtr longParameter);

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Import
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Private

    #region Enumerate windows - EnumWindows(enumerateWindowDelegate, longParameter)

    /// <summary>
    /// Enumerates windows, calling the delegate once per window.
    /// </summary>
    /// <param name="enumerateWindowDelegate">Callback to invoke.</param>
    /// <param name="longParameter">Value forwarded to the callback.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern bool EnumWindows(EnumerateWindowDelegate enumerateWindowDelegate, IntPtr longParameter);

    #endregion
    #region Get window visibility - IsWindowVisible(windowHandle)

    /// <summary>
    /// Gets whether a window is visible.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <returns>Whether the window is visible.</returns>
    [DllImport("user32")]
    private static extern bool IsWindowVisible(IntPtr windowHandle);

    #endregion
    #region Get window title - GetWindowText(windowHandle, stringBuilder, maximumCount)

    /// <summary>
    /// Gets a window's title.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <param name="stringBuilder">Buffer that receives the title.</param>
    /// <param name="maximumCount">Maximum number of characters to copy.</param>
    /// <returns>Native call result.</returns>
    /// <remarks>
    /// Declared with <see cref="CharSet.Unicode"/> so the title is marshalled as UTF-16;
    /// the default ANSI marshalling loses characters outside the system code page.
    /// </remarks>
    [DllImport("user32", CharSet = CharSet.Unicode, SetLastError = true)]
    private static extern int GetWindowText(IntPtr windowHandle, StringBuilder stringBuilder, int maximumCount);

    #endregion
    #region Get window rectangle - GetWindowRect(windowHandle, rectangle)

    /// <summary>
    /// Gets a window's rectangle.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <param name="rectangle">Receives the rectangle.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern bool GetWindowRect(IntPtr windowHandle, out RECT rectangle);

    #endregion
    #region Set foreground window - SetForegroundWindow(windowHandle)

    /// <summary>
    /// Makes a window the foreground window.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern bool SetForegroundWindow(IntPtr windowHandle);

    #endregion
    #region Get window thread process ID - GetWindowThreadProcessId(windowHandle, processID)

    /// <summary>
    /// Gets the ID of the process that owns a window.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <param name="processID">Receives the process ID.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern uint GetWindowThreadProcessId(IntPtr windowHandle, out uint processID);

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Field
    ////////////////////////////////////////////////////////////////////////////////////////// Private

    #region Field

    /// <summary>
    /// Capacity of the buffer used to read a window title.
    /// </summary>
    private const int MaximumTitleLength = 256;

    /// <summary>
    /// Text a process name must contain for its windows to be reported.
    /// </summary>
    private const string ChromeProcessNameMarker = "chrome";

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Method
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Public

    #region Get Chrome browser window information list - GetChromeBrowserWindowInfoList()

    /// <summary>
    /// Collects information about every visible window whose owning process name contains "chrome".
    /// </summary>
    /// <returns>List of window information; empty when no matching window is found.</returns>
    public static List<WindowInformation> GetChromeBrowserWindowInfoList()
    {
        List<WindowInformation> windowInformationList = new List<WindowInformation>();

        EnumWindows
        (
            (windowHandle, longParameter) =>
            {
                if(IsWindowVisible(windowHandle))
                {
                    GetWindowThreadProcessId(windowHandle, out uint processID);

                    if(IsChromeProcess(processID))
                    {
                        windowInformationList.Add(CreateWindowInformation(windowHandle, processID));
                    }
                }

                // Always continue so that every window is visited.
                return true;
            },
            IntPtr.Zero
        );

        return windowInformationList;
    }

    #endregion
    #region Get main Chrome browser window information - GetMainChromeBrowserWindowInformation(windowInformationList)

    /// <summary>
    /// Picks the window with the largest area (width x height) from the list.
    /// </summary>
    /// <param name="windowInformationList">Window information list.</param>
    /// <returns>
    /// The first window with the strictly largest positive area, or <c>null</c> when the list is
    /// <c>null</c>, empty, or contains no window with a positive area.
    /// </returns>
    public static WindowInformation GetMainChromeBrowserWindowInformation(List<WindowInformation> windowInformationList)
    {
        if(windowInformationList == null || windowInformationList.Count == 0)
        {
            return null;
        }

        WindowInformation mainWindowInformation = null;
        long maximumArea = 0;

        foreach(WindowInformation windowInformation in windowInformationList)
        {
            // Multiply as long so that an extreme width/height pair cannot overflow.
            long area = (long)windowInformation.Width * windowInformation.Height;

            if(area > maximumArea)
            {
                maximumArea = area;
                mainWindowInformation = windowInformation;
            }
        }

        return mainWindowInformation;
    }

    #endregion
    #region Get active tab information - GetActiveTabInformation(windowInformation)

    /// <summary>
    /// Reads the URL and the page source of the active tab of the given window.
    /// </summary>
    /// <param name="windowInformation">Window information of the browser window to read.</param>
    /// <returns>Tuple of the copied URL and the copied page source.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="windowInformation"/> is <c>null</c>.</exception>
    /// <remarks>
    /// The method brings the window to the foreground, sends keystrokes to it and reads the results
    /// back from the clipboard, so the previous clipboard content is overwritten.
    /// NOTE(review): the fixed delays are best-effort; a slow machine may copy before the browser
    /// has reacted - confirm the timings for the target environment.
    /// </remarks>
    public static (string url, string html) GetActiveTabInformation(WindowInformation windowInformation)
    {
        if(windowInformation == null)
        {
            throw new ArgumentNullException(nameof(windowInformation));
        }

        SetForegroundWindow(windowInformation.WindowHandle);

        Thread.Sleep(300);

        // Press the ALT key.
        SendKeysAndWait("%", 100);

        // Select the address bar.
        SendKeysAndWait("^{l}", 10);

        // Copy the URL.
        SendKeysAndWait("^{c}", 10);

        // Read the URL from the clipboard.
        string url = System.Windows.Forms.Clipboard.GetText();

        // Open the view-source tab.
        SendKeysAndWait("^{u}", 1000);

        // Select the whole text.
        SendKeysAndWait("^{a}", 500);

        // Copy the HTML.
        SendKeysAndWait("^{c}", 100);

        // Read the HTML from the clipboard.
        string html = System.Windows.Forms.Clipboard.GetText();

        // Close the view-source tab.
        System.Windows.Forms.SendKeys.SendWait("^{w}");

        return (url, html);
    }

    #endregion

    //////////////////////////////////////////////////////////////////////////////// Private

    #region Get whether process is Chrome - IsChromeProcess(processID)

    /// <summary>
    /// Checks whether the name of the process with the given ID contains "chrome" (case-insensitive).
    /// </summary>
    /// <param name="processID">Process ID.</param>
    /// <returns><c>true</c> when the process name matches; <c>false</c> when it does not or the process is gone.</returns>
    private static bool IsChromeProcess(uint processID)
    {
        try
        {
            // Dispose the Process object so that one handle per enumerated window is not leaked.
            using Process process = Process.GetProcessById((int)processID);

            return process.ProcessName.Contains(ChromeProcessNameMarker, StringComparison.OrdinalIgnoreCase);
        }
        catch(ArgumentException)
        {
            // The process is no longer running.
            return false;
        }
        catch(InvalidOperationException)
        {
            // The process exited between lookup and name query; an exception must not escape
            // through the native EnumWindows callback.
            return false;
        }
    }

    #endregion
    #region Create window information - CreateWindowInformation(windowHandle, processID)

    /// <summary>
    /// Builds the window information (title, owning process, size) for a window.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <param name="processID">ID of the process that owns the window.</param>
    /// <returns>Window information.</returns>
    private static WindowInformation CreateWindowInformation(IntPtr windowHandle, uint processID)
    {
        StringBuilder stringBuilder = new StringBuilder(MaximumTitleLength);

        GetWindowText(windowHandle, stringBuilder, MaximumTitleLength);

        GetWindowRect(windowHandle, out RECT rectangle);

        return new WindowInformation
        {
            WindowHandle = windowHandle,
            Title        = stringBuilder.ToString(),
            ProcessID    = processID,
            Width        = rectangle.Right  - rectangle.Left,
            Height       = rectangle.Bottom - rectangle.Top
        };
    }

    #endregion
    #region Send keys and wait - SendKeysAndWait(keys, delayMilliseconds)

    /// <summary>
    /// Sends keystrokes to the foreground window and then pauses.
    /// </summary>
    /// <param name="keys">Keystrokes in SendKeys notation.</param>
    /// <param name="delayMilliseconds">Pause after sending, in milliseconds.</param>
    private static void SendKeysAndWait(string keys, int delayMilliseconds)
    {
        System.Windows.Forms.SendKeys.SendWait(keys);

        Thread.Sleep(delayMilliseconds);
    }

    #endregion
}
▶ HTMLHelper.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
using System;
using System.Linq;
using System.Text.RegularExpressions;

using HtmlAgilityPack;

namespace TestProject;

/// <summary>
/// HTML helper: text extraction from HTML and line-level normalization of the result.
/// </summary>
public class HTMLHelper
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Method
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Public

    #region Extract text - ExtractText(html)

    /// <summary>
    /// Parses the HTML and returns the inner text of the document node.
    /// </summary>
    /// <param name="html">HTML to parse.</param>
    /// <returns>Inner text of the parsed document.</returns>
    public static string ExtractText(string html)
    {
        HtmlDocument document = new HtmlDocument();

        document.LoadHtml(html);

        HtmlNode rootNode = document.DocumentNode;

        return rootNode.InnerText;
    }

    #endregion
    #region Normalize string - NormalizeString(sourceString)

    /// <summary>
    /// Normalizes a multi-line string.
    /// </summary>
    /// <param name="sourceString">Source string.</param>
    /// <returns>Normalized string, with lines joined by <see cref="Environment.NewLine"/>.</returns>
    /// <remarks>
    /// 1. Leading and trailing white space is removed from every line.
    /// 2. A run of two or more line breaks is reduced to two line breaks (one blank line).
    /// </remarks>
    public static string NormalizeString(string sourceString)
    {
        string[] lineArray = sourceString.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.None);

        // Trim in place; the array is local, so the caller never observes the mutation.
        for(int index = 0; index < lineArray.Length; index++)
        {
            lineArray[index] = lineArray[index].Trim();
        }

        string singleBlankLine = Environment.NewLine + Environment.NewLine;

        return Regex.Replace(string.Join(Environment.NewLine, lineArray), @"(\r\n|\n){2,}", singleBlankLine);
    }

    #endregion
}
▶ WindowHelper.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
using System;
using System.Runtime.InteropServices;
using System.Windows;
using System.Windows.Interop;

namespace TestProject;

/// <summary>
/// Window helper that shows and activates a WPF window through user32.
/// </summary>
public static class WindowHelper
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Field
    ////////////////////////////////////////////////////////////////////////////////////////// Private

    #region Field

    /// <summary>
    /// SW_SHOW command value passed to <c>ShowWindow</c>.
    /// </summary>
    private const int SW_SHOW = 5;

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Import
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Private

    #region Show window - ShowWindow(windowHandle, command)

    /// <summary>
    /// Shows a window using the given command.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <param name="command">Show command.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern bool ShowWindow(IntPtr windowHandle, int command);

    #endregion
    #region Set foreground window - SetForegroundWindow(windowHandle)

    /// <summary>
    /// Makes a window the foreground window.
    /// </summary>
    /// <param name="windowHandle">Window handle.</param>
    /// <returns>Native call result.</returns>
    [DllImport("user32")]
    private static extern bool SetForegroundWindow(IntPtr windowHandle);

    #endregion

    //////////////////////////////////////////////////////////////////////////////////////////////////// Method
    ////////////////////////////////////////////////////////////////////////////////////////// Static
    //////////////////////////////////////////////////////////////////////////////// Public

    #region Bring window to front - BringToFront(window)

    /// <summary>
    /// Shows the window with SW_SHOW and then requests that it become the foreground window.
    /// </summary>
    /// <param name="window">WPF window whose native handle is used.</param>
    public static void BringToFront(Window window)
    {
        IntPtr nativeHandle = new WindowInteropHelper(window).Handle;

        ShowWindow(nativeHandle, SW_SHOW);

        SetForegroundWindow(nativeHandle);
    }

    #endregion
}
▶ WindowInformation.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
using System;

namespace TestProject;

/// <summary>
/// Window information: handle, title, owning process ID and size of a window.
/// </summary>
public class WindowInformation
{
    //////////////////////////////////////////////////////////////////////////////////////////////////// Property
    ////////////////////////////////////////////////////////////////////////////////////////// Public

    #region Window handle - WindowHandle

    /// <summary>
    /// Window handle.
    /// </summary>
    public IntPtr WindowHandle { get; set; }

    #endregion
    #region Title - Title

    /// <summary>
    /// Window title. Defaults to an empty string so that a freshly created instance never
    /// exposes <c>null</c> through this non-nullable property.
    /// </summary>
    public string Title { get; set; } = string.Empty;

    #endregion
    #region Process ID - ProcessID

    /// <summary>
    /// ID of the process that owns the window.
    /// </summary>
    public uint ProcessID { get; set; }

    #endregion
    #region Width - Width

    /// <summary>
    /// Window width.
    /// </summary>
    public int Width { get; set; }

    #endregion
    #region Height - Height

    /// <summary>
    /// Window height.
    /// </summary>
    public int Height { get; set; }

    #endregion
}