[C#/ML.NET] k-평균 클러스터링(k-means clustering) 사용하기
■ k-평균 클러스터링(k-means clustering)을 사용하는 방법을 보여준다.
▶ SourceModel.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
using Microsoft.ML.Data;

namespace TestProject
{
    /// <summary>
    /// Source model: thirteen <see cref="float"/> properties, each bound to a
    /// zero-based input column index through <c>LoadColumn</c>.
    /// </summary>
    public class SourceModel
    {
        /// <summary>Alcohol (input column 0).</summary>
        [LoadColumn(0)]
        public float Alcohol { get; set; }

        /// <summary>Malic acid (input column 1).</summary>
        [LoadColumn(1)]
        public float MalicAcid { get; set; }

        /// <summary>Ash (input column 2).</summary>
        [LoadColumn(2)]
        public float Ash { get; set; }

        /// <summary>Ash alcanity (input column 3).</summary>
        [LoadColumn(3)]
        public float AshAlcanity { get; set; }

        /// <summary>Magnesium (input column 4).</summary>
        [LoadColumn(4)]
        public float Magnesium { get; set; }

        /// <summary>Total phenols (input column 5).</summary>
        [LoadColumn(5)]
        public float TotalPhenols { get; set; }

        /// <summary>Flavanoids (input column 6).</summary>
        [LoadColumn(6)]
        public float Flavanoids { get; set; }

        /// <summary>Non-flavanoid phenols (input column 7).</summary>
        [LoadColumn(7)]
        public float NonflavanoidPhenols { get; set; }

        /// <summary>Proanthocyanins (input column 8).</summary>
        [LoadColumn(8)]
        public float Proanthocyanins { get; set; }

        /// <summary>Color intensity (input column 9).</summary>
        [LoadColumn(9)]
        public float ColorIntensity { get; set; }

        /// <summary>Hue (input column 10).</summary>
        [LoadColumn(10)]
        public float Hue { get; set; }

        /// <summary>OD280 (input column 11).</summary>
        [LoadColumn(11)]
        public float OD280 { get; set; }

        /// <summary>Proline (input column 12).</summary>
        [LoadColumn(12)]
        public float Proline { get; set; }
    }
}
▶ TargetModel.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
using Microsoft.ML.Data;

namespace TestProject
{
    /// <summary>
    /// Target model: receives the clustering output columns for one input row.
    /// </summary>
    /// <remarks>
    /// Members are exposed as auto-properties (rather than public fields) so the
    /// class follows the same convention as the source model; names, types and
    /// column bindings are unchanged.
    /// </remarks>
    public class TargetModel
    {
        /// <summary>
        /// Predicted cluster ID, bound to the "PredictedLabel" output column.
        /// </summary>
        [ColumnName("PredictedLabel")]
        public uint PredictedClusterID { get; set; }

        /// <summary>
        /// Distance array, bound to the "Score" output column.
        /// </summary>
        [ColumnName("Score")]
        public float[] DistanceArray { get; set; }
    }
}
▶ Program.cs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace TestProject
{
    /// <summary>
    /// Program
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Loads "wine-clustering.csv", splits it into train and
        /// test sets, fits a 3-cluster k-means pipeline on the train set, evaluates the
        /// model on the test set, prints the evaluation metrics, and finally prints the
        /// predicted cluster ID for one hard-coded sample.
        /// </summary>
        private static void Main()
        {
            // Single definition of the concatenated feature column name, which is
            // referenced by the concatenate step, the trainer and the evaluator.
            const string featureColumn = "Feature";

            string dataFilePath = "wine-clustering.csv";

            MLContext mlContext = new MLContext();

            IDataView sourceDataView = mlContext.Data.LoadFromTextFile<SourceModel>
            (
                dataFilePath,
                hasHeader     : true,
                separatorChar : ','
            );

            // Every column in the loaded schema is used as a clustering input.
            string[] columnNameArray = sourceDataView.Schema.Select(column => column.Name).ToArray();

            var data = mlContext.Data.TrainTestSplit(sourceDataView, testFraction : 0.2);

            var pipeline = mlContext.Transforms.Concatenate(featureColumn, columnNameArray)
                .Append(mlContext.Clustering.Trainers.KMeans(featureColumn, numberOfClusters : 3));

            var model = pipeline.Fit(data.TrainSet);

            IDataView testDataView = model.Transform(data.TestSet);

            ClusteringMetrics metrics = mlContext.Clustering.Evaluate
            (
                testDataView,
                scoreColumnName   : "Score",
                featureColumnName : featureColumn
            );

            // The evaluation result was previously computed and then discarded;
            // report it so the evaluation step has an observable effect.
            Console.WriteLine($"평균 거리 : {metrics.AverageDistance}");
            Console.WriteLine($"데이비스-볼딘 지수 : {metrics.DaviesBouldinIndex}");

            PredictionEngine<SourceModel, TargetModel> predictionEngine =
                mlContext.Model.CreatePredictionEngine<SourceModel, TargetModel>(model);

            TargetModel target = predictionEngine.Predict
            (
                new SourceModel()
                {
                    Alcohol             = 11,
                    Ash                 = 2.33f,
                    AshAlcanity         = 3.21f,
                    ColorIntensity      = 2.9f,
                    Flavanoids          = 1.54f,
                    Hue                 = 1.55f,
                    Magnesium           = 120,
                    MalicAcid           = 1.44f,
                    NonflavanoidPhenols = 0.3f,
                    OD280               = 2.93f,
                    Proanthocyanins     = 1.72f,
                    TotalPhenols        = 3.86f,
                    Proline             = 1300
                }
            );

            Console.WriteLine($"예측 클러스터 ID : {target.PredictedClusterID}");
        }
    }
}
TestProject.zip