diff --git a/.gitignore b/.gitignore
index 5d381cc..33068a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,406 @@
+x64/
+eigen-3.4.0/
+*.debug
+
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.tlog
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+*.ncb
+*.aps
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# Visual Studio History (VSHistory) files
+.vshistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+
+# VS Code files for those working on multiple tools
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
+
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/GMM/GMM.vcxproj b/GMM/GMM.vcxproj
new file mode 100644
index 0000000..42bdea4
--- /dev/null
+++ b/GMM/GMM.vcxproj
@@ -0,0 +1,136 @@
+
+
+
+
+ true
+ true
+ true
+ true
+ 15.0
+ {a2b67815-1235-4f7c-874d-4fccb3b0c738}
+ Win32Proj
+ GMM
+ 10.0
+ 10.0.17134.0
+
+
+
+
+ Debug
+ Win32
+
+
+ Release
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ Application
+ v143
+ v142
+ v141
+ v140
+ Unicode
+
+
+ true
+ true
+
+
+ false
+ true
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NotUsing
+ pch.h
+ $(IntDir)pch.pch
+ _CONSOLE;WIN32_LEAN_AND_MEAN;WINRT_LEAN_AND_MEAN;%(PreprocessorDefinitions)
+ Level4
+ %(AdditionalOptions) /permissive- /bigobj
+
+
+
+
+ Disabled
+ _DEBUG;%(PreprocessorDefinitions)
+ D:\matlab2023a\extern\include;D:\apps\Twirls\eigen-3.4.0;%(AdditionalIncludeDirectories)
+
+
+ Console
+ false
+ D:\matlab2023a\extern\lib\win64\microsoft;%(AdditionalLibraryDirectories)
+ libmat.lib;libmx.lib;libmex.lib;libeng.lib;%(AdditionalDependencies)
+
+
+
+
+ WIN32;%(PreprocessorDefinitions)
+
+
+
+
+ MaxSpeed
+ true
+ true
+ NDEBUG;%(PreprocessorDefinitions)
+ D:\matlab2023a\extern\include;D:\apps\Twirls\eigen-3.4.0;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ false
+ D:\matlab2023a\extern\lib\win64\microsoft;%(AdditionalLibraryDirectories)
+ libmat.lib;libmx.lib;libmex.lib;libeng.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+
+
+
+
+
+
+
+ 这台计算机上缺少此项目引用的 NuGet 程序包。使用“NuGet 程序包还原”可下载这些程序包。有关更多信息,请参见 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。
+
+
+
+
+
\ No newline at end of file
diff --git a/GMM/GMM.vcxproj.filters b/GMM/GMM.vcxproj.filters
new file mode 100644
index 0000000..c29d2e2
--- /dev/null
+++ b/GMM/GMM.vcxproj.filters
@@ -0,0 +1,43 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/GMM/PropertySheet.props b/GMM/PropertySheet.props
new file mode 100644
index 0000000..b0c6226
--- /dev/null
+++ b/GMM/PropertySheet.props
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/GMM/gmm.cpp b/GMM/gmm.cpp
new file mode 100644
index 0000000..1d6300c
--- /dev/null
+++ b/GMM/gmm.cpp
@@ -0,0 +1,620 @@
+/***************************************************************************
+
+Module Name:
+
+ Gaussian Mixture Model with Diagonal Covariance Matrix
+
+History:
+
+ 2003/11/01 Fei Wang
+ 2013 luxiaoxun
+***************************************************************************/
+#include <math.h>
+#include <iostream>
+#include <fstream>
+#include "gmm.h"
+#include "kmeans.h"
+using namespace std;
+
+GMM::GMM(int dimNum, int mixNum)
+{
+ m_dimNum = dimNum;
+ m_mixNum = mixNum;
+
+ m_maxIterNum = 100;
+ m_endError = 0.001;
+
+ Allocate();
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ m_priors[i] = 1.0 / m_mixNum;
+
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_means[i][d] = 0;
+ m_vars[i][d] = 1;
+ }
+ }
+}
+
+GMM::~GMM()
+{
+ Dispose();
+}
+
+void GMM::Allocate()
+{
+ m_priors = new double[m_mixNum];
+ m_means = new double* [m_mixNum];
+ m_vars = new double* [m_mixNum];
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ m_means[i] = new double[m_dimNum];
+ m_vars[i] = new double[m_dimNum];
+ }
+
+ m_minVars = new double[m_dimNum];
+}
+
+void GMM::Dispose()
+{
+ delete[] m_priors;
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ delete[] m_means[i];
+ delete[] m_vars[i];
+ }
+ delete[] m_means;
+ delete[] m_vars;
+
+ delete[] m_minVars;
+}
+
+void GMM::Copy(GMM* gmm)
+{
+ assert(m_mixNum == gmm->m_mixNum && m_dimNum == gmm->m_dimNum);
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ m_priors[i] = gmm->Prior(i);
+ memcpy(m_means[i], gmm->Mean(i), sizeof(double) * m_dimNum);
+ memcpy(m_vars[i], gmm->Variance(i), sizeof(double) * m_dimNum);
+ }
+ memcpy(m_minVars, gmm->m_minVars, sizeof(double) * m_dimNum);
+}
+
+double GMM::GetProbability(const double* sample)
+{
+ double p = 0;
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ p += m_priors[i] * GetProbability(sample, i);
+ }
+ return p;
+}
+
+double GMM::GetProbability(const double* x, int j)
+{
+ double p = 1;
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ p *= 1 / sqrt(2 * M_PI * m_vars[j][d]);
+ p *= exp(-0.5 * (x[d] - m_means[j][d]) * (x[d] - m_means[j][d]) / m_vars[j][d]);
+ }
+ return p;
+}
+
+void GMM::Train(const char* sampleFileName)
+{
+ //DumpSampleFile(sampleFileName);
+ Init(sampleFileName);
+
+ ifstream sampleFile(sampleFileName, ios_base::binary);
+ assert(sampleFile);
+
+ int size = 0;
+ sampleFile.seekg(0, ios_base::beg);
+ sampleFile.read((char*)&size, sizeof(int));
+
+ // Reestimation
+ bool loop = true;
+ double iterNum = 0;
+ double lastL = 0;
+ double currL = 0;
+ int unchanged = 0;
+ double* x = new double[m_dimNum]; // Sample data
+ double* next_priors = new double[m_mixNum];
+ double** next_vars = new double* [m_mixNum];
+ double** next_means = new double* [m_mixNum];
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ next_means[i] = new double[m_dimNum];
+ next_vars[i] = new double[m_dimNum];
+ }
+
+ while (loop)
+ {
+ // Clear buffer for reestimation
+ memset(next_priors, 0, sizeof(double) * m_mixNum);
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ memset(next_vars[i], 0, sizeof(double) * m_dimNum);
+ memset(next_means[i], 0, sizeof(double) * m_dimNum);
+ }
+
+ lastL = currL;
+ currL = 0;
+
+ // Predict
+ sampleFile.seekg(2 * sizeof(int), ios_base::beg);
+ for (int k = 0; k < size; k++)
+ {
+ sampleFile.read((char*)x, sizeof(double) * m_dimNum);
+ double p = GetProbability(x);
+
+ for (int j = 0; j < m_mixNum; j++)
+ {
+ double pj = GetProbability(x, j) * m_priors[j] / p;
+ next_priors[j] += pj;
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[j][d] += pj * x[d];
+ next_vars[j][d] += pj * x[d] * x[d];
+ }
+ }
+ currL += (p > 1E-20) ? log10(p) : -20;
+ }
+ currL /= size;
+
+ // Reestimation: generate new priors, means and variances.
+ for (int j = 0; j < m_mixNum; j++)
+ {
+ m_priors[j] = next_priors[j] / size;
+
+ if (m_priors[j] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_means[j][d] = next_means[j][d] / next_priors[j];
+ m_vars[j][d] = next_vars[j][d] / next_priors[j] - m_means[j][d] * m_means[j][d];
+ if (m_vars[j][d] < m_minVars[d])
+ {
+ m_vars[j][d] = m_minVars[d];
+ }
+ }
+ }
+ }
+
+ // Terminal conditions
+ iterNum++;
+ if (fabs(currL - lastL) < m_endError * fabs(lastL))
+ {
+ unchanged++;
+ }
+ if (iterNum >= m_maxIterNum || unchanged >= 3)
+ {
+ loop = false;
+ }
+ //--- Debug ---
+ //cout << "Iter: " << iterNum << ", Average Log-Probability: " << currL << endl;
+ }
+
+ sampleFile.close();
+ delete[] next_priors;
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ delete[] next_means[i];
+ delete[] next_vars[i];
+ }
+ delete[] next_means;
+ delete[] next_vars;
+ delete[] x;
+}
+
+void GMM::Train(double* data, int N)
+{
+ Init(data, N);
+
+ int size = N;
+
+ // Reestimation
+ bool loop = true;
+ double iterNum = 0;
+ double lastL = 0;
+ double currL = 0;
+ int unchanged = 0;
+ double* x = new double[m_dimNum]; // Sample data
+ double* next_priors = new double[m_mixNum];
+ double** next_vars = new double* [m_mixNum];
+ double** next_means = new double* [m_mixNum];
+
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ next_means[i] = new double[m_dimNum];
+ next_vars[i] = new double[m_dimNum];
+ }
+
+ while (loop)
+ {
+ // Clear buffer for reestimation
+ memset(next_priors, 0, sizeof(double) * m_mixNum);
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ memset(next_vars[i], 0, sizeof(double) * m_dimNum);
+ memset(next_means[i], 0, sizeof(double) * m_dimNum);
+ }
+
+ lastL = currL;
+ currL = 0;
+
+ // Predict
+ for (int k = 0; k < size; k++)
+ {
+ for (int j = 0;j < m_dimNum;j++)
+ x[j] = data[k * m_dimNum + j];
+ double p = GetProbability(x);
+
+ for (int j = 0; j < m_mixNum; j++)
+ {
+ double pj = GetProbability(x, j) * m_priors[j] / p;
+
+ next_priors[j] += pj;
+
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[j][d] += pj * x[d];
+ next_vars[j][d] += pj * x[d] * x[d];
+ }
+ }
+
+ currL += (p > 1E-20) ? log10(p) : -20;
+ }
+ currL /= size;
+
+ // Reestimation: generate new priors, means and variances.
+ for (int j = 0; j < m_mixNum; j++)
+ {
+ m_priors[j] = next_priors[j] / size;
+
+ if (m_priors[j] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_means[j][d] = next_means[j][d] / next_priors[j];
+ m_vars[j][d] = next_vars[j][d] / next_priors[j] - m_means[j][d] * m_means[j][d];
+ if (m_vars[j][d] < m_minVars[d])
+ {
+ m_vars[j][d] = m_minVars[d];
+ }
+ }
+ }
+ }
+
+ // Terminal conditions
+ iterNum++;
+ if (fabs(currL - lastL) < m_endError * fabs(lastL))
+ {
+ unchanged++;
+ }
+ if (iterNum >= m_maxIterNum || unchanged >= 3)
+ {
+ loop = false;
+ }
+
+ //--- Debug ---
+ //cout << "Iter: " << iterNum << ", Average Log-Probability: " << currL << endl;
+ }
+ delete[] next_priors;
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ delete[] next_means[i];
+ delete[] next_vars[i];
+ }
+ delete[] next_means;
+ delete[] next_vars;
+ delete[] x;
+}
+
+void GMM::Init(double* data, int N)
+{
+ const double MIN_VAR = 1E-10;
+
+ KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
+ kmeans->SetInitMode(KMeans::InitUniform);
+ int* Label;
+ Label = new int[N];
+ kmeans->Cluster(data, N, Label);
+
+ int* counts = new int[m_mixNum];
+ double* overMeans = new double[m_dimNum]; // Overall mean of training data
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ counts[i] = 0;
+ m_priors[i] = 0;
+ memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
+ memset(m_vars[i], 0, sizeof(double) * m_dimNum);
+ }
+ memset(overMeans, 0, sizeof(double) * m_dimNum);
+ memset(m_minVars, 0, sizeof(double) * m_dimNum);
+
+ int size = 0;
+ size = N;
+
+ double* x = new double[m_dimNum];
+ int label = -1;
+
+ for (int i = 0; i < size; i++)
+ {
+ for (int j = 0;j < m_dimNum;j++)
+ x[j] = data[i * m_dimNum + j];
+ label = Label[i];
+
+ // Count each Gaussian
+ counts[label]++;
+ double* m = kmeans->GetMean(label);
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
+ }
+
+ // Count the overall mean and variance.
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ overMeans[d] += x[d];
+ m_minVars[d] += x[d] * x[d];
+ }
+ }
+
+ // Compute the overall variance (* 0.01) as the minimum variance.
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ overMeans[d] /= size;
+ m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
+ }
+
+ // Initialize each Gaussian.
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ m_priors[i] = 1.0 * counts[i] / size;
+
+ if (m_priors[i] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_vars[i][d] = m_vars[i][d] / counts[i];
+
+ // A minimum variance for each dimension is required.
+ if (m_vars[i][d] < m_minVars[d])
+ {
+ m_vars[i][d] = m_minVars[d];
+ }
+ }
+ }
+ else
+ {
+ memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
+ cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
+ }
+ }
+ delete kmeans;
+ delete[] x;
+ delete[] counts;
+ delete[] overMeans;
+ delete[] Label;
+
+}
+
+void GMM::Init(const char* sampleFileName)
+{
+ const double MIN_VAR = 1E-10;
+
+ KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
+ kmeans->SetInitMode(KMeans::InitUniform);
+ kmeans->Cluster(sampleFileName, "gmm_init.tmp");
+
+ int* counts = new int[m_mixNum];
+ double* overMeans = new double[m_dimNum]; // Overall mean of training data
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ counts[i] = 0;
+ m_priors[i] = 0;
+ memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
+ memset(m_vars[i], 0, sizeof(double) * m_dimNum);
+ }
+ memset(overMeans, 0, sizeof(double) * m_dimNum);
+ memset(m_minVars, 0, sizeof(double) * m_dimNum);
+
+ // Open the sample and label file to initialize the model
+ ifstream sampleFile(sampleFileName, ios_base::binary);
+ assert(sampleFile);
+
+ ifstream labelFile("gmm_init.tmp", ios_base::binary);
+ assert(labelFile);
+
+ int size = 0;
+ sampleFile.read((char*)&size, sizeof(int));
+ sampleFile.seekg(2 * sizeof(int), ios_base::beg);
+ labelFile.seekg(sizeof(int), ios_base::beg);
+
+ double* x = new double[m_dimNum];
+ int label = -1;
+
+ for (int i = 0; i < size; i++)
+ {
+ sampleFile.read((char*)x, sizeof(double) * m_dimNum);
+ labelFile.read((char*)&label, sizeof(int));
+
+ // Count each Gaussian
+ counts[label]++;
+ double* m = kmeans->GetMean(label);
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
+ }
+
+ // Count the overall mean and variance.
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ overMeans[d] += x[d];
+ m_minVars[d] += x[d] * x[d];
+ }
+ }
+
+ // Compute the overall variance (* 0.01) as the minimum variance.
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ overMeans[d] /= size;
+ m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
+ }
+
+ // Initialize each Gaussian.
+ for (int i = 0; i < m_mixNum; i++)
+ {
+ m_priors[i] = 1.0 * counts[i] / size;
+
+ if (m_priors[i] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ m_vars[i][d] = m_vars[i][d] / counts[i];
+
+ // A minimum variance for each dimension is required.
+ if (m_vars[i][d] < m_minVars[d])
+ {
+ m_vars[i][d] = m_minVars[d];
+ }
+ }
+ }
+ else
+ {
+ memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
+ cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
+ }
+ }
+
+ delete kmeans;
+ delete[] x;
+ delete[] counts;
+ delete[] overMeans;
+
+ sampleFile.close();
+ labelFile.close();
+}
+
+void GMM::DumpSampleFile(const char* fileName)
+{
+ ifstream sampleFile(fileName, ios_base::binary);
+ assert(sampleFile);
+
+ int size = 0;
+ sampleFile.read((char*)&size, sizeof(int));
+ cout << size << endl;
+
+ int dim = 0;
+ sampleFile.read((char*)&dim, sizeof(int));
+ cout << dim << endl;
+
+ double* f = new double[dim];
+ for (int i = 0; i < size; i++)
+ {
+ sampleFile.read((char*)f, sizeof(double) * dim);
+
+ cout << i << ":";
+ for (int j = 0; j < dim; j++)
+ {
+ cout << " " << f[j];
+ }
+ cout << endl;
+ }
+
+ delete[] f;
+ sampleFile.close();
+}
+
+ostream& operator<<(ostream& out, GMM& gmm)
+{
+ out << "<GMM>" << endl;
+ out << "<DimNum> " << gmm.m_dimNum << " </DimNum>" << endl;
+ out << "<MixNum> " << gmm.m_mixNum << " </MixNum>" << endl;
+
+ out << "<Prior> ";
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ out << gmm.m_priors[i] << " ";
+ }
+ out << "</Prior>" << endl;
+
+ out << "<Mean>" << endl;
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ for (int d = 0; d < gmm.m_dimNum; d++)
+ {
+ out << gmm.m_means[i][d] << " ";
+ }
+ out << endl;
+ }
+ out << "</Mean>" << endl;
+
+ out << "<Variance>" << endl;
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ for (int d = 0; d < gmm.m_dimNum; d++)
+ {
+ out << gmm.m_vars[i][d] << " ";
+ }
+ out << endl;
+ }
+ out << "</Variance>" << endl;
+
+ out << "</GMM>" << endl;
+
+ return out;
+}
+
+istream& operator>>(istream& in, GMM& gmm)
+{
+ char label[50];
+ in >> label; // "<GMM>"
+ assert(strcmp(label, "<GMM>") == 0);
+
+ gmm.Dispose();
+
+ in >> label >> gmm.m_dimNum >> label; // "<DimNum> ... </DimNum>"
+ in >> label >> gmm.m_mixNum >> label; // "<MixNum> ... </MixNum>"
+
+ gmm.Allocate();
+
+ in >> label; // "<Prior>"
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ in >> gmm.m_priors[i];
+ }
+ in >> label;
+
+ in >> label; // "<Mean>"
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ for (int d = 0; d < gmm.m_dimNum; d++)
+ {
+ in >> gmm.m_means[i][d];
+ }
+ }
+ in >> label;
+
+ in >> label; // "<Variance>"
+ for (int i = 0; i < gmm.m_mixNum; i++)
+ {
+ for (int d = 0; d < gmm.m_dimNum; d++)
+ {
+ in >> gmm.m_vars[i][d];
+ }
+ }
+ in >> label;
+
+ in >> label; // "</GMM>"
+ return in;
+}
\ No newline at end of file
diff --git a/GMM/gmm.h b/GMM/gmm.h
new file mode 100644
index 0000000..3e2f329
--- /dev/null
+++ b/GMM/gmm.h
@@ -0,0 +1,68 @@
+/***************************************************************************
+Module Name:
+ Gaussian Mixture Model with Diagonal Covariance Matrix
+
+History:
+ 2003/11/01 Fei Wang
+ 2013 luxiaoxun
+***************************************************************************/
+
+#pragma once
+#include <fstream>
+#define M_PI 3.1415926535897932384626433832795
+
+class GMM
+{
+public:
+ GMM(int dimNum = 1, int mixNum = 1);
+ ~GMM();
+
+ void Copy(GMM* gmm);
+
+ void SetMaxIterNum(int i) { m_maxIterNum = i; }
+ void SetEndError(double f) { m_endError = f; }
+
+ int GetDimNum() { return m_dimNum; }
+ int GetMixNum() { return m_mixNum; }
+ int GetMaxIterNum() { return m_maxIterNum; }
+ double GetEndError() { return m_endError; }
+
+ double& Prior(int i) { return m_priors[i]; }
+ double* Mean(int i) { return m_means[i]; }
+ double* Variance(int i) { return m_vars[i]; }
+
+ void setPrior(int i, double val) { m_priors[i] = val; }
+ void setMean(int i, double* val) { for (int j = 0;j < m_dimNum;j++) m_means[i][j] = val[j]; }
+ void setVariance(int i, double* val) { for (int j = 0;j < m_dimNum;j++) m_vars[i][j] = val[j]; }
+
+ double GetProbability(const double* sample);
+
+ /* SampleFile: <size><dim><sample data>... — presumably binary int header then doubles; TODO confirm against DumpSampleFile */
+ void Init(const char* sampleFileName);
+ void Train(const char* sampleFileName);
+ void Init(double* data, int N);
+ void Train(double* data, int N);
+
+ void DumpSampleFile(const char* fileName);
+
+ friend std::ostream& operator<<(std::ostream& out, GMM& gmm);
+ friend std::istream& operator>>(std::istream& in, GMM& gmm);
+
+private:
+ int m_dimNum; // 样本维数
+ int m_mixNum; // Gaussian数目
+ double* m_priors; // Gaussian权重
+ double** m_means; // Gaussian均值
+ double** m_vars; // Gaussian方差
+
+ // A minimum variance is required. Now, it is the overall variance * 0.01.
+ double* m_minVars;
+ int m_maxIterNum; // The stopping criterion regarding the number of iterations
+ double m_endError; // The stopping criterion regarding the error
+
+private:
+ // Return the "j"th pdf, p(x|j).
+ double GetProbability(const double* x, int j);
+ void Allocate();
+ void Dispose();
+};
\ No newline at end of file
diff --git a/GMM/kmeans.cpp b/GMM/kmeans.cpp
new file mode 100644
index 0000000..f284c42
--- /dev/null
+++ b/GMM/kmeans.cpp
@@ -0,0 +1,389 @@
+/***************************************************************************
+Module Name:
+ KMeans
+
+History:
+ 2003/10/16 Fei Wang
+ 2013 luxiaoxun
+***************************************************************************/
+#include <assert.h>
+#include <math.h>
+#include <iostream>
+#include <fstream>
+#include <time.h>
+#include "kmeans.h"
+using namespace std;
+
+
+KMeans::KMeans(int dimNum, int clusterNum)
+{
+ m_dimNum = dimNum;
+ m_clusterNum = clusterNum;
+
+ m_means = new double* [m_clusterNum];
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ m_means[i] = new double[m_dimNum];
+ memset(m_means[i], 0, sizeof(double) * m_dimNum);
+ }
+
+ m_initMode = InitRandom;
+ m_maxIterNum = 100;
+ m_endError = 0.001;
+}
+
+KMeans::~KMeans()
+{
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ delete[] m_means[i];
+ }
+ delete[] m_means;
+}
+
+void KMeans::Cluster(const char* sampleFileName, const char* labelFileName)
+{
+ // Check the sample file
+ ifstream sampleFile(sampleFileName, ios_base::binary);
+ assert(sampleFile);
+
+ int size = 0;
+ int dim = 0;
+ sampleFile.read((char*)&size, sizeof(int));
+ sampleFile.read((char*)&dim, sizeof(int));
+ assert(size >= m_clusterNum);
+ assert(dim == m_dimNum);
+
+ // Initialize model
+ Init(sampleFile);
+
+ // Recursion
+ double* x = new double[m_dimNum]; // Sample data
+ int label = -1; // Class index
+ double iterNum = 0;
+ double lastCost = 0;
+ double currCost = 0;
+ int unchanged = 0;
+ bool loop = true;
+ int* counts = new int[m_clusterNum];
+ double** next_means = new double* [m_clusterNum]; // New model for reestimation
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ next_means[i] = new double[m_dimNum];
+ }
+
+ while (loop)
+ {
+ memset(counts, 0, sizeof(int) * m_clusterNum);
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ memset(next_means[i], 0, sizeof(double) * m_dimNum);
+ }
+
+ lastCost = currCost;
+ currCost = 0;
+
+ sampleFile.clear();
+ sampleFile.seekg(sizeof(int) * 2, ios_base::beg);
+
+ // Classification
+ for (int i = 0; i < size; i++)
+ {
+ sampleFile.read((char*)x, sizeof(double) * m_dimNum);
+ currCost += GetLabel(x, &label);
+
+ counts[label]++;
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[label][d] += x[d];
+ }
+ }
+ currCost /= size;
+
+ // Reestimation
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ if (counts[i] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[i][d] /= counts[i];
+ }
+ memcpy(m_means[i], next_means[i], sizeof(double) * m_dimNum);
+ }
+ }
+
+ // Terminal conditions
+ iterNum++;
+ if (fabs(lastCost - currCost) < m_endError * lastCost)
+ {
+ unchanged++;
+ }
+ if (iterNum >= m_maxIterNum || unchanged >= 3)
+ {
+ loop = false;
+ }
+ //DEBUG
+ //cout << "Iter: " << iterNum << ", Average Cost: " << currCost << endl;
+ }
+
+ // Output the label file
+ ofstream labelFile(labelFileName, ios_base::binary);
+ assert(labelFile);
+
+ labelFile.write((char*)&size, sizeof(int));
+ sampleFile.clear();
+ sampleFile.seekg(sizeof(int) * 2, ios_base::beg);
+
+ for (int i = 0; i < size; i++)
+ {
+ sampleFile.read((char*)x, sizeof(double) * m_dimNum);
+ GetLabel(x, &label);
+ labelFile.write((char*)&label, sizeof(int));
+ }
+
+ sampleFile.close();
+ labelFile.close();
+
+ delete[] counts;
+ delete[] x;
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ delete[] next_means[i];
+ }
+ delete[] next_means;
+}
+
+//
+void KMeans::Cluster(double* data, int N, int* Label)
+{
+ int size = 0;
+ size = N;
+
+ assert(size >= m_clusterNum);
+
+ // Initialize model
+ Init(data, N);
+
+ // Recursion
+ double* x = new double[m_dimNum]; // Sample data
+ int label = -1; // Class index
+ double iterNum = 0;
+ double lastCost = 0;
+ double currCost = 0;
+ int unchanged = 0;
+ bool loop = true;
+ int* counts = new int[m_clusterNum];
+ double** next_means = new double* [m_clusterNum]; // New model for reestimation
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ next_means[i] = new double[m_dimNum];
+ }
+
+ while (loop)
+ {
+ memset(counts, 0, sizeof(int) * m_clusterNum);
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ memset(next_means[i], 0, sizeof(double) * m_dimNum);
+ }
+
+ lastCost = currCost;
+ currCost = 0;
+
+ // Classification
+ for (int i = 0; i < size; i++)
+ {
+ for (int j = 0; j < m_dimNum; j++)
+ x[j] = data[i * m_dimNum + j];
+
+ currCost += GetLabel(x, &label);
+
+ counts[label]++;
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[label][d] += x[d];
+ }
+ }
+ currCost /= size;
+
+ // Reestimation
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ if (counts[i] > 0)
+ {
+ for (int d = 0; d < m_dimNum; d++)
+ {
+ next_means[i][d] /= counts[i];
+ }
+ memcpy(m_means[i], next_means[i], sizeof(double) * m_dimNum);
+ }
+ }
+
+ // Terminal conditions
+ iterNum++;
+ if (fabs(lastCost - currCost) < m_endError * lastCost)
+ {
+ unchanged++;
+ }
+ if (iterNum >= m_maxIterNum || unchanged >= 3)
+ {
+ loop = false;
+ }
+
+ //DEBUG
+ //cout << "Iter: " << iterNum << ", Average Cost: " << currCost << endl;
+ }
+
+ // Output the label file
+ for (int i = 0; i < size; i++)
+ {
+ for (int j = 0; j < m_dimNum; j++)
+ x[j] = data[i * m_dimNum + j];
+ GetLabel(x, &label);
+ Label[i] = label;
+ }
+ delete[] counts;
+ delete[] x;
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ delete[] next_means[i];
+ }
+ delete[] next_means;
+}
+
+void KMeans::Init(double* data, int N)
+{
+ int size = N;
+
+ if (m_initMode == InitRandom)
+ {
+ int inteval = size / m_clusterNum;
+ double* sample = new double[m_dimNum];
+
+ // Seed the random-number generator with current time
+ srand((unsigned)time(NULL));
+
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ int select = inteval * i + (inteval - 1) * rand() / RAND_MAX;
+ for (int j = 0; j < m_dimNum; j++)
+ sample[j] = data[select * m_dimNum + j];
+ memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
+ }
+
+ delete[] sample;
+ }
+ else if (m_initMode == InitUniform)
+ {
+ double* sample = new double[m_dimNum];
+
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ int select = i * size / m_clusterNum;
+ for (int j = 0; j < m_dimNum; j++)
+ sample[j] = data[select * m_dimNum + j];
+ memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
+ }
+
+ delete[] sample;
+ }
+ else if (m_initMode == InitManual)
+ {
+ // Do nothing
+ }
+}
+
+void KMeans::Init(ifstream& sampleFile)
+{
+ int size = 0;
+ sampleFile.seekg(0, ios_base::beg);
+ sampleFile.read((char*)&size, sizeof(int));
+
+ if (m_initMode == InitRandom)
+ {
+ int inteval = size / m_clusterNum;
+ double* sample = new double[m_dimNum];
+
+ // Seed the random-number generator with current time
+ srand((unsigned)time(NULL));
+
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ int select = inteval * i + (inteval - 1) * rand() / RAND_MAX;
+ int offset = sizeof(int) * 2 + select * sizeof(double) * m_dimNum;
+
+ sampleFile.seekg(offset, ios_base::beg);
+ sampleFile.read((char*)sample, sizeof(double) * m_dimNum);
+ memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
+ }
+
+ delete[] sample;
+ }
+ else if (m_initMode == InitUniform)
+ {
+ double* sample = new double[m_dimNum];
+
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ int select = i * size / m_clusterNum;
+ int offset = sizeof(int) * 2 + select * sizeof(double) * m_dimNum;
+
+ sampleFile.seekg(offset, ios_base::beg);
+ sampleFile.read((char*)sample, sizeof(double) * m_dimNum);
+ memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
+ }
+
+ delete[] sample;
+ }
+ else if (m_initMode == InitManual)
+ {
+ // Do nothing
+ }
+}
+
+double KMeans::GetLabel(const double* sample, int* label)
+{
+ double dist = -1;
+ for (int i = 0; i < m_clusterNum; i++)
+ {
+ double temp = CalcDistance(sample, m_means[i], m_dimNum);
+ if (temp < dist || dist == -1)
+ {
+ dist = temp;
+ *label = i;
+ }
+ }
+ return dist;
+}
+
+double KMeans::CalcDistance(const double* x, const double* u, int dimNum)
+{
+ double temp = 0;
+ for (int d = 0; d < dimNum; d++)
+ {
+ temp += (x[d] - u[d]) * (x[d] - u[d]);
+ }
+ return sqrt(temp);
+}
+
+ostream& operator<<(ostream& out, KMeans& kmeans)
+{
+ out << "<KMeans>" << endl;
+ out << "<DimNum> " << kmeans.m_dimNum << " </DimNum>" << endl;
+ out << "<ClusterNum> " << kmeans.m_clusterNum << " </ClusterNum>" << endl;
+
+ out << "<Mean>" << endl;
+ for (int i = 0; i < kmeans.m_clusterNum; i++)
+ {
+ for (int d = 0; d < kmeans.m_dimNum; d++)
+ {
+ out << kmeans.m_means[i][d] << " ";
+ }
+ out << endl;
+ }
+ out << "</Mean>" << endl;
+
+ out << "</KMeans>" << endl;
+ return out;
+}
\ No newline at end of file
diff --git a/GMM/kmeans.h b/GMM/kmeans.h
new file mode 100644
index 0000000..2687808
--- /dev/null
+++ b/GMM/kmeans.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+Module Name:
+ KMeans
+
+History:
+ 2003/10/16 Fei Wang
+ 2013 luxiaoxun
+***************************************************************************/
+
+#pragma once
+#include
+
+class KMeans
+{
+public:
+ enum InitMode
+ {
+ InitRandom,
+ InitManual,
+ InitUniform,
+ };
+
+ KMeans(int dimNum = 1, int clusterNum = 1);
+ ~KMeans();
+
+ void SetMean(int i, const double* u) { memcpy(m_means[i], u, sizeof(double) * m_dimNum); }
+ void SetInitMode(int i) { m_initMode = i; }
+ void SetMaxIterNum(int i) { m_maxIterNum = i; }
+ void SetEndError(double f) { m_endError = f; }
+
+ double* GetMean(int i) { return m_means[i]; }
+ int GetInitMode() { return m_initMode; }
+ int GetMaxIterNum() { return m_maxIterNum; }
+ double GetEndError() { return m_endError; }
+
+
+ /* SampleFile: ...
+ LabelFile: