将随机模拟加入项目中,创建了高斯混合模型的工程

This commit is contained in:
zzh 2023-09-18 00:10:52 +08:00
parent 97a0e0d6db
commit fb5fe032a4
20 changed files with 2532 additions and 1 deletions

403
.gitignore vendored
View File

@ -1,3 +1,406 @@
x64/
eigen-3.4.0/
*.debug
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/

136
GMM/GMM.vcxproj 100644
View File

@ -0,0 +1,136 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props')" />
<PropertyGroup Label="Globals">
<CppWinRTOptimized>true</CppWinRTOptimized>
<CppWinRTRootNamespaceAutoMerge>true</CppWinRTRootNamespaceAutoMerge>
<CppWinRTGenerateWindowsMetadata>true</CppWinRTGenerateWindowsMetadata>
<MinimalCoreWin>true</MinimalCoreWin>
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{a2b67815-1235-4f7c-874d-4fccb3b0c738}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>GMM</RootNamespace>
<WindowsTargetPlatformVersion Condition=" '$(WindowsTargetPlatformVersion)' == '' ">10.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformMinVersion>10.0.17134.0</WindowsTargetPlatformMinVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v143</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '16.0'">v142</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '15.0'">v141</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '14.0'">v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="PropertySheet.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
<PrecompiledHeaderOutputFile>$(IntDir)pch.pch</PrecompiledHeaderOutputFile>
<PreprocessorDefinitions>_CONSOLE;WIN32_LEAN_AND_MEAN;WINRT_LEAN_AND_MEAN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<WarningLevel>Level4</WarningLevel>
<AdditionalOptions>%(AdditionalOptions) /permissive- /bigobj</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<!-- Debug-only compiler/linker settings. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<!-- NOTE(review): absolute, machine-specific include paths (MATLAB and Eigen on drive D:), applied to Debug|x64 only.
     Other machines must have the same layout; consider a property/user macro instead. -->
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">D:\matlab2023a\extern\include;D:\apps\Twirls\eigen-3.4.0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
<!-- NOTE(review): MATLAB import libraries are referenced for Debug|x64 only; Debug|Win32 gets neither the library path nor the libs. -->
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">D:\matlab2023a\extern\lib\win64\microsoft;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">libmat.lib;libmx.lib;libmex.lib;libeng.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<ClCompile>
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<!-- Release-only compiler/linker settings. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<!-- NOTE(review): absolute, machine-specific include paths (MATLAB and Eigen on drive D:), applied to Release|x64 only.
     Other machines must have the same layout; consider a property/user macro instead. -->
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">D:\matlab2023a\extern\include;D:\apps\Twirls\eigen-3.4.0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
<!-- NOTE(review): MATLAB import libraries are referenced for Release|x64 only; Release|Win32 gets neither the library path nor the libs. -->
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">D:\matlab2023a\extern\lib\win64\microsoft;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Release|x64'">libmat.lib;libmx.lib;libmex.lib;libeng.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="gmm.h" />
<ClInclude Include="kmeans.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="gmm.cpp" />
<ClCompile Include="kmeans.cpp" />
<ClCompile Include="main.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
<None Include="PropertySheet.props" />
<Text Include="readme.txt">
<DeploymentContent>false</DeploymentContent>
</Text>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>这台计算机上缺少此项目引用的 NuGet 程序包。使用“NuGet 程序包还原”可下载这些程序包。有关更多信息,请参见 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props'))" />
<Error Condition="!Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets'))" />
</Target>
</Project>

View File

@ -0,0 +1,43 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="kmeans.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="gmm.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="main.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="kmeans.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="gmm.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="PropertySheet.props" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<Text Include="readme.txt" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<!--
To customize common C++/WinRT project properties:
* right-click the project node
* expand the Common Properties item
* select the C++/WinRT property page
For more advanced scenarios, and complete documentation, please see:
https://github.com/Microsoft/cppwinrt/tree/master/nuget
-->
<PropertyGroup />
<ItemDefinitionGroup />
</Project>

620
GMM/gmm.cpp 100644
View File

@ -0,0 +1,620 @@
/***************************************************************************
Module Name:
Gaussian Mixture Model with Diagonal Covariance Matrix
History:
2003/11/01 Fei Wang
2013 luxiaoxun
***************************************************************************/
#include <math.h>
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
#include "gmm.h"
#include "kmeans.h"
using namespace std;
// Builds a mixture of `mixNum` diagonal-covariance Gaussians over
// `dimNum`-dimensional samples. Every component starts with an equal
// weight, a zero mean and unit variance. The stopping criteria default to
// at most 100 iterations and a relative end error of 0.001.
GMM::GMM(int dimNum, int mixNum)
    : m_dimNum(dimNum),
      m_mixNum(mixNum),
      m_maxIterNum(100),
      m_endError(0.001)
{
    Allocate();

    for (int k = 0; k < m_mixNum; ++k)
    {
        m_priors[k] = 1.0 / m_mixNum;

        double* mean = m_means[k];
        double* var = m_vars[k];
        for (int c = 0; c < m_dimNum; ++c)
        {
            mean[c] = 0;
            var[c] = 1;
        }
    }
}
// Releases every buffer owned by the model (see Dispose()).
GMM::~GMM()
{
    this->Dispose();
}
// Allocates the parameter buffers for the current m_mixNum / m_dimNum:
//   m_priors  - one weight per component,
//   m_means   - one row of m_dimNum values per component,
//   m_vars    - one row of m_dimNum values per component,
//   m_minVars - one variance floor per dimension.
// All storage is value-initialized (zeros / null row pointers), so nothing
// reads indeterminate memory if a buffer is used before it is explicitly
// filled (e.g. m_minVars before Init(), or after a partially failed load).
void GMM::Allocate()
{
    m_priors = new double[m_mixNum]();
    m_means = new double*[m_mixNum]();
    m_vars = new double*[m_mixNum]();
    for (int i = 0; i < m_mixNum; i++)
    {
        m_means[i] = new double[m_dimNum]();
        m_vars[i] = new double[m_dimNum]();
    }
    m_minVars = new double[m_dimNum]();
}
void GMM::Dispose()
{
delete[] m_priors;
for (int i = 0; i < m_mixNum; i++)
{
delete[] m_means[i];
delete[] m_vars[i];
}
delete[] m_means;
delete[] m_vars;
delete[] m_minVars;
}
// Copies all parameters (weights, means, variances and variance floors)
// from `gmm` into this model. Both models must have the same number of
// components and the same dimensionality; this is only checked by assert.
void GMM::Copy(GMM* gmm)
{
    assert(m_mixNum == gmm->m_mixNum && m_dimNum == gmm->m_dimNum);

    const size_t rowBytes = sizeof(double) * m_dimNum;
    for (int k = 0; k < m_mixNum; ++k)
    {
        m_priors[k] = gmm->Prior(k);
        memcpy(m_means[k], gmm->Mean(k), rowBytes);
        memcpy(m_vars[k], gmm->Variance(k), rowBytes);
    }
    memcpy(m_minVars, gmm->m_minVars, rowBytes);
}
// Returns the mixture density at `sample`: the sum over all components of
// prior[k] * p(sample | k). `sample` must point to m_dimNum values.
double GMM::GetProbability(const double* sample)
{
    double total = 0;
    int k = 0;
    while (k < m_mixNum)
    {
        total += m_priors[k] * GetProbability(sample, k);
        ++k;
    }
    return total;
}
double GMM::GetProbability(const double* x, int j)
{
double p = 1;
for (int d = 0; d < m_dimNum; d++)
{
p *= 1 / sqrt(2 * M_PI * m_vars[j][d]);
p *= exp(-0.5 * (x[d] - m_means[j][d]) * (x[d] - m_means[j][d]) / m_vars[j][d]);
}
return p;
}
void GMM::Train(const char* sampleFileName)
{
//DumpSampleFile(sampleFileName);
Init(sampleFileName);
ifstream sampleFile(sampleFileName, ios_base::binary);
assert(sampleFile);
int size = 0;
sampleFile.seekg(0, ios_base::beg);
sampleFile.read((char*)&size, sizeof(int));
// Reestimation
bool loop = true;
double iterNum = 0;
double lastL = 0;
double currL = 0;
int unchanged = 0;
double* x = new double[m_dimNum]; // Sample data
double* next_priors = new double[m_mixNum];
double** next_vars = new double* [m_mixNum];
double** next_means = new double* [m_mixNum];
for (int i = 0; i < m_mixNum; i++)
{
next_means[i] = new double[m_dimNum];
next_vars[i] = new double[m_dimNum];
}
while (loop)
{
// Clear buffer for reestimation
memset(next_priors, 0, sizeof(double) * m_mixNum);
for (int i = 0; i < m_mixNum; i++)
{
memset(next_vars[i], 0, sizeof(double) * m_dimNum);
memset(next_means[i], 0, sizeof(double) * m_dimNum);
}
lastL = currL;
currL = 0;
// Predict
sampleFile.seekg(2 * sizeof(int), ios_base::beg);
for (int k = 0; k < size; k++)
{
sampleFile.read((char*)x, sizeof(double) * m_dimNum);
double p = GetProbability(x);
for (int j = 0; j < m_mixNum; j++)
{
double pj = GetProbability(x, j) * m_priors[j] / p;
next_priors[j] += pj;
for (int d = 0; d < m_dimNum; d++)
{
next_means[j][d] += pj * x[d];
next_vars[j][d] += pj * x[d] * x[d];
}
}
currL += (p > 1E-20) ? log10(p) : -20;
}
currL /= size;
// Reestimation: generate new priors, means and variances.
for (int j = 0; j < m_mixNum; j++)
{
m_priors[j] = next_priors[j] / size;
if (m_priors[j] > 0)
{
for (int d = 0; d < m_dimNum; d++)
{
m_means[j][d] = next_means[j][d] / next_priors[j];
m_vars[j][d] = next_vars[j][d] / next_priors[j] - m_means[j][d] * m_means[j][d];
if (m_vars[j][d] < m_minVars[d])
{
m_vars[j][d] = m_minVars[d];
}
}
}
}
// Terminal conditions
iterNum++;
if (fabs(currL - lastL) < m_endError * fabs(lastL))
{
unchanged++;
}
if (iterNum >= m_maxIterNum || unchanged >= 3)
{
loop = false;
}
//--- Debug ---
//cout << "Iter: " << iterNum << ", Average Log-Probability: " << currL << endl;
}
sampleFile.close();
delete[] next_priors;
for (int i = 0; i < m_mixNum; i++)
{
delete[] next_means[i];
delete[] next_vars[i];
}
delete[] next_means;
delete[] next_vars;
delete[] x;
}
void GMM::Train(double* data, int N)
{
Init(data, N);
int size = N;
// Reestimation
bool loop = true;
double iterNum = 0;
double lastL = 0;
double currL = 0;
int unchanged = 0;
double* x = new double[m_dimNum]; // Sample data
double* next_priors = new double[m_mixNum];
double** next_vars = new double* [m_mixNum];
double** next_means = new double* [m_mixNum];
for (int i = 0; i < m_mixNum; i++)
{
next_means[i] = new double[m_dimNum];
next_vars[i] = new double[m_dimNum];
}
while (loop)
{
// Clear buffer for reestimation
memset(next_priors, 0, sizeof(double) * m_mixNum);
for (int i = 0; i < m_mixNum; i++)
{
memset(next_vars[i], 0, sizeof(double) * m_dimNum);
memset(next_means[i], 0, sizeof(double) * m_dimNum);
}
lastL = currL;
currL = 0;
// Predict
for (int k = 0; k < size; k++)
{
for (int j = 0;j < m_dimNum;j++)
x[j] = data[k * m_dimNum + j];
double p = GetProbability(x);
for (int j = 0; j < m_mixNum; j++)
{
double pj = GetProbability(x, j) * m_priors[j] / p;
next_priors[j] += pj;
for (int d = 0; d < m_dimNum; d++)
{
next_means[j][d] += pj * x[d];
next_vars[j][d] += pj * x[d] * x[d];
}
}
currL += (p > 1E-20) ? log10(p) : -20;
}
currL /= size;
// Reestimation: generate new priors, means and variances.
for (int j = 0; j < m_mixNum; j++)
{
m_priors[j] = next_priors[j] / size;
if (m_priors[j] > 0)
{
for (int d = 0; d < m_dimNum; d++)
{
m_means[j][d] = next_means[j][d] / next_priors[j];
m_vars[j][d] = next_vars[j][d] / next_priors[j] - m_means[j][d] * m_means[j][d];
if (m_vars[j][d] < m_minVars[d])
{
m_vars[j][d] = m_minVars[d];
}
}
}
}
// Terminal conditions
iterNum++;
if (fabs(currL - lastL) < m_endError * fabs(lastL))
{
unchanged++;
}
if (iterNum >= m_maxIterNum || unchanged >= 3)
{
loop = false;
}
//--- Debug ---
//cout << "Iter: " << iterNum << ", Average Log-Probability: " << currL << endl;
}
delete[] next_priors;
for (int i = 0; i < m_mixNum; i++)
{
delete[] next_means[i];
delete[] next_vars[i];
}
delete[] next_means;
delete[] next_vars;
delete[] x;
}
// Seeds the model from an in-memory sample matrix (`N` samples of m_dimNum
// doubles each, stored back to back) before EM training:
//   1. cluster the samples with k-means (uniform initialisation),
//   2. use each cluster centre as a component mean,
//   3. use each cluster's share of the samples as the component prior,
//   4. use the within-cluster variance as the component variance,
//   5. set the per-dimension variance floor to 1% of the overall variance
//      (never below 1E-10).
// A component that received no samples gets the floor variance and a
// warning is printed.
void GMM::Init(double* data, int N)
{
    const double MIN_VAR = 1E-10;
    const size_t rowBytes = sizeof(double) * m_dimNum;

    // Step 1: k-means clustering; assignment[s] is the cluster of sample s.
    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    int* assignment = new int[N];
    kmeans->Cluster(data, N, assignment);

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum]; // Overall mean of training data

    for (int g = 0; g < m_mixNum; ++g)
    {
        counts[g] = 0;
        m_priors[g] = 0;
        memcpy(m_means[g], kmeans->GetMean(g), rowBytes);
        memset(m_vars[g], 0, rowBytes);
    }
    memset(overMeans, 0, rowBytes);
    memset(m_minVars, 0, rowBytes);

    const int size = N;
    for (int s = 0; s < size; ++s)
    {
        const double* x = &data[s * m_dimNum];
        const int label = assignment[s];

        // Count each Gaussian
        counts[label]++;

        double* m = kmeans->GetMean(label);
        double* varSum = m_vars[label];
        for (int d = 0; d < m_dimNum; ++d)
        {
            // Squared deviation from the cluster centre.
            varSum[d] += (x[d] - m[d]) * (x[d] - m[d]);
            // Running sums for the overall mean and second moment.
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; ++d)
    {
        overMeans[d] /= size;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int g = 0; g < m_mixNum; ++g)
    {
        m_priors[g] = 1.0 * counts[g] / size;

        if (!(m_priors[g] > 0))
        {
            memcpy(m_vars[g], m_minVars, rowBytes);
            cout << "[WARNING] Gaussian " << g << " of GMM is not used!\n";
            continue;
        }

        for (int d = 0; d < m_dimNum; ++d)
        {
            m_vars[g][d] = m_vars[g][d] / counts[g];
            // A minimum variance for each dimension is required.
            if (m_vars[g][d] < m_minVars[d])
            {
                m_vars[g][d] = m_minVars[d];
            }
        }
    }

    delete kmeans;
    delete[] counts;
    delete[] overMeans;
    delete[] assignment;
}
// Seeds the model from a binary sample file
// (<int size><int dim><double data[size * dim]>) before EM training:
//   1. cluster the samples with k-means (uniform initialisation); the
//      labels are written to the temporary file "gmm_init.tmp",
//   2. use each cluster centre as a component mean,
//   3. use each cluster's share of the samples as the component prior,
//   4. use the within-cluster variance as the component variance,
//   5. set the per-dimension variance floor to 1% of the overall variance
//      (never below 1E-10).
// A component that received no samples gets the floor variance and a
// warning is printed.
//
// Robustness notes:
//   * Reading stops at the first failed read, and a label outside
//     [0, m_mixNum) is skipped. The original used such a label directly as
//     an array index (out-of-bounds write), because the asserts guarding
//     the files disappear in release builds.
//   * Statistics are normalised by the number of samples actually used;
//     for a well-formed file this equals `size`, so results are unchanged.
// NOTE(review): "gmm_init.tmp" is created in the working directory and is
// never removed; concurrent trainings in the same directory would clash.
void GMM::Init(const char* sampleFileName)
{
    const double MIN_VAR = 1E-10;

    KMeans* kmeans = new KMeans(m_dimNum, m_mixNum);
    kmeans->SetInitMode(KMeans::InitUniform);
    kmeans->Cluster(sampleFileName, "gmm_init.tmp");

    int* counts = new int[m_mixNum];
    double* overMeans = new double[m_dimNum]; // Overall mean of training data
    for (int i = 0; i < m_mixNum; i++)
    {
        counts[i] = 0;
        m_priors[i] = 0;
        memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum);
        memset(m_vars[i], 0, sizeof(double) * m_dimNum);
    }
    memset(overMeans, 0, sizeof(double) * m_dimNum);
    memset(m_minVars, 0, sizeof(double) * m_dimNum);

    // Open the sample and label file to initialize the model
    ifstream sampleFile(sampleFileName, ios_base::binary);
    assert(sampleFile);
    ifstream labelFile("gmm_init.tmp", ios_base::binary);
    assert(labelFile);

    int size = 0;
    sampleFile.read((char*)&size, sizeof(int));
    sampleFile.seekg(2 * sizeof(int), ios_base::beg); // skip <size><dim>
    labelFile.seekg(sizeof(int), ios_base::beg);      // skip <size>

    double* x = new double[m_dimNum];
    int label = -1;
    int used = 0; // samples that were read successfully and carry a valid label
    for (int i = 0; i < size; i++)
    {
        sampleFile.read((char*)x, sizeof(double) * m_dimNum);
        labelFile.read((char*)&label, sizeof(int));
        if (!sampleFile || !labelFile)
        {
            break; // truncated or unreadable input: nothing more to read
        }
        if (label < 0 || label >= m_mixNum)
        {
            continue; // corrupt label: must not be used as an index
        }

        // Count each Gaussian
        counts[label]++;
        used++;
        double* m = kmeans->GetMean(label);
        for (int d = 0; d < m_dimNum; d++)
        {
            m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]);
        }

        // Count the overall mean and variance.
        for (int d = 0; d < m_dimNum; d++)
        {
            overMeans[d] += x[d];
            m_minVars[d] += x[d] * x[d];
        }
    }
    if (used != size)
    {
        cout << "[WARNING] GMM::Init used " << used << " of " << size << " samples.\n";
    }
    // Avoid 0/0 when nothing usable was read; every prior is then 0.
    const int total = (used > 0) ? used : 1;

    // Compute the overall variance (* 0.01) as the minimum variance.
    for (int d = 0; d < m_dimNum; d++)
    {
        overMeans[d] /= total;
        m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / total - overMeans[d] * overMeans[d]));
    }

    // Initialize each Gaussian.
    for (int i = 0; i < m_mixNum; i++)
    {
        m_priors[i] = 1.0 * counts[i] / total;
        if (m_priors[i] > 0)
        {
            for (int d = 0; d < m_dimNum; d++)
            {
                m_vars[i][d] = m_vars[i][d] / counts[i];
                // A minimum variance for each dimension is required.
                if (m_vars[i][d] < m_minVars[d])
                {
                    m_vars[i][d] = m_minVars[d];
                }
            }
        }
        else
        {
            memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum);
            cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n";
        }
    }

    delete kmeans;
    delete[] x;
    delete[] counts;
    delete[] overMeans;
    sampleFile.close();
    labelFile.close();
}
void GMM::DumpSampleFile(const char* fileName)
{
ifstream sampleFile(fileName, ios_base::binary);
assert(sampleFile);
int size = 0;
sampleFile.read((char*)&size, sizeof(int));
cout << size << endl;
int dim = 0;
sampleFile.read((char*)&dim, sizeof(int));
cout << dim << endl;
double* f = new double[dim];
for (int i = 0; i < size; i++)
{
sampleFile.read((char*)f, sizeof(double) * dim);
cout << i << ":";
for (int j = 0; j < dim; j++)
{
cout << " " << f[j];
}
cout << endl;
}
delete[] f;
sampleFile.close();
}
// Serialises the model as tagged text:
//   <GMM>
//   <DimNum> D </DimNum>
//   <MixNum> K </MixNum>
//   <Prior> w0 w1 ... </Prior>
//   <Mean>      one line of D values per component   </Mean>
//   <Variance>  one line of D values per component   </Variance>
//   </GMM>
// The variance floors (m_minVars) and the stopping criteria are not written.
ostream& operator<<(ostream& out, GMM& gmm)
{
    const int mixNum = gmm.m_mixNum;
    const int dimNum = gmm.m_dimNum;

    out << "<GMM>" << endl;
    out << "<DimNum> " << dimNum << " </DimNum>" << endl;
    out << "<MixNum> " << mixNum << " </MixNum>" << endl;

    out << "<Prior> ";
    for (int k = 0; k < mixNum; ++k)
    {
        out << gmm.m_priors[k] << " ";
    }
    out << "</Prior>" << endl;

    out << "<Mean>" << endl;
    for (int k = 0; k < mixNum; ++k)
    {
        const double* mean = gmm.m_means[k];
        for (int c = 0; c < dimNum; ++c)
        {
            out << mean[c] << " ";
        }
        out << endl;
    }
    out << "</Mean>" << endl;

    out << "<Variance>" << endl;
    for (int k = 0; k < mixNum; ++k)
    {
        const double* var = gmm.m_vars[k];
        for (int c = 0; c < dimNum; ++c)
        {
            out << var[c] << " ";
        }
        out << endl;
    }
    out << "</Variance>" << endl;

    out << "</GMM>" << endl;
    return out;
}
// Loads a model from the tagged text format written by operator<<.
//
// Changes compared with the original implementation:
//   * Tags are read into a std::string. Extracting into a fixed char[50]
//     with operator>> is unbounded and overflows the buffer on a long token.
//   * The header (<GMM>, DimNum, MixNum) is read and validated BEFORE the
//     existing model is disposed. On a malformed header the stream's
//     failbit is set and `gmm` is left untouched, instead of being freed
//     and re-allocated with garbage dimensions.
//   * The variance floors, which are not part of the file format, are
//     zeroed so they never hold indeterminate values after loading.
// Callers should check the stream state after extraction.
istream& operator>>(istream& in, GMM& gmm)
{
    string tag;

    in >> tag; // "<GMM>"
    assert(tag == "<GMM>");
    if (!in || tag != "<GMM>")
    {
        in.setstate(ios_base::failbit);
        return in;
    }

    int dimNum = 0;
    int mixNum = 0;
    in >> tag >> dimNum >> tag; // "<DimNum>" value "</DimNum>"
    in >> tag >> mixNum >> tag; // "<MixNum>" value "</MixNum>"
    if (!in || dimNum <= 0 || mixNum <= 0)
    {
        in.setstate(ios_base::failbit);
        return in;
    }

    // Dispose() must run while m_mixNum still describes the old buffers.
    gmm.Dispose();
    gmm.m_dimNum = dimNum;
    gmm.m_mixNum = mixNum;
    gmm.Allocate();
    memset(gmm.m_minVars, 0, sizeof(double) * gmm.m_dimNum);

    in >> tag; // "<Prior>"
    for (int i = 0; i < gmm.m_mixNum; i++)
    {
        in >> gmm.m_priors[i];
    }
    in >> tag; // "</Prior>"

    in >> tag; // "<Mean>"
    for (int i = 0; i < gmm.m_mixNum; i++)
    {
        for (int d = 0; d < gmm.m_dimNum; d++)
        {
            in >> gmm.m_means[i][d];
        }
    }
    in >> tag; // "</Mean>"

    in >> tag; // "<Variance>"
    for (int i = 0; i < gmm.m_mixNum; i++)
    {
        for (int d = 0; d < gmm.m_dimNum; d++)
        {
            in >> gmm.m_vars[i][d];
        }
    }
    in >> tag; // "</Variance>"

    in >> tag; // "</GMM>"
    return in;
}

68
GMM/gmm.h 100644
View File

@ -0,0 +1,68 @@
/***************************************************************************
Module Name:
Gaussian Mixture Model with Diagonal Covariance Matrix
History:
2003/11/01 Fei Wang
2013 luxiaoxun
***************************************************************************/
#pragma once
#include <fstream>
// <math.h> may already provide M_PI (e.g. with _USE_MATH_DEFINES on MSVC or
// by default on other toolchains); only define it when it is missing so the
// macro is never redefined.
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

// Gaussian mixture model with diagonal covariance matrices, trained by EM.
//
// The model owns raw heap buffers for its parameters.
// NOTE(review): no copy constructor / copy assignment is declared, so
// copying a GMM object by value would share the buffers and free them
// twice; use Copy() to duplicate parameters between two models.
class GMM
{
public:
    // Creates a model of `mixNum` components over `dimNum`-dimensional samples.
    GMM(int dimNum = 1, int mixNum = 1);
    ~GMM();

    // Copies all parameters from `gmm`; both models must have the same shape.
    void Copy(GMM* gmm);

    // Stopping criteria used by Train().
    void SetMaxIterNum(int i) { m_maxIterNum = i; }
    void SetEndError(double f) { m_endError = f; }

    int GetDimNum() const { return m_dimNum; }
    int GetMixNum() const { return m_mixNum; }
    int GetMaxIterNum() const { return m_maxIterNum; }
    double GetEndError() const { return m_endError; }

    // Direct access to the parameters of component `i` (no bounds checking).
    double& Prior(int i) { return m_priors[i]; }
    double* Mean(int i) { return m_means[i]; }
    double* Variance(int i) { return m_vars[i]; }

    // Setters for component `i`; `val` must point to m_dimNum values.
    void setPrior(int i, double val) { m_priors[i] = val; }
    void setMean(int i, double* val) { for (int j = 0; j < m_dimNum; j++) m_means[i][j] = val[j]; }
    void setVariance(int i, double* val) { for (int j = 0; j < m_dimNum; j++) m_vars[i][j] = val[j]; }

    // Mixture density at `sample` (m_dimNum values).
    double GetProbability(const double* sample);

    /* SampleFile: <size><dim><data>...*/
    void Init(const char* sampleFileName);
    void Train(const char* sampleFileName);

    // In-memory variants: `data` holds N samples of m_dimNum doubles each.
    void Init(double* data, int N);
    void Train(double* data, int N);

    // Debug helper: prints the contents of a sample file to stdout.
    void DumpSampleFile(const char* fileName);

    friend std::ostream& operator<<(std::ostream& out, GMM& gmm);
    friend std::istream& operator>>(std::istream& in, GMM& gmm);

private:
    int m_dimNum;      // Sample dimensionality
    int m_mixNum;      // Number of Gaussian components
    double* m_priors;  // Component weights (priors)
    double** m_means;  // Component means, one row per component
    double** m_vars;   // Component variances (diagonal), one row per component

    // A minimum variance is required. Now, it is the overall variance * 0.01.
    double* m_minVars;

    int m_maxIterNum;  // The stopping criterion regarding the number of iterations
    double m_endError; // The stopping criterion regarding the error

private:
    // Return the "j"th pdf, p(x|j).
    double GetProbability(const double* x, int j);

    // Allocate / release the parameter buffers for the current dimensions.
    void Allocate();
    void Dispose();
};

389
GMM/kmeans.cpp 100644
View File

@ -0,0 +1,389 @@
/***************************************************************************
Module Name:
KMeans
History:
2003/10/16 Fei Wang
2013 luxiaoxun
***************************************************************************/
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <iostream>
#include <cassert>
// The header is named kmeans.h (lower case) in gmm.cpp and in the project
// file; match that spelling so the include also resolves on
// case-sensitive file systems.
#include "kmeans.h"
using namespace std;
// Creates a k-means model with `clusterNum` centres over
// `dimNum`-dimensional samples. All centres start at the origin; the
// defaults are random initialisation, at most 100 iterations and a
// relative end error of 0.001.
KMeans::KMeans(int dimNum, int clusterNum)
{
    m_dimNum = dimNum;
    m_clusterNum = clusterNum;

    m_initMode = InitRandom;
    m_maxIterNum = 100;
    m_endError = 0.001;

    const size_t rowBytes = sizeof(double) * m_dimNum;
    m_means = new double*[m_clusterNum];
    for (int c = 0; c < m_clusterNum; ++c)
    {
        double* centre = new double[m_dimNum];
        memset(centre, 0, rowBytes);
        m_means[c] = centre;
    }
}
// Frees every cluster centre and then the table that holds them.
KMeans::~KMeans()
{
    int c = 0;
    while (c < m_clusterNum)
    {
        delete[] m_means[c];
        ++c;
    }
    delete[] m_means;
}
// Clusters the samples stored in a binary sample file and writes one label per
// sample to a binary label file.
//   Sample file layout: <int size><int dim><size * dim doubles>
//   Label file layout:  <int size><size ints>
// The sample file is re-read from the start of the data section on every
// pass, so the samples are never held in memory all at once.
//
// sampleFileName: path of the input sample file.
// labelFileName:  path of the label file to create.
//
// NOTE: the header checks are asserts and therefore disappear in NDEBUG builds.
void KMeans::Cluster(const char* sampleFileName, const char* labelFileName)
{
    // Open the sample file and validate its header.
    ifstream sampleFile(sampleFileName, ios_base::binary);
    assert(sampleFile);
    int size = 0;
    int dim = 0;
    sampleFile.read((char*)&size, sizeof(int));
    sampleFile.read((char*)&dim, sizeof(int));
    assert(size >= m_clusterNum);
    assert(dim == m_dimNum);

    // Choose the starting centroids according to m_initMode.
    Init(sampleFile);

    double* x = new double[m_dimNum];   // Current sample
    int label = -1;                     // Cluster index of the current sample
    int iterNum = 0;
    double lastCost = 0;
    double currCost = 0;
    int unchanged = 0;
    bool loop = true;
    int* counts = new int[m_clusterNum];
    double** next_means = new double* [m_clusterNum];   // Accumulators for the re-estimated centroids
    for (int i = 0; i < m_clusterNum; i++)
    {
        next_means[i] = new double[m_dimNum];
    }

    while (loop)
    {
        memset(counts, 0, sizeof(int) * m_clusterNum);
        for (int i = 0; i < m_clusterNum; i++)
        {
            memset(next_means[i], 0, sizeof(double) * m_dimNum);
        }
        lastCost = currCost;
        currCost = 0;

        // Rewind to the first sample (just past the two header ints).
        sampleFile.clear();
        sampleFile.seekg(sizeof(int) * 2, ios_base::beg);

        // Classification: assign every sample to its nearest centroid.
        for (int i = 0; i < size; i++)
        {
            sampleFile.read((char*)x, sizeof(double) * m_dimNum);
            currCost += GetLabel(x, &label);
            counts[label]++;
            for (int d = 0; d < m_dimNum; d++)
            {
                next_means[label][d] += x[d];
            }
        }
        currCost /= size;

        // Re-estimation: a cluster that received no sample keeps its old centroid.
        for (int i = 0; i < m_clusterNum; i++)
        {
            if (counts[i] > 0)
            {
                for (int d = 0; d < m_dimNum; d++)
                {
                    next_means[i][d] /= counts[i];
                }
                memcpy(m_means[i], next_means[i], sizeof(double) * m_dimNum);
            }
        }

        // Termination. "<=" (not "<") so that a cost that has reached exactly
        // zero is recognised as converged instead of looping until
        // m_maxIterNum.
        iterNum++;
        if (fabs(lastCost - currCost) <= m_endError * lastCost)
        {
            unchanged++;
        }
        if (iterNum >= m_maxIterNum || unchanged >= 3)
        {
            loop = false;
        }
    }

    // Write the label file using the final centroids.
    ofstream labelFile(labelFileName, ios_base::binary);
    assert(labelFile);
    labelFile.write((char*)&size, sizeof(int));
    sampleFile.clear();
    sampleFile.seekg(sizeof(int) * 2, ios_base::beg);
    for (int i = 0; i < size; i++)
    {
        sampleFile.read((char*)x, sizeof(double) * m_dimNum);
        GetLabel(x, &label);
        labelFile.write((char*)&label, sizeof(int));
    }
    sampleFile.close();
    labelFile.close();

    delete[] counts;
    delete[] x;
    for (int i = 0; i < m_clusterNum; i++)
    {
        delete[] next_means[i];
    }
    delete[] next_means;
}
//
// Clusters N in-memory samples and stores the cluster index of each in Label.
//
// data:  N samples stored back to back, m_dimNum doubles per sample
//        (sample i starts at data[i * m_dimNum]).
// N:     number of samples.
// Label: output array with room for N ints.
//
// NOTE: the size check is an assert and therefore disappears in NDEBUG builds.
void KMeans::Cluster(double* data, int N, int* Label)
{
    int size = N;
    assert(size >= m_clusterNum);

    // Choose the starting centroids according to m_initMode.
    Init(data, N);

    int label = -1;     // Cluster index of the current sample
    int iterNum = 0;
    double lastCost = 0;
    double currCost = 0;
    int unchanged = 0;
    bool loop = true;
    int* counts = new int[m_clusterNum];
    double** next_means = new double* [m_clusterNum];   // Accumulators for the re-estimated centroids
    for (int i = 0; i < m_clusterNum; i++)
    {
        next_means[i] = new double[m_dimNum];
    }

    while (loop)
    {
        memset(counts, 0, sizeof(int) * m_clusterNum);
        for (int i = 0; i < m_clusterNum; i++)
        {
            memset(next_means[i], 0, sizeof(double) * m_dimNum);
        }
        lastCost = currCost;
        currCost = 0;

        // Classification: assign every sample to its nearest centroid.
        for (int i = 0; i < size; i++)
        {
            // Index in size_t so that i * m_dimNum cannot overflow int.
            const double* x = data + (size_t)i * m_dimNum;
            currCost += GetLabel(x, &label);
            counts[label]++;
            for (int d = 0; d < m_dimNum; d++)
            {
                next_means[label][d] += x[d];
            }
        }
        currCost /= size;

        // Re-estimation: a cluster that received no sample keeps its old centroid.
        for (int i = 0; i < m_clusterNum; i++)
        {
            if (counts[i] > 0)
            {
                for (int d = 0; d < m_dimNum; d++)
                {
                    next_means[i][d] /= counts[i];
                }
                memcpy(m_means[i], next_means[i], sizeof(double) * m_dimNum);
            }
        }

        // Termination. "<=" (not "<") so that a cost that has reached exactly
        // zero is recognised as converged instead of looping until
        // m_maxIterNum.
        iterNum++;
        if (fabs(lastCost - currCost) <= m_endError * lastCost)
        {
            unchanged++;
        }
        if (iterNum >= m_maxIterNum || unchanged >= 3)
        {
            loop = false;
        }
    }

    // Final labelling with the converged centroids.
    for (int i = 0; i < size; i++)
    {
        GetLabel(data + (size_t)i * m_dimNum, &label);
        Label[i] = label;
    }

    delete[] counts;
    for (int i = 0; i < m_clusterNum; i++)
    {
        delete[] next_means[i];
    }
    delete[] next_means;
}
// Chooses the starting centroids from N in-memory samples.
//   InitRandom:  the data is split into m_clusterNum equal intervals and one
//                sample is drawn at random from each (rand() is re-seeded
//                from the clock on every call).
//   InitUniform: centroid i is sample number i * N / m_clusterNum.
//   InitManual:  centroids previously set through SetMean() are left as is.
//
// data: N samples stored back to back, m_dimNum doubles per sample.
// N:    number of samples.
void KMeans::Init(double* data, int N)
{
    int size = N;
    if (m_initMode == InitManual)
    {
        return;     // Centroids were supplied by the caller
    }
    if (size <= 0 || m_clusterNum <= 0)
    {
        return;     // Nothing to sample from
    }

    if (m_initMode == InitRandom)
    {
        int inteval = size / m_clusterNum;
        // Seed the random-number generator with current time
        srand((unsigned)time(NULL));
        for (int i = 0; i < m_clusterNum; i++)
        {
            // The product is formed in 64 bits: (inteval - 1) * rand()
            // overflows int as soon as RAND_MAX is large (it equals INT_MAX
            // on many platforms).
            int select = inteval * i + (int)((long long)(inteval - 1) * rand() / RAND_MAX);
            // Only reachable when size < m_clusterNum (inteval == 0).
            if (select < 0) select = 0;
            if (select >= size) select = size - 1;
            memcpy(m_means[i], data + (size_t)select * m_dimNum, sizeof(double) * m_dimNum);
        }
    }
    else if (m_initMode == InitUniform)
    {
        for (int i = 0; i < m_clusterNum; i++)
        {
            // 64-bit product: i * size can exceed INT_MAX for large inputs.
            int select = (int)((long long)i * size / m_clusterNum);
            memcpy(m_means[i], data + (size_t)select * m_dimNum, sizeof(double) * m_dimNum);
        }
    }
}
// Chooses the starting centroids from a binary sample file laid out as
// <int size><int dim><size * dim doubles>. The sample count is re-read from
// the start of the stream.
//   InitRandom:  the data is split into m_clusterNum equal intervals and one
//                sample is drawn at random from each (rand() is re-seeded
//                from the clock on every call).
//   InitUniform: centroid i is sample number i * size / m_clusterNum.
//   InitManual:  centroids previously set through SetMean() are left as is.
void KMeans::Init(ifstream& sampleFile)
{
    int size = 0;
    sampleFile.seekg(0, ios_base::beg);
    sampleFile.read((char*)&size, sizeof(int));
    if (m_initMode == InitManual)
    {
        return;     // Centroids were supplied by the caller
    }
    if (size <= 0 || m_clusterNum <= 0)
    {
        return;     // Nothing to sample from
    }

    // File offsets are computed as streamoff; an int offset truncates once
    // the data section grows past INT_MAX bytes.
    const streamoff headerBytes = (streamoff)(sizeof(int) * 2);
    const streamoff sampleBytes = (streamoff)sizeof(double) * m_dimNum;

    if (m_initMode == InitRandom)
    {
        int inteval = size / m_clusterNum;
        // Seed the random-number generator with current time
        srand((unsigned)time(NULL));
        for (int i = 0; i < m_clusterNum; i++)
        {
            // The product is formed in 64 bits: (inteval - 1) * rand()
            // overflows int as soon as RAND_MAX is large (it equals INT_MAX
            // on many platforms).
            int select = inteval * i + (int)((long long)(inteval - 1) * rand() / RAND_MAX);
            // Only reachable when size < m_clusterNum (inteval == 0).
            if (select < 0) select = 0;
            if (select >= size) select = size - 1;
            sampleFile.seekg(headerBytes + select * sampleBytes, ios_base::beg);
            sampleFile.read((char*)m_means[i], sizeof(double) * m_dimNum);
        }
    }
    else if (m_initMode == InitUniform)
    {
        for (int i = 0; i < m_clusterNum; i++)
        {
            // 64-bit product: i * size can exceed INT_MAX for large inputs.
            int select = (int)((long long)i * size / m_clusterNum);
            sampleFile.seekg(headerBytes + select * sampleBytes, ios_base::beg);
            sampleFile.read((char*)m_means[i], sizeof(double) * m_dimNum);
        }
    }
}
// Finds the centroid closest to `sample`.
// Writes that centroid's index to *label and returns the distance to it.
// When there are no centroids, *label is left untouched and -1 is returned.
double KMeans::GetLabel(const double* sample, int* label)
{
    double best = -1;       // -1 doubles as the "nothing chosen yet" marker
    int c = 0;
    while (c < m_clusterNum)
    {
        const double candidate = CalcDistance(sample, m_means[c], m_dimNum);
        const bool firstOrCloser = (best == -1) || (candidate < best);
        if (firstOrCloser)
        {
            *label = c;
            best = candidate;
        }
        ++c;
    }
    return best;
}
// Euclidean distance between the first `dimNum` components of x and u.
double KMeans::CalcDistance(const double* x, const double* u, int dimNum)
{
    double sumSq = 0;
    int d = 0;
    while (d < dimNum)
    {
        sumSq += (x[d] - u[d]) * (x[d] - u[d]);
        ++d;
    }
    return sqrt(sumSq);
}
// Writes the model as tagged text: the dimension, the cluster count, then one
// line per centroid with its components separated by spaces.
// Returns `out` so that insertions can be chained.
ostream& operator<<(ostream& out, KMeans& kmeans)
{
    out << "<KMeans>" << endl;
    out << "<DimNum> " << kmeans.m_dimNum << " </DimNum>" << endl;
    // The closing tag used to be misspelled "</CluterNum>", leaving the
    // <ClusterNum> element unbalanced.
    out << "<ClusterNum> " << kmeans.m_clusterNum << " </ClusterNum>" << endl;
    out << "<Mean>" << endl;
    for (int i = 0; i < kmeans.m_clusterNum; i++)
    {
        for (int d = 0; d < kmeans.m_dimNum; d++)
        {
            out << kmeans.m_means[i][d] << " ";
        }
        out << endl;
    }
    out << "</Mean>" << endl;
    out << "</KMeans>" << endl;
    return out;
}

57
GMM/kmeans.h 100644
View File

@ -0,0 +1,57 @@
/***************************************************************************
Module Name:
KMeans
History:
2003/10/16 Fei Wang
2013 luxiaoxun
***************************************************************************/
#pragma once
#include <fstream>
// K-means clustering over fixed-dimension samples held as raw double arrays.
// The model is m_clusterNum centroids of m_dimNum components each.
class KMeans
{
public:
    // How the starting centroids are chosen before clustering.
    enum InitMode
    {
        InitRandom,     // one random sample from each of m_clusterNum equal slices of the data
        InitManual,     // centroids supplied by the caller through SetMean()
        InitUniform,    // evenly spaced samples
    };

    KMeans(int dimNum = 1, int clusterNum = 1);
    ~KMeans();

    // The object owns heap arrays that the destructor frees; an implicit copy
    // would share them and free them twice, so copying is disabled.
    KMeans(const KMeans&) = delete;
    KMeans& operator=(const KMeans&) = delete;

    // Copies m_dimNum values from u into centroid i. Written as a plain loop
    // so this header does not depend on a <cstring> include for memcpy.
    void SetMean(int i, const double* u)
    {
        for (int d = 0; d < m_dimNum; ++d)
        {
            m_means[i][d] = u[d];
        }
    }
    void SetInitMode(int i) { m_initMode = i; }
    void SetMaxIterNum(int i) { m_maxIterNum = i; }
    void SetEndError(double f) { m_endError = f; }

    // Pointer to the m_dimNum components of centroid i (owned by this object).
    double* GetMean(int i) const { return m_means[i]; }
    int GetInitMode() const { return m_initMode; }
    int GetMaxIterNum() const { return m_maxIterNum; }
    double GetEndError() const { return m_endError; }

    /* SampleFile: <size><dim><data>...
       LabelFile:  <size><label>...
    */
    void Cluster(const char* sampleFileName, const char* labelFileName);
    void Init(std::ifstream& sampleFile);
    void Init(double* data, int N);
    // In-memory variant: data holds N samples back to back, Label receives N indices.
    void Cluster(double* data, int N, int* Label);

    friend std::ostream& operator<<(std::ostream& out, KMeans& kmeans);

private:
    int m_dimNum;       // Components per sample
    int m_clusterNum;   // Number of centroids
    double** m_means;   // m_clusterNum rows of m_dimNum doubles
    int m_initMode;     // One of the InitMode values
    int m_maxIterNum;   // The stopping criterion regarding the number of iterations
    double m_endError;  // The stopping criterion regarding the error

    // Nearest-centroid lookup: writes the index to *label, returns the distance.
    double GetLabel(const double* x, int* label);
    // Euclidean distance between x and u over dimNum components.
    double CalcDistance(const double* x, const double* u, int dimNum);
};

325
GMM/main.cpp 100644
View File

@ -0,0 +1,325 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <random>
#include <unordered_map>
#include <omp.h>
#include <time.h>
#include <string>
#include <vector>
#include <queue>
#ifdef _WIN32
#include <io.h>
#include <process.h>
#define F_OK 0
#else
#include <unistd.h>
#endif
#include <mat.h>
#include "gmm.h"
using namespace std;
using std::cout;
using std::vector;
/* 从mat文件中读取给定名称的矩阵数据并获取矩阵的行列数值 */
template<typename T>
T* ReadMatlabMat(const string &filePath, const string &mtxName, int *pRowNum, int *pColNum) {
T* dst = nullptr;
MATFile* pMatFile = nullptr;
mxArray* pMxArray = nullptr;
int rowNum, colNum;
double* matData;
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) {
cerr << "filePath is error!" << endl;
return nullptr;
}
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
rowNum = mxGetM(pMxArray);
colNum = mxGetN(pMxArray);
cout << rowNum << " " << colNum << endl;
matData = (double*)mxGetData(pMxArray); //获取指针
dst = new T[rowNum * colNum];
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
dst[i * colNum + j] = T(matData[j * rowNum + i]);
}
}
mxDestroyArray(pMxArray); //释放内存
matClose(pMatFile); // 关闭文件
*pRowNum = rowNum;
*pColNum = colNum;
return dst;
}
/* Writes a row-major rowNum x colNum matrix into an open MAT-file under the
 * name matrixName. Every element is converted to double and stored
 * column-major, as MATLAB expects.
 *
 * src:      rowNum * colNum elements, row-major.
 * pMatFile: MAT-file handle opened for writing; not closed here.
 *
 * Returns true when the variable was written, false when the handle is null,
 * the MATLAB array could not be created, or matPutVariable reported failure.
 */
template<typename T>
bool SaveMatrix(T* src, MATFile* pMatFile, string matrixName, int rowNum, int colNum)
{
    // Validate before allocating anything: the original allocated a scratch
    // buffer first and leaked it on this early return.
    if (pMatFile == nullptr)
    {
        cerr << "mat file pointer is error!" << endl;
        return false;
    }

    mxArray* pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
    if (pWriteArray == nullptr)
    {
        cerr << "cannot allocate matrix \"" << matrixName << "\"" << endl;
        return false;
    }

    // Transpose straight into the MATLAB buffer; no intermediate copy needed.
    double* mtxData = mxGetPr(pWriteArray);
    for (int i = 0; i < rowNum; i++)
    {
        for (int j = 0; j < colNum; j++)
        {
            mtxData[(size_t)j * rowNum + i] = double(src[(size_t)i * colNum + j]);
        }
    }

    // matPutVariable returns 0 on success.
    const int status = matPutVariable(pMatFile, matrixName.c_str(), pWriteArray);
    mxDestroyArray(pWriteArray);
    if (status != 0)
    {
        cerr << "failed to write matrix \"" << matrixName << "\"" << endl;
        return false;
    }
    return true;
}
/* Histograms x into bins of width binWidth centred on 0, binWidth,
 * 2 * binWidth, ... (similar in spirit to MATLAB's hist).
 *
 * x, xSize: input samples.
 * binWidth: bin width; must be positive.
 * vXBin:    output, xSize values: every sample replaced by the centre of its
 *           bin, in ascending order. The order matters to the caller because
 *           the trained mixture depends on the order of the points.
 * vYBin:    output, the number of samples in each bin.
 *
 * Samples above the last bin are counted in the last bin. Samples below the
 * first bin are counted in the first bin (previously they produced a negative
 * index and wrote outside vYBin). A non-positive binWidth yields empty
 * outputs instead of dividing by zero.
 */
void PutXtoBin(double* x, int xSize, double binWidth, std::vector<double>& vXBin, std::vector<double>& vYBin) {
    if (!(binWidth > 0.0)) {
        vXBin.clear();
        vYBin.clear();
        return;
    }
    if (xSize < 0) xSize = 0;

    // The range always starts at 0, so maxX is never below 0.
    double maxX = 0.0;
    for (int i = 0; i < xSize; ++i) {
        if (maxX < x[i]) maxX = x[i];
    }

    // Make sure the last bin centre is not above maxX and not below
    // maxX - binWidth.
    int binSize = (int)((maxX + binWidth / 2) / binWidth + 1);
    if ((binSize - 1) * binWidth > maxX) {
        binSize -= 1;
    }

    vXBin.resize(xSize);
    vYBin.assign(binSize, 0.0);
    for (int i = 0; i < xSize; ++i) {
        int binIdx = (int)((x[i] + binWidth / 2) / binWidth);
        if (binIdx < 0) binIdx = 0;
        if (binIdx >= binSize) binIdx = binSize - 1;
        vYBin[binIdx] += 1;
    }

    // Emit the bin centres in ascending order, one per counted sample.
    int xIdx = 0;
    for (int i = 0; i < binSize; ++i) {
        const double centre = i * binWidth;
        for (int j = 0; j < vYBin[i]; ++j) {
            vXBin[xIdx++] = centre;
        }
    }
}
/* 将标准高斯模型训练出的参数转换成自定义的系数, 并返回拟合后的Y值向量 */
// Orders (count, probability) pairs by their first member only, so a
// std::priority_queue using it yields the largest count first.
// operator() is const so the comparator can also be invoked through a const
// object or reference.
struct cmpFunc {
    bool operator()(const std::pair<double, double>& a, const std::pair<double, double>& b) const {
        return a.first < b.first;
    }
};
/* Converts a trained mixture into amplitude-scaled curve coefficients and the
 * fitted histogram values.
 *
 * The mixture yields probability densities while the histogram holds counts,
 * so the curve is rescaled: the zoom factor is the mean count/density ratio
 * over the topM (a quarter of the bins, at least one) most populated bins.
 *
 * gmm:      trained model. Components 0 and 1 are read and only the first
 *           mean/variance entry of each is used
 *           (NOTE(review): presumably always a 1-D, 2-component model - confirm).
 * binWidth: histogram bin width; bin i is evaluated at x = i * binWidth.
 * vYBin:    histogram counts.
 * vFactor:  output, six values: for each of the two components the scaled
 *           amplitude, the mean and sqrt(2 * variance).
 * vEY:      output, the fitted value for every bin.
 */
void GMMToFactorEY(GMM& gmm, double binWidth, vector<double> &vYBin, vector<double>& vFactor, vector<double>& vEY) {
    const int binCount = (int)vYBin.size();
    vEY.resize(vYBin.size());
    int topM = binCount / 4;
    if (topM < 1) topM = 1;

    /* A max-heap keyed on the bin count delivers the topM fullest bins. */
    priority_queue<pair<double, double>, vector<pair<double, double> >, cmpFunc> pqTopM;
    for (int i = 0; i < binCount; ++i) {
        double xVal = i * binWidth;
        double probVal = gmm.GetProbability(&xVal);
        vEY[i] = probVal;
        pqTopM.push(make_pair(vYBin[i], probVal));
    }

    double zoomFactorSum = 0.0;
    int valNum = 0;     // Number of ratios that actually entered the sum
    // Stop when the heap runs dry: with an empty histogram the original
    // called top() on an empty queue.
    for (int i = 0; i < topM && !pqTopM.empty(); ++i) {
        pair<double, double> topEle = pqTopM.top();
        pqTopM.pop();
        cout << topEle.first << '\t' << topEle.second << endl;
        // A zero density would turn the ratio into inf/NaN and poison the
        // average, so such bins are left out.
        if (topEle.second > 0.0) {
            zoomFactorSum += topEle.first / topEle.second;
            ++valNum;
        }
    }
    cout << endl;

    const double zoomFactor = (valNum > 0) ? zoomFactorSum / valNum : 0.0;
    for (int i = 0; i < binCount; ++i) {
        vEY[i] *= zoomFactor;
    }

    vFactor.clear();
    for (int k = 0; k < 2; ++k) {
        const double variance = *gmm.Variance(k);
        vFactor.push_back(zoomFactor * gmm.Prior(k) / sqrt(2 * M_PI * variance));
        vFactor.push_back(*gmm.Mean(k));
        vFactor.push_back(sqrt(2 * variance));
    }
}
/* Arithmetic mean of vVal, computed in T. Returns T(0) for an empty vector. */
template <typename T>
T Average(const std::vector<T>& vVal) {
    if (vVal.empty()) return T(0);
    T sumVal = T(0);
    for (typename std::vector<T>::size_type i = 0; i < vVal.size(); ++i) {
        sumVal += vVal[i];
    }
    // Divide in T: dividing by the raw size_t converts an integral sum to
    // unsigned first, which gives garbage for negative sums.
    return sumVal / static_cast<T>(vVal.size());
}
/* Mean of the squared elements of vVal, computed in T.
 * Returns T(0) for an empty vector. */
template <typename T>
T SquareAverage(const std::vector<T>& vVal) {
    if (vVal.empty()) return T(0);
    // Accumulate directly instead of materialising a vector of squares.
    T sumSq = T(0);
    for (typename std::vector<T>::size_type i = 0; i < vVal.size(); ++i) {
        sumSq += vVal[i] * vVal[i];
    }
    // Divide in T so an integral sum is not converted to unsigned.
    return sumSq / static_cast<T>(vVal.size());
}
/* Distance between vX and vY defined as |1 - mean(x*y) / sqrt(mean(x^2) * mean(y^2))|.
 * No mean is subtracted from either vector.
 *
 * The vectors are expected to have the same length; if they differ, only the
 * common prefix is compared (previously a shorter vY was read past its end).
 * Returns 1.0 when there is nothing to compare or when either vector is all
 * zeros, instead of the NaN a 0/0 division would give.
 */
double CorrelationDistance(const std::vector<double>& vX, const std::vector<double>& vY) {
    const std::vector<double>::size_type n = vX.size() < vY.size() ? vX.size() : vY.size();
    if (n == 0) return 1.0;

    double sumXY = 0.0;
    double sumXX = 0.0;
    double sumYY = 0.0;
    for (std::vector<double>::size_type i = 0; i < n; ++i) {
        sumXY += vX[i] * vY[i];
        sumXX += vX[i] * vX[i];
        sumYY += vY[i] * vY[i];
    }
    const double uv = sumXY / n;
    const double uu = sumXX / n;
    const double vv = sumYY / n;

    const double norm = sqrt(uu * vv);
    if (!(norm > 0.0)) return 1.0;

    const double dist = 1.0 - uv / norm;
    // Explicit magnitude: an unqualified abs() can resolve to the int overload.
    return dist < 0.0 ? -dist : dist;
}
/* Fits every row of the matrix "hs" stored in the MAT-file filePath.
 *
 * For each row:
 *   - the values are histogrammed with a bin width of 0.2,
 *   - a 1-D, 2-component GMM is trained on the binned values,
 *   - the model is converted to six curve coefficients plus fitted bin values,
 *   - the distance between the histogram and the fit is recorded,
 *   - a second GMM is trained on the raw row for comparison.
 * The models go to mat_gmm.debug / mat_gmm2.debug, distances and coefficients
 * to br.debug, and the coefficient and distance tables to D:\save_br.mat as
 * "factor" (rowNum x 6) and "correlation" (rowNum x 1).
 *
 * If "hs" cannot be loaded the function reports it and returns without
 * touching any output file.
 */
void processMatData(const string& filePath) {
    const double kBinWidth = 0.2;
    int rowNum = 0;
    int colNum = 0;
    double total_cov = 0;
    double total_cov2 = 0;

    clock_t begin = clock();
    double* hs = ReadMatlabMat<double>(filePath, "hs", &rowNum, &colNum);
    if (hs == nullptr) {
        // Carrying on would overwrite earlier results with an empty MAT-file.
        cerr << "Failed to read matrix \"hs\" from " << filePath << endl;
        return;
    }

    ofstream gmmOfs("mat_gmm.debug");
    ofstream gmmOfs2("mat_gmm2.debug");
    ofstream xyOfs("xy_cpp.debug");     // Created but currently not written to
    ofstream brOfs("br.debug");

    vector<double> vXBin;
    vector<double> vYBin;
    vector<double> vEY;
    vector<double> vFactor;
    /* Collected for the MAT-file written at the end. */
    vector<double> vDist(rowNum);
    vector<double> vFactorAll;

    for (int i = 0; i < rowNum; ++i) {
        double* row = hs + (size_t)i * colNum;
        PutXtoBin(row, colNum, kBinWidth, vXBin, vYBin);

        GMM gmm(1, 2);  // 1 dimension, 2 Gaussian components
        gmm.Train(vXBin.data(), vXBin.size());
        total_cov += *gmm.Variance(0);
        gmmOfs << gmm << endl;

        GMMToFactorEY(gmm, kBinWidth, vYBin, vFactor, vEY);
        // Computed once and reused for both the table and the debug file.
        vDist[i] = CorrelationDistance(vYBin, vEY);
        vFactorAll.insert(vFactorAll.end(), vFactor.begin(), vFactor.end());
        brOfs << vDist[i] << endl;
        for (int j = 0; j < (int)vFactor.size(); ++j) brOfs << vFactor[j] << ", ";

        GMM gmm2(1, 2);
        gmm2.Train(row, colNum);
        total_cov2 += *gmm2.Variance(0);
        gmmOfs2 << gmm2 << endl;
    }

    /* Write the result tables; 6 is the number of coefficients per row. */
    MATFile* pMatFile = matOpen("D:\\save_br.mat", "w");
    if (pMatFile != nullptr) {
        SaveMatrix<double>(vFactorAll.data(), pMatFile, "factor", rowNum, 6);
        SaveMatrix<double>(vDist.data(), pMatFile, "correlation", rowNum, 1);
        matClose(pMatFile);
    } else {
        // matClose() must not be handed a null handle.
        cerr << "Cannot open D:\\save_br.mat for writing" << endl;
    }

    gmmOfs.close();
    gmmOfs2.close();
    xyOfs.close();
    brOfs.close();

    clock_t finish = clock();
    cout << "Total cov: " << total_cov << endl;
    cout << "Total cov2: " << total_cov2 << endl;
    cout << "Total time:" << (double)(finish - begin) / CLOCKS_PER_SEC << endl;
    delete[] hs;
}
/* Fits histograms stored as text.
 *
 * The file is read two lines at a time: a line of x values followed by a line
 * of counts. Each x is repeated count times to rebuild the sample list, a
 * 1-D, 2-component GMM is trained on it and the model is appended to
 * txt_gmm.debug. Pairs that produce no samples are skipped.
 *
 * Counts are truncated to whole numbers; counts below 1 add no samples.
 */
void processTxtData(const string& filePath) {
    double total_cov = 0;
    ifstream ifs(filePath, ios::in);
    if (!ifs) {
        cerr << "Cannot open " << filePath << endl;
        return;
    }

    clock_t begin = clock();
    ofstream gmmOfs("txt_gmm.debug");

    string x_str, y_str;
    while (getline(ifs, x_str) && getline(ifs, y_str)) {
        vector<double> vec_point;
        stringstream ss_x(x_str);
        stringstream ss_y(y_str);
        float x, y;
        while (ss_x >> x && ss_y >> y) {
            // The repeat count is handled as an integer. The original added
            // the float to vec_point.size(), which rounds once the vector
            // holds more than 2^24 elements and misbehaves for negative
            // counts.
            if (y < 1.0f) continue;
            vec_point.insert(vec_point.end(), static_cast<size_t>(y), x);
        }
        if (vec_point.size() == 0) continue;

        GMM gmm(1, 2);  // 1 dimension, 2 Gaussian components
        gmm.Train(vec_point.data(), vec_point.size());
        total_cov += *gmm.Variance(0);
        gmmOfs << gmm << endl;
    }
    gmmOfs.close();

    clock_t finish = clock();
    cout << "Total cov: " << total_cov << endl;
    cout << "Total time:" << (double)(finish - begin) / CLOCKS_PER_SEC << endl;
    ifs.close();
}
// Entry point. Usage: GMM [matFile [txtFile]]
// Processes the MAT-file and then the text file. Either path may be given on
// the command line; without arguments the built-in development paths are used.
int main(int argc, char** argv) {
    string matPath = "D:\\Twirls\\runtime\\ALS_test\\1775\\twirls_id_abs2class_hs.mat";
    string txtPath = "D:\\Twirls\\backup\\xy.txt";
    if (argc > 1) matPath = argv[1];
    if (argc > 2) txtPath = argv[2];

    processMatData(matPath);
    processTxtData(txtPath);
    return 0;
}

View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Windows.CppWinRT" version="2.0.210806.1" targetFramework="native" />
</packages>

30
GMM/readme.txt 100644
View File

@ -0,0 +1,30 @@
========================================================================
C++/WinRT GMM Project Overview
========================================================================
This project demonstrates how to get started consuming Windows Runtime
classes directly from standard C++, using platform projection headers
generated from Windows SDK metadata files.
Steps to generate and consume SDK platform projection:
1. Build project initially to generate platform projection headers into
your Generated Files folder.
2. Include a projection namespace header in your pch.h, such as
<winrt/Windows.Foundation.h>.
3. Consume winrt namespace and any Windows Runtime namespaces, such as
winrt::Windows::Foundation, from source code.
4. Initialize apartment via init_apartment() and consume winrt classes.
Steps to generate and consume a projection from third party metadata:
1. Add a WinMD reference by right-clicking the References project node
and selecting "Add Reference...". In the Add References dialog,
browse to the component WinMD you want to consume and add it.
2. Build the project once to generate projection headers for the
referenced WinMD file under the "Generated Files" subfolder.
3. As above, include projection headers in pch or source code
to consume projected Windows Runtime classes.
========================================================================
Learn more about C++/WinRT here:
http://aka.ms/cppwinrt/
========================================================================

View File

@ -1,3 +1,4 @@
# twirls
重构matlab代码改成python加速文献感知
先将matlab耗时的部分改成c++,通过文件的方式进行交互

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<!--
To customize common C++/WinRT project properties:
* right-click the project node
* expand the Common Properties item
* select the C++/WinRT property page
For more advanced scenarios, and complete documentation, please see:
https://github.com/Microsoft/cppwinrt/tree/master/nuget
-->
<PropertyGroup />
<ItemDefinitionGroup />
</Project>

View File

@ -0,0 +1,127 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props')" />
<PropertyGroup Label="Globals">
<CppWinRTOptimized>true</CppWinRTOptimized>
<CppWinRTRootNamespaceAutoMerge>true</CppWinRTRootNamespaceAutoMerge>
<CppWinRTGenerateWindowsMetadata>true</CppWinRTGenerateWindowsMetadata>
<MinimalCoreWin>true</MinimalCoreWin>
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{d3a55022-91f5-4db2-85a6-a25ae12cc840}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>RandSim</RootNamespace>
<WindowsTargetPlatformVersion Condition=" '$(WindowsTargetPlatformVersion)' == '' ">10.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformMinVersion>10.0.17134.0</WindowsTargetPlatformMinVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v143</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '16.0'">v142</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '15.0'">v141</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)' == '14.0'">v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="PropertySheet.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
<PrecompiledHeaderOutputFile>$(IntDir)pch.pch</PrecompiledHeaderOutputFile>
<PreprocessorDefinitions>_CONSOLE;WIN32_LEAN_AND_MEAN;WINRT_LEAN_AND_MEAN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<WarningLevel>Level4</WarningLevel>
<AdditionalOptions>%(AdditionalOptions) /permissive- /bigobj</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<ClCompile>
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="rand_sim.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="rand_sim.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
<None Include="PropertySheet.props" />
<Text Include="readme.txt">
<DeploymentContent>false</DeploymentContent>
</Text>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>这台计算机上缺少此项目引用的 NuGet 程序包。使用“NuGet 程序包还原”可下载这些程序包。有关更多信息,请参见 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.props'))" />
<Error Condition="!Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Windows.CppWinRT.2.0.210806.1\build\native\Microsoft.Windows.CppWinRT.targets'))" />
</Target>
</Project>

View File

@ -0,0 +1,34 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="rand_sim.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="rand_sim.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="PropertySheet.props" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<Text Include="readme.txt" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Windows.CppWinRT" version="2.0.210806.1" targetFramework="native" />
</packages>

View File

@ -0,0 +1,184 @@
#include <iostream>
#include <fstream>
#include <algorithm>
#include <random>
#include <unordered_map>
#include <omp.h>
#include <time.h>
#include <string>
#ifdef _WIN32
#include <io.h>
#include <process.h>
#define F_OK 0
#else
#include <unistd.h>
#endif
#include "rand_sim.h"
using namespace std;
// Reads the text file cf and appends one entry to vecVecStr per line: the
// line's lower-cased words. Words are separated by single spaces; runs of
// spaces produce no empty words, and a blank line yields an empty entry.
// If the file cannot be opened, "error" is printed and vecVecStr is left
// unchanged.
void txtReaderVvs(std::vector<std::vector<std::string>>& vecVecStr, std::string cf) {
    std::filebuf fb;
    if (fb.open(cf.c_str(), std::ios::in) == nullptr) {
        std::cout << "error" << std::endl;
        return;
    }
    std::istream is(&fb);
    std::string lineInfo;
    while (std::getline(is, lineInfo)) {
        std::vector<std::string> vecStr;
        std::string word;
        // One extra step past the end flushes the final word.
        for (std::string::size_type i = 0; i <= lineInfo.length(); ++i) {
            if (i < lineInfo.length() && lineInfo[i] != ' ') {
                // tolower() is only defined for unsigned-char values and EOF,
                // so bytes above 0x7F must not be passed as negative chars.
                word += static_cast<char>(::tolower(static_cast<unsigned char>(lineInfo[i])));
                continue;
            }
            if (!word.empty()) {
                vecStr.push_back(word);
                word.clear();
            }
        }
        vecVecStr.push_back(vecStr);
    }
    fb.close();
}
// Writes vecVecStr to the text file cf, one row per line, with a single space
// after every element (including the last one of a row).
// If the file cannot be opened a message is printed to cerr and nothing is
// written.
template <class AUTO>
void txtWriterVvs(std::vector<std::vector<AUTO>>& vecVecStr, std::string cf) {
    std::ofstream os(cf, std::ios::out);
    if (!os) {
        std::cerr << "error: cannot open " << cf << " for writing" << std::endl;
        return;
    }
    // Iterate by const reference: iterating by value deep-copied every row
    // (and every element) just to print it.
    for (const auto& row : vecVecStr) {
        for (const auto& item : row)
            os << item << " ";
        os << "" << std::endl;
    }
}
// Reads the text file cf and appends every line, lower-cased, to vecStr.
// If the file cannot be opened, "error" is printed and vecStr is left
// unchanged.
void txtReaderVs(std::vector<std::string>& vecStr, std::string cf) {
    std::filebuf fb;
    if (fb.open(cf.c_str(), std::ios::in) == nullptr) {
        std::cout << "error" << std::endl;
        return;
    }
    std::istream is(&fb);
    std::string lineInfo;
    while (std::getline(is, lineInfo)) {
        for (std::string::size_type i = 0; i < lineInfo.length(); ++i) {
            // tolower() is only defined for unsigned-char values and EOF,
            // so bytes above 0x7F must not be passed as negative chars.
            lineInfo[i] = static_cast<char>(::tolower(static_cast<unsigned char>(lineInfo[i])));
        }
        vecStr.push_back(lineInfo);
    }
    fb.close();
}
// Writes every string of vecStr to the text file cf, one per line.
// If the file cannot be opened a message is printed to cerr and nothing is
// written.
void txtWriterVs(std::vector<std::string>& vecStr, std::string cf) {
    std::ofstream os(cf, std::ios::out);
    if (!os) {
        std::cerr << "error: cannot open " << cf << " for writing" << std::endl;
        return;
    }
    // const reference: no per-line string copy.
    for (const auto& line : vecStr) {
        os << line << std::endl;
    }
}
/* Random-permutation simulation.
 *
 * Usage: RandSim <dicr.txt> <wd2.txt> <wd1s.txt> <Zr.txt> <loopNum>
 *   dicr: dictionary, one term per line.
 *   wd2:  one space-separated word list per line (the cells that get shuffled).
 *   wd1s: only its number of lines is used: that many shuffled wd2 cells are
 *         visited per loop.
 *   Zr:   output file; row i, column k is the number of visited cells in loop
 *         i that contain dictionary term k.
 *
 * Returns 0 on success and -1 on bad arguments or missing input files.
 */
int main(int argc, char** argv) {
    if (argc != 6) {
        cout << "Unexpected count of input arguments! Please input paths of dicr, wd2, wd1s, Zr and loop num! eg: F:\\myWork\\dicr.txt or 1000" << endl;
        return -1;
    }
    const string dicrName = argv[1];
    const string wd2Name = argv[2];
    const string wd1sName = argv[3];
    const string ZrName = argv[4];
    const int loopNum = atoi(argv[5]);
    if (loopNum < 0) {
        // A negative count would be converted to a huge size when sizing Zr.
        cout << "Invalid loop num! Please input a non-negative integer!" << endl;
        return -1;
    }

    if (access(dicrName.c_str(), F_OK) == -1) {
        cout << "Invalid dicr file path! Please input a right path!" << endl;
        return -1;
    }
    if (access(wd2Name.c_str(), F_OK) == -1) {
        cout << "Invalid wd2 file path! Please input a right path!" << endl;
        return -1;
    }
    if (access(wd1sName.c_str(), F_OK) == -1) {
        cout << "Invalid wd1s file path! Please input a right path!" << endl;
        return -1;
    }

    // Inputs: dicr, wd2, wd1s
    vector<string> dicr;
    vector<vector<string>> wd2;
    vector<vector<string>> wd1s;
    txtReaderVs(dicr, dicrName);
    txtReaderVvs(wd2, wd2Name);
    txtReaderVvs(wd1s, wd1sName);

    // Term -> column index (for a repeated term the last occurrence wins).
    unordered_map<string, int> dicrHashMap;
    for (int i = 0; i < (int)dicr.size(); ++i)
        dicrHashMap[dicr[i]] = i;

    // Output: Zr
    vector<vector<int>> Zr(loopNum, vector<int>(dicr.size(), 0));

    // Identity permutation over the wd2 rows; reshuffled in every loop.
    vector<int> randNums(wd2.size(), 0);
    for (int i = 0; i < (int)wd2.size(); ++i) {
        randNums[i] = i;
    }

    // Never visit more cells than wd2 has: indexing randNums/wd2 with
    // j >= wd2.size() read past the end when wd1s was the longer input.
    const size_t cellCount = wd1s.size() < wd2.size() ? wd1s.size() : wd2.size();

    std::random_device rd;
    std::default_random_engine engine(rd());    // Seeded once, reused by every shuffle

    vector<char> seen(dicr.size(), 0);  // Marks terms already counted for the current cell
    vector<int> touched;                // Indices to un-mark once the cell is done

    for (int i = 0; i < loopNum; ++i) {
        std::shuffle(randNums.begin(), randNums.end(), engine);
        vector<int>& zrRow = Zr[i];
        for (size_t j = 0; j < cellCount; ++j) {
            const vector<string>& cell = wd2[randNums[j]];
            // Count each dictionary term at most once per cell. Only the
            // terms that occur are touched, rather than scanning the whole
            // dictionary for every cell.
            touched.clear();
            for (const string& word : cell) {
                unordered_map<string, int>::const_iterator hit = dicrHashMap.find(word);
                if (hit == dicrHashMap.end()) continue;
                const int termIdx = hit->second;
                if (seen[termIdx]) continue;
                seen[termIdx] = 1;
                touched.push_back(termIdx);
                ++zrRow[termIdx];
            }
            for (int termIdx : touched) seen[termIdx] = 0;
        }
    }

    txtWriterVvs(Zr, ZrName);
    return 0;
}

View File

@ -0,0 +1,3 @@
#pragma once
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.Foundation.Collections.h>

30
RandSim/readme.txt 100644
View File

@ -0,0 +1,30 @@
========================================================================
C++/WinRT RandSim Project Overview
========================================================================
This project demonstrates how to get started consuming Windows Runtime
classes directly from standard C++, using platform projection headers
generated from Windows SDK metadata files.
Steps to generate and consume SDK platform projection:
1. Build the project once to generate the platform projection headers into
your Generated Files folder.
2. Include a projection namespace header in your pch.h, such as
<winrt/Windows.Foundation.h>.
3. Consume winrt namespace and any Windows Runtime namespaces, such as
winrt::Windows::Foundation, from source code.
4. Initialize the apartment via init_apartment() and consume winrt classes.
Steps to generate and consume a projection from third party metadata:
1. Add a WinMD reference by right-clicking the References project node
and selecting "Add Reference...". In the Add References dialog,
browse to the component WinMD you want to consume and add it.
2. Build the project once to generate projection headers for the
referenced WinMD file under the "Generated Files" subfolder.
3. As above, include projection headers in pch or source code
to consume projected Windows Runtime classes.
========================================================================
Learn more about C++/WinRT here:
https://aka.ms/cppwinrt/
========================================================================

41
Twirls.sln 100644
View File

@ -0,0 +1,41 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.32014.148
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GMM", "GMM\GMM.vcxproj", "{A2B67815-1235-4F7C-874D-4FCCB3B0C738}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RandSim", "RandSim\RandSim.vcxproj", "{D3A55022-91F5-4DB2-85A6-A25AE12CC840}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Debug|x64.ActiveCfg = Debug|x64
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Debug|x64.Build.0 = Debug|x64
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Debug|x86.ActiveCfg = Debug|Win32
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Debug|x86.Build.0 = Debug|Win32
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Release|x64.ActiveCfg = Release|x64
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Release|x64.Build.0 = Release|x64
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Release|x86.ActiveCfg = Release|Win32
{A2B67815-1235-4F7C-874D-4FCCB3B0C738}.Release|x86.Build.0 = Release|Win32
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Debug|x64.ActiveCfg = Debug|x64
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Debug|x64.Build.0 = Debug|x64
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Debug|x86.ActiveCfg = Debug|Win32
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Debug|x86.Build.0 = Debug|Win32
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Release|x64.ActiveCfg = Release|x64
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Release|x64.Build.0 = Release|x64
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Release|x86.ActiveCfg = Release|Win32
{D3A55022-91F5-4DB2-85A6-A25AE12CC840}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {250954D6-DDDE-4F05-AE0F-FF61CD93D1E1}
EndGlobalSection
EndGlobal