function [x,gmm] = GMMrand(N,D,K,mu,sigma,w)
% Copyright 2017: Steven Van Kuyk
% This program comes WITHOUT ANY WARRANTY.
%
% Generate data according to a Gaussian Mixture Model
% See 'Pattern Recognition and Machine Learning', Bishop
%
% Usage:
%   [x,gmm] = GMMrand(N,D,K)                 % fully random model
%   [x,gmm] = GMMrand(N,D,K,mu,sigma,w)      % fully specified model
%   Any of mu, sigma, w may be omitted (trailing) or passed as [] to have
%   that parameter drawn at random.
%
% Inputs
% N: number of data points to generate
% D: number of dimensions
% K: number of mixtures
% mu: DxK matrix where each column is the mean of one mixture (optional)
% sigma: DxDxK array of covariance matrices (optional). Each slice must be
%        symmetric positive definite because it is factorised with chol.
% w: vector containing the weights of each mixture (optional). Negative
%    entries are clipped to zero and the vector is rescaled to sum to 1.
%
% Outputs:
% x: DxN matrix where columns are observations
% gmm: model structure with fields mu (DxK), sigma (DxDxK) and w (Kx1)
%
% Errors:
% Raises an error when the dimensions of mu/sigma disagree with D, when the
% number of components in mu/sigma/w disagrees with K, or when the weights
% do not have a positive, finite sum.
%
% Note: the random defaults rely on iwishrnd and gamrnd (Statistics and
% Machine Learning Toolbox); they are only called for omitted parameters.

% treat omitted trailing arguments exactly like empty ones
if nargin < 4, mu = []; end
if nargin < 5, sigma = []; end
if nargin < 6, w = []; end

% generate random model using Bayesian hyper-parameters
if isempty(sigma)
    v0 = D+1;      % degrees of freedom of the inverse-Wishart draw
    W0 = 1*eye(D); % scale matrix passed to iwishrnd
    sigma = zeros(D,D,K);
    for k=1:K
        sigma(:,:,k) = iwishrnd(W0,v0);
    end
end
if isempty(mu)
    beta0 = 0.1; % smaller beta0 means greater spread of the centroids
    mu0 = zeros(D,1);
    mu = zeros(D,K);
    for k=1:K
        % chol returns upper-triangular R with R'*R = A, so R'*randn has covariance A
        mu(:,k) = chol((1/beta0)*sigma(:,:,k))'*randn(D,1) + mu0;
    end
end
if isempty(w)
    a0 = 0.5*K; % roughly corresponds to number of dominant clusters
    w = gamrnd(a0*ones(K,1)/K,1); % normalised below
end
w = w(:);
w = max(0,w); % weights must be non-negative
wsum = sum(w);

% errors
if ~isequal(D,size(mu,1),size(sigma,1),size(sigma,2))
    error('Input error: inconsistent number of dimensions')
end
if ~isequal(K,size(mu,2),size(sigma,3),length(w))
    error('Input error: inconsistent number of GMM components')
end
if ~(wsum > 0 && isfinite(wsum))
    % without this check the normalisation below would yield NaN weights
    error('Input error: weights must have a positive, finite sum')
end
w = w/wsum; % weights must sum to 1

% generate data (ancestral sampling)
edges = min([0 ; cumsum(w)],1);
edges(end) = 1; % round-off in cumsum must not leave draws outside every bin
[~,~,z] = histcounts(rand(1,N),edges); % random cluster labels, z
x = zeros(D,N);
for k=1:K % each cluster
    ii = (z==k);
    nk = sum(ii);
    x(:,ii) = chol(sigma(:,:,k))'*randn(D,nk) + repmat(mu(:,k),1,nk); % sample p(x|z)
end

gmm.mu = mu;
gmm.sigma = sigma;
gmm.w = w;







