00001 00031 #include <itpp/stat/mog_diag_kmeans.h> 00032 #include <iostream> 00033 00034 namespace itpp { 00035 00036 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const { 00037 double acc = 0.0; 00038 for(int d=0;d<D;d++) { double tmp = x[d]-y[d]; acc += tmp*tmp; } 00039 return(acc); 00040 } 00041 00042 void MOG_diag_kmeans_sup::assign_to_means() { 00043 00044 for(int k=0;k<K;k++) c_count[k] = 0; 00045 00046 for(int n=0;n<N;n++) { 00047 00048 int k = 0; 00049 double min_dist = dist( c_means[k], c_X[n] ); 00050 int k_winner = k; 00051 00052 for(int k=1;k<K;k++) { 00053 double tmp_dist = dist( c_means[k], c_X[n] ); 00054 if(tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; } 00055 } 00056 00057 c_partitions[ k_winner ][ count[k_winner] ] = n; 00058 c_count[k_winner]++; 00059 } 00060 } 00061 00062 00063 void MOG_diag_kmeans_sup::recalculate_means() { 00064 00065 for(int k=0;k<K;k++) { 00066 00067 for(int d=0;d<D;d++) c_tmpvec[d] = 0.0; 00068 00069 int Nk = c_count[k]; 00070 00071 for(int n=0;n<Nk;n++) { 00072 double * x = c_X[ c_partitions[k][n] ]; 00073 for(int d=0;d<D;d++) c_tmpvec[d] += x[d]; 00074 } 00075 00076 if(Nk > 0) { 00077 double * c_mean = c_means[k]; 00078 for(int d=0;d<D;d++) c_mean[d] = c_tmpvec[d] / Nk; 00079 } 00080 } 00081 00082 } 00083 00084 00085 bool MOG_diag_kmeans_sup::dezombify_means() { 00086 00087 static int counter = 0; 00088 00089 bool zombie_mean = false; 00090 00091 int k = 0; 00092 int max_count = count[k]; 00093 int k_hog = k; 00094 00095 for(int k=1;k<K;k++) if( c_count[k] > max_count ) { max_count = c_count[k]; k_hog = k; } 00096 00097 for(int k=0;k<K;k++) { 00098 if( c_count[k] == 0 ) { 00099 00100 zombie_mean = true; 00101 if(verbose) it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean"); 00102 00103 if(k_hog == k) { 00104 it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k"); 00105 return(false); 00106 } 00107 00108 if( counter >= c_count[k_hog] ) counter = 0; 00109 00110 double * c_mean = c_means[k]; 00111 double * c_x = c_X[ c_partitions[k_hog][counter] ]; 00112 00113 for(int d=0;d<D;d++) c_mean[d] = 0.5*( c_means[k_hog][d] + c_x[d] ); 00114 counter++; 00115 } 00116 00117 } 00118 00119 if(zombie_mean) assign_to_means(); 00120 00121 return(true); 00122 } 00123 00124 00125 double MOG_diag_kmeans_sup::measure_change() const { 00126 00127 double tmp_dist = 0.0; 00128 for(int k=0;k<K;k++) tmp_dist += dist( c_means[k], c_means_old[k] ); 00129 return(tmp_dist); 00130 } 00131 00132 00133 void MOG_diag_kmeans_sup::initial_means() { 00134 00135 for(int d=0;d<D;d++) c_tmpvec[d] = 0.0; 00136 00137 for(int n=0;n<N;n++) { 00138 double * c_x = c_X[n]; 00139 for(int d=0;d<D;d++) c_tmpvec[d] += c_x[d]; 00140 } 00141 00142 for(int d=0;d<D;d++) c_tmpvec[d] /= N; 00143 00144 int step = int(floor(double(N)/double(K))); 00145 for(int k=0;k<K;k++) { 00146 double * c_mean = c_means[k]; 00147 double * c_x = c_X[k*step]; 00148 00149 for(int d=0;d<D;d++) c_mean[d] = 0.5*(c_tmpvec[d] + c_x[d]); 00150 } 00151 } 00152 00153 00154 void MOG_diag_kmeans_sup::iterate() { 00155 00156 for(int k=0;k<K;k++) for(int d=0;d<D;d++) c_means_old[k][d] = c_means[k][d]; 00157 00158 for(int i=0;i<max_iter;i++) { 00159 00160 assign_to_means(); 00161 if(!dezombify_means()) return; 00162 recalculate_means(); 00163 00164 double change = measure_change(); 00165 00166 if(verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << " change = " << change << std::endl; 00167 if(change == 0) break; 00168 00169 for(int k=0;k<K;k++) for(int d=0;d<D;d++) c_means_old[k][d] = c_means[k][d]; 00170 } 00171 00172 } 00173 00174 00175 void MOG_diag_kmeans_sup::calc_means() { 00176 initial_means(); 00177 iterate(); 00178 } 00179 00180 00181 void MOG_diag_kmeans_sup::calc_covs() { 00182 00183 for(int k=0;k<K;k++) { 00184 int Nk = c_count[k]; 00185 00186 if(Nk >= 2) { 00187 double * c_mean = c_means[k]; 00188 00189 for(int d=0;d<D;d++) c_tmpvec[d] = 0.0; 00190 00191 for(int n=0;n<Nk;n++) { 00192 double * c_x = c_X[ c_partitions[k][n] ]; 00193 for(int d=0;d<D;d++) { double tmp = c_x[d] - c_mean[d]; c_tmpvec[d] += tmp*tmp; } 00194 } 00195 00196 for(int d=0;d<D;d++) c_diag_covs[k][d] = trust*(c_tmpvec[d] / (Nk-1.0) ) + (1.0-trust)*(1.0); 00197 } 00198 else { 00199 for(int d=0;d<D;d++) c_diag_covs[k][d] = 1.0; 00200 } 00201 } 00202 00203 } 00204 00205 00206 void MOG_diag_kmeans_sup::calc_weights() { 00207 for(int k=0;k<K;k++) c_weights[k] = trust*(c_count[k] / double(N)) + (1.0-trust)*(1.0/K); 00208 } 00209 00210 00211 void MOG_diag_kmeans_sup::normalise_vectors() { 00212 00213 for(int d=0;d<D;d++) { 00214 double acc = 0.0; for(int n=0;n<N;n++) acc += c_X[n][d]; 00215 c_norm_mu[d] = acc / N; 00216 } 00217 00218 for(int d=0;d<D;d++) { 00219 double acc = 0.0; for(int n=0;n<N;n++) { double tmp = c_X[n][d] - c_norm_mu[d]; acc += tmp*tmp; } 00220 c_norm_sd[d] = std::sqrt(acc/(N-1)); 00221 } 00222 00223 for(int n=0;n<N;n++) for(int d=0;d<D;d++) { 00224 c_X[n][d] -= c_norm_mu[d]; 00225 if(c_norm_sd[d] > 0.0) c_X[n][d] /= c_norm_sd[d]; 00226 } 00227 } 00228 00229 00230 void MOG_diag_kmeans_sup::unnormalise_vectors() { 00231 00232 for(int n=0;n<N;n++) for(int d=0;d<D;d++) { 00233 if(c_norm_sd[d] > 0.0) c_X[n][d] *= c_norm_sd[d]; 00234 c_X[n][d] += c_norm_mu[d]; 00235 } 00236 } 00237 00238 00239 void MOG_diag_kmeans_sup::unnormalise_means() { 00240 00241 for(int k=0;k<K;k++) for(int d=0;d<D;d++) { 00242 if(norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d]; 00243 c_means[k][d] += norm_mu[d]; 00244 } 00245 } 00246 00247 00248 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) { 00249 00250 it_assert( model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid" ); 00251 it_assert( (max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero" ); 00252 it_assert( ((trust_in >= 0.0) && (trust_in <= 1.0) ), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)" ); 00253 00254 verbose = verbose_in; 00255 00256 Array<vec> means_in = model_in.get_means(); 00257 Array<vec> diag_covs_in = model_in.get_diag_covs(); 00258 vec weights_in = model_in.get_weights(); 00259 00260 init( means_in, diag_covs_in, weights_in ); 00261 00262 means_in.set_size(0); diag_covs_in.set_size(0); weights_in.set_size(0); 00263 00264 it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality" ); 00265 00266 N = X_in.size(); 00267 00268 if(K > N) it_warning("MOG_diag_kmeans_sup::run(): K > N"); 00269 else 00270 if(K > N/10) it_warning("MOG_diag_kmeans_sup::run(): K > N/10"); 00271 00272 max_iter = max_iter_in; 00273 trust = trust_in; 00274 00275 means_old.set_size(K); for(int k=0;k<K;k++) means_old(k).set_size(D); 00276 partitions.set_size(K); for(int k=0;k<K;k++) partitions(k).set_size(N); 00277 count.set_size(K); 00278 tmpvec.set_size(D); 00279 norm_mu.set_size(D); 00280 norm_sd.set_size(D); 00281 00282 c_X = enable_c_access(X_in); 00283 c_means_old = enable_c_access(means_old); 00284 c_partitions = enable_c_access(partitions); 00285 c_count = enable_c_access(count); 00286 c_tmpvec = enable_c_access(tmpvec); 00287 c_norm_mu = enable_c_access(norm_mu); 00288 c_norm_sd = enable_c_access(norm_sd); 00289 00290 if(normalise_in) normalise_vectors(); 00291 00292 calc_means(); if(normalise_in) { unnormalise_vectors(); unnormalise_means(); } 00293 calc_covs(); 00294 calc_weights(); 00295 00296 model_in.init(means, diag_covs, weights); 00297 00298 disable_c_access(c_X); 00299 disable_c_access(c_means_old); 00300 disable_c_access(c_partitions); 00301 disable_c_access(c_count); 00302 disable_c_access(c_tmpvec); 00303 disable_c_access(c_norm_mu); 00304 disable_c_access(c_norm_sd); 00305 00306 means_old.set_size(0); 00307 partitions.set_size(0); 00308 count.set_size(0); 00309 tmpvec.set_size(0); 00310 norm_mu.set_size(0); 00311 norm_sd.set_size(0); 00312 00313 cleanup(); 00314 00315 } 00316 00317 // 00318 // convenience functions 00319 00320 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) { 00321 MOG_diag_kmeans_sup km; 00322 km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in); 00323 } 00324 00325 00326 } 00327
Generated on Sat Apr 19 10:41:57 2008 for IT++ by Doxygen 1.5.5