37 using namespace shogun;
50 REQUIRE(data,
"Data required for classification in apply_multiclass\n")
56 for (int32_t i=0; i<num_vecs; i++)
101 REQUIRE(data,
"Data required for training\n")
108 set_root(id3train(data, dynamic_cast<CMulticlassLabels*>(
m_labels), feature_ids, 0));
128 if (feature_id_vector.
vlen == 0)
134 int32_t most_label = labels[0];
135 int32_t most_num = 1;
138 for (int32_t i=1; i<labels.
vlen; i++)
140 while ((labels[i] == labels[i-1]) && (i<labels.
vlen))
149 most_label = labels[i-1];
155 node->data.class_label = most_label;
161 int32_t best_feature_index = -1;
164 float64_t gain = informational_gain_attribute(i,feats,class_labels);
169 best_feature_index = i;
175 for (int32_t i=0; i<num_vecs; i++)
181 for (int32_t i=0; i<best_labels_unique.
vlen; i++)
184 int32_t num_cols = 0;
185 float64_t active_feature_value = best_labels_unique[i];
187 for (int32_t j=0; j<num_vecs; j++)
189 if ( active_feature_value == best_feature_values[j])
198 for (int32_t j=0; j<num_vecs; j++)
201 if (active_feature_value == sample[best_feature_index])
204 for (int32_t k=0; k<sample.
size(); k++)
206 if (k != best_feature_index)
207 mat(++idx, cnt) = sample[k];
210 new_labels_vector[cnt] = class_labels->
get_labels()[j];
218 for (int32_t j=0;j<feature_id_vector.
vlen;j++)
220 if (j!=best_feature_index)
221 new_feature_id_vector[++cnt] = feature_id_vector[j];
227 node_t* child = id3train(new_data, new_class_labels, new_feature_id_vector, level+1);
228 child->data.transit_if_feature_value = active_feature_value;
229 node->data.attribute_id = feature_id_vector[best_feature_index];
230 node->add_child(child);
241 float64_t CID3ClassifierTree::informational_gain_attribute(int32_t attr_no,
CFeatures* data,
244 REQUIRE(data,
"Data required for information gain calculation\n")
246 "Dense data required for information gain calculation\n")
250 int32_t num_vecs = feats->get_num_vectors();
255 for (int32_t i=0; i<num_vecs; i++)
256 attribute_values[i] = (feats->get_feature_vector(i))[attr_no];
261 for (int32_t i=0; i<attr_val_unique.vlen; i++)
264 int32_t attr_count=0;
266 for (int32_t j=0; j<num_vecs; j++)
268 if (attribute_values[j] == attr_val_unique[i])
275 for (int32_t j=0; j<num_vecs; j++)
277 if (attribute_values[j] == attr_val_unique[i])
278 sub_class[count++] = class_labels->
get_label(j);
282 float64_t sub_entropy = entropy(sub_labels);
283 gain += sub_entropy*(attr_count-0.f)/(num_vecs-0.f);
288 float64_t data_entropy = entropy(class_labels);
289 gain = data_entropy-gain;
313 if (log_ratios[i] != 0)