SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
features
ExplicitSpecFeatures.cpp
浏览该文件的文档.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2009 Soeren Sonnenburg
8
* Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9
*/
10
11
#include <
shogun/features/ExplicitSpecFeatures.h
>
12
#include <
shogun/io/SGIO.h
>
13
14
using namespace
shogun;
15
16
/** Default constructor.
 *
 * Emits the SG_UNSTABLE notice and leaves the object empty: no strings,
 * zero-sized spectrum, normalization disabled and no spectrum storage.
 */
CExplicitSpecFeatures::CExplicitSpecFeatures() : CDotFeatures()
{
	SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()", "\n");

	/* no data attached yet */
	k_spectrum = NULL;
	num_strings = 0;

	/* sizes are unknown until string features are supplied */
	alphabet_size = 0;
	spec_size = 0;

	use_normalization = false;
}
28
29
30
/** Constructor building the explicit spectrum from string features.
 *
 * @param str string features to take the spectrum from; must not be NULL
 * @param normalize whether each spectrum vector is scaled to unit
 *        Euclidean norm while it is being built
 */
CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
{
	ASSERT(str)

	use_normalization=normalize;
	num_strings = str->get_num_vectors();
	spec_size = str->get_num_symbols();

	/* This constructor never derived alphabet_size from str; give it the
	 * same defined value the default constructor uses so that the copy
	 * constructor does not read an indeterminate member. */
	alphabet_size = 0;

	/* keep the pointer well-defined until obtain_kmer_spectrum() sets it */
	k_spectrum = NULL;

	obtain_kmer_spectrum(str);

	SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings)
}
42
43
/** Copy constructor.
 *
 * Makes a deep copy of the spectrum vectors held by orig.
 *
 * @param orig object to copy from
 */
CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
	num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
{
	/* the normalization flag was previously not copied at all */
	use_normalization = orig.use_normalization;

	k_spectrum = SG_MALLOC(float64_t*, num_strings);

	/* Clone from the SOURCE object's rows. The previous code cloned
	 * k_spectrum[i] from itself, i.e. from the uninitialized pointers of
	 * the array allocated just above. */
	for (int32_t i=0; i<num_strings; i++)
		k_spectrum[i]=SGVector<float64_t>::clone_vector(orig.k_spectrum[i], spec_size);
}
50
51
/** Destructor; releases the spectrum storage via delete_kmer_spectrum(). */
CExplicitSpecFeatures::~CExplicitSpecFeatures()
{
	delete_kmer_spectrum();
}
55
56
/** Dimensionality of the feature space.
 *
 * @return spec_size, the length of every spectrum vector
 */
int32_t CExplicitSpecFeatures::get_dim_feature_space() const
{
	return spec_size;
}
60
61
/** Dot product between one vector of this object and one of another.
 *
 * @param vec_idx1 index of the vector in this object
 * @param df other features object; must report the same feature type and
 *        feature class as this object
 * @param vec_idx2 index of the vector in df
 * @return dot product of the two spectrum vectors over spec_size entries
 */
float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
{
	ASSERT(df)
	ASSERT(df->get_feature_type() == get_feature_type())
	ASSERT(df->get_feature_class() == get_feature_class())
	CExplicitSpecFeatures* sf = static_cast<CExplicitSpecFeatures*>(df);

	/* Both vectors are read for spec_size entries below, so the other
	 * object must use the same spectrum length or we would read past the
	 * end of its buffer. */
	ASSERT(sf->spec_size == spec_size)

	/* reject negative indices as well as indices past the end */
	ASSERT(vec_idx1 >= 0 && vec_idx1 < num_strings)
	ASSERT(vec_idx2 >= 0 && vec_idx2 < sf->num_strings)

	float64_t* vec1=k_spectrum[vec_idx1];
	float64_t* vec2=sf->k_spectrum[vec_idx2];

	return SGVector<float64_t>::dot(vec1, vec2, spec_size);
}
75
76
/** Dot product between one stored vector and a dense vector.
 *
 * @param vec_idx1 index of the stored vector
 * @param vec2 dense vector
 * @param vec2_len length of vec2; asserted to equal spec_size
 * @return sum over all entries of the element-wise products
 */
float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
{
	ASSERT(vec2_len == spec_size)
	ASSERT(vec_idx1 < num_strings)

	const float64_t* spectrum=k_spectrum[vec_idx1];
	float64_t sum=0;

	/* accumulate front to back, one product at a time */
	int32_t pos=0;
	while (pos<spec_size)
	{
		sum+=spectrum[pos]*vec2[pos];
		pos++;
	}

	return sum;
}
88
89
/** Add a scaled stored vector onto a dense vector in place.
 *
 * @param alpha scaling factor applied to every stored entry
 * @param vec_idx1 index of the stored vector
 * @param vec2 dense vector that is updated
 * @param vec2_len length of vec2; asserted to equal spec_size
 * @param abs_val if true, the absolute value of each stored entry is
 *        scaled and added instead of the entry itself
 */
void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
{
	ASSERT(vec2_len == spec_size)
	ASSERT(vec_idx1 < num_strings)

	const float64_t* spectrum=k_spectrum[vec_idx1];

	for (int32_t pos=0; pos<spec_size; pos++)
	{
		/* choose the magnitude or the signed entry, then scale and add */
		const float64_t entry = abs_val ? CMath::abs(spectrum[pos]) : spectrum[pos];
		vec2[pos]+=alpha*entry;
	}
}
106
107
/** Build the explicit spectrum of every string.
 *
 * Allocates num_strings vectors of spec_size entries. For each string the
 * vector counts how often each symbol value occurs. If use_normalization
 * is set, every vector is then divided by its Euclidean norm.
 *
 * Reads num_strings, spec_size and use_normalization, so these must be
 * set before the call.
 *
 * @param str string features to read the symbols from; must not be NULL
 */
void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
{
	ASSERT(str)

	k_spectrum= SG_MALLOC(float64_t*, num_strings);

	for (int32_t i=0; i<num_strings; i++)
	{
		k_spectrum[i]=SG_MALLOC(float64_t, spec_size);
		memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);

		int32_t len=0;
		bool free_fv=false;
		uint16_t* fv=str->get_feature_vector(i, len, free_fv);

		/* each symbol value is used directly as the index of its counter */
		for (int32_t j=0; j<len; j++)
			k_spectrum[i][fv[j]]++;

		str->free_feature_vector(fv, i, free_fv);

		if (use_normalization)
		{
			float64_t n=0;
			for (int32_t j=0; j<spec_size; j++)
				n+=CMath::sq(k_spectrum[i][j]);

			n=CMath::sqrt(n);

			/* An empty string gives an all-zero vector with norm 0.
			 * Dividing by it would turn every entry into NaN, so such
			 * vectors are left as zeros. */
			if (n>0)
			{
				for (int32_t j=0; j<spec_size; j++)
					k_spectrum[i][j]/=n;
			}
		}
	}
}
138
139
void
CExplicitSpecFeatures::delete_kmer_spectrum
()
140
{
141
for
(int32_t i=0; i<
num_strings
; i++)
142
SG_FREE(
k_spectrum
[i]);
143
144
SG_FREE(
k_spectrum
);
145
k_spectrum
=NULL;
146
}
147
148
/** Create a copy of this object through the copy constructor.
 *
 * @return newly allocated CExplicitSpecFeatures
 */
CFeatures* CExplicitSpecFeatures::duplicate() const
{
	return new CExplicitSpecFeatures(*this);
}
152
153
154
155
/** Feature iteration is not implemented for this class.
 *
 * @param vector_index unused
 * @return NULL, after SG_NOTIMPLEMENTED has been invoked
 */
void* CExplicitSpecFeatures::get_feature_iterator(int32_t vector_index)
{
	SG_NOTIMPLEMENTED
	return NULL;
}
160
161
/** Feature iteration is not implemented for this class.
 *
 * @param index unused; left untouched
 * @param value unused; left untouched
 * @param iterator unused
 * @return false, after SG_NOTIMPLEMENTED has been invoked
 */
bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
{
	SG_NOTIMPLEMENTED
	return false;
}
166
167
void
CExplicitSpecFeatures::free_feature_iterator
(
void
* iterator)
168
{
169
SG_NOTIMPLEMENTED
170
}
171
172
/** Counting non-zero features is not implemented for this class.
 *
 * @param num unused
 * @return 0, after SG_NOTIMPLEMENTED has been invoked
 */
int32_t CExplicitSpecFeatures::get_nnz_features_for_vector(int32_t num)
{
	SG_NOTIMPLEMENTED
	return 0;
}
177
178
/** Feature type of this object.
 *
 * @return F_UNKNOWN
 */
EFeatureType CExplicitSpecFeatures::get_feature_type() const
{
	return F_UNKNOWN;
}
182
183
/** Feature class of this object.
 *
 * @return C_SPEC
 */
EFeatureClass CExplicitSpecFeatures::get_feature_class() const
{
	return C_SPEC;
}
187
188
/** Number of vectors held by this object.
 *
 * @return num_strings
 */
int32_t CExplicitSpecFeatures::get_num_vectors() const
{
	return num_strings;
}
SHOGUN
机器学习工具包 - 项目文档