SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
features
streaming
StreamingHashedDocDotFeatures.h
浏览该文件的文档.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
#ifndef _STREAMING_HASHEDDOCDOTFEATURES__H__
11
#define _STREAMING_HASHEDDOCDOTFEATURES__H__
12
13
#include <
shogun/features/StringFeatures.h
>
14
#include <
shogun/features/streaming/StreamingDotFeatures.h
>
15
#include <
shogun/lib/Tokenizer.h
>
16
#include <
shogun/converter/HashedDocConverter.h
>
17
#include <
shogun/io/streaming/InputParser.h
>
18
#include <
shogun/io/streaming/StreamingFileFromStringFeatures.h
>
19
20
namespace
shogun
21
{
22
// Forward declarations of the Shogun types that the class declared in this
// header names as its base class and as pointer members/parameters.
class CStreamingDotFeatures;
class CTokenizer;
class CHashedDocConverter;
25
40
class
CStreamingHashedDocDotFeatures
:
public
CStreamingDotFeatures
41
{
42
public
:
44
CStreamingHashedDocDotFeatures
();
45
57
CStreamingHashedDocDotFeatures
(
CStreamingFile
* file,
bool
is_labelled, int32_t size,
58
CTokenizer
* tzer, int32_t bits=20);
59
76
CStreamingHashedDocDotFeatures
(
CStringFeatures<char>
* dot_features,
CTokenizer
* tzer,
77
int32_t bits=20,
float64_t
* lab=NULL);
78
80
virtual
~CStreamingHashedDocDotFeatures
();
81
88
virtual
float32_t
dot
(
CStreamingDotFeatures
* df);
89
95
virtual
float32_t
dense_dot
(
const
float32_t
* vec2, int32_t vec2_len);
96
104
virtual
void
add_to_dense_vec
(
float32_t
alpha,
float32_t
* vec2,
105
int32_t vec2_len,
bool
abs_val=
false
);
106
114
virtual
int32_t
get_dim_feature_space
()
const
;
115
121
virtual
const
char
*
get_name
()
const
;
122
128
virtual
int32_t
get_num_vectors
()
const
;
129
135
virtual
CFeatures
*
duplicate
()
const
;
136
146
virtual
void
set_vector_reader
();
147
157
virtual
void
set_vector_and_label_reader
();
158
164
virtual
EFeatureType
get_feature_type
()
const
;
165
171
virtual
EFeatureClass
get_feature_class
()
const
;
172
177
virtual
void
start_parser
();
178
182
virtual
void
end_parser
();
183
191
virtual
float64_t
get_label
();
192
198
virtual
bool
get_next_example
();
199
205
virtual
void
release_example
();
206
212
virtual
int32_t
get_num_features
();
213
218
SGSparseVector<float64_t>
get_vector
();
219
224
void
set_normalization
(
bool
normalize);
225
233
void
set_k_skip_n_grams
(int32_t k, int32_t n);
234
235
private
:
236
void
init(
CStreamingFile
* file,
bool
is_labelled, int32_t size,
CTokenizer
* tzer,
237
int32_t bits,
bool
normalize, int32_t n_grams, int32_t skips);
238
239
protected
:
240
242
int32_t
num_bits
;
243
245
SGSparseVector<float64_t>
current_vector
;
246
248
CTokenizer
*
tokenizer
;
249
251
CHashedDocConverter
*
converter
;
252
254
CInputParser<char>
parser
;
255
257
float64_t
current_label
;
258
};
259
}
260
261
#endif // _STREAMING_HASHEDDOCDOTFEATURES__H__
SHOGUN
机器学习工具包 - 项目文档