SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
features
HashedDocDotFeatures.h
浏览该文件的文档.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
11
#ifndef _HASHEDDOCDOTFEATURES__H__
12
#define _HASHEDDOCDOTFEATURES__H__
13
14
#include <
shogun/features/DotFeatures.h
>
15
#include <
shogun/features/StringFeatures.h
>
16
#include <
shogun/converter/HashedDocConverter.h
>
17
#include <
shogun/lib/Tokenizer.h
>
18
19
namespace
shogun {
20
template
<
class
ST>
class
CStringFeatures;
21
template
<
class
ST>
class
SGMatrix;
22
class
CDotFeatures;
23
class
CHashedDocConverter;
24
class
CTokenizer;
25
36
class
CHashedDocDotFeatures
:
public
CDotFeatures
37
{
38
public
:
39
50
CHashedDocDotFeatures
(int32_t hash_bits=0,
CStringFeatures<char>
* docs=NULL,
51
CTokenizer
* tzer=NULL,
bool
normalize=
true
, int32_t n_grams=1, int32_t skips=0, int32_t size=0);
52
54
CHashedDocDotFeatures
(
const
CHashedDocDotFeatures
& orig);
55
60
CHashedDocDotFeatures
(
CFile
* loader);
61
63
virtual
~CHashedDocDotFeatures
();
64
72
virtual
int32_t
get_dim_feature_space
()
const
;
73
81
virtual
float64_t
dot
(int32_t vec_idx1,
CDotFeatures
* df, int32_t vec_idx2);
82
88
virtual
float64_t
dense_dot_sgvec
(int32_t vec_idx1,
const
SGVector<float64_t>
vec2);
89
96
virtual
float64_t
dense_dot
(int32_t vec_idx1,
const
float64_t
* vec2, int32_t vec2_len);
97
106
virtual
void
add_to_dense_vec
(
float64_t
alpha, int32_t vec_idx1,
float64_t
* vec2, int32_t vec2_len,
bool
abs_val=
false
);
107
115
virtual
int32_t
get_nnz_features_for_vector
(int32_t num);
116
127
virtual
void
*
get_feature_iterator
(int32_t vector_index);
128
140
virtual
bool
get_next_feature
(int32_t& index,
float64_t
& value,
void
* iterator);
141
148
virtual
void
free_feature_iterator
(
void
* iterator);
149
154
void
set_doc_collection
(
CStringFeatures<char>
* docs);
155
156
virtual
const
char
*
get_name
()
const
;
157
162
virtual
CFeatures
*
duplicate
()
const
;
163
168
virtual
EFeatureType
get_feature_type
()
const
;
169
174
virtual
EFeatureClass
get_feature_class
()
const
;
175
180
virtual
int32_t
get_num_vectors
()
const
;
181
190
static
uint32_t
calculate_token_hash
(
char
* token, int32_t length,
191
int32_t
num_bits
, uint32_t seed);
192
193
private
:
194
void
init(int32_t hash_bits,
CStringFeatures<char>
* docs,
CTokenizer
* tzer,
195
bool
normalize, int32_t n_grams, int32_t skips);
196
197
protected
:
199
CStringFeatures<char>
*
doc_collection
;
200
202
int32_t
num_bits
;
203
205
CTokenizer
*
tokenizer
;
206
208
bool
should_normalize
;
209
211
int32_t
ngrams
;
212
214
int32_t
tokens_to_skip
;
215
};
216
}
217
218
#endif
SHOGUN
机器学习工具包 - 项目文档