SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
classifier
vw
VwParser.h
浏览该文件的文档.
1
/*
 * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
 * embodied in the content of this file are licensed under the BSD
 * (revised) open source license.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Adaptation of Vowpal Wabbit v5.1.
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */
15
16
#ifndef _VW_PARSER_H__
17
#define _VW_PARSER_H__
18
19
#include <
shogun/base/SGObject.h
>
20
#include <
shogun/io/SGIO.h
>
21
#include <
shogun/lib/Hash.h
>
22
#include <
shogun/classifier/vw/vw_common.h
>
23
#include <
shogun/classifier/vw/cache/VwCacheWriter.h
>
24
25
namespace
shogun
26
{
28
/** Identifies the input format handled by the parser.
 *
 * The numeric values are fixed explicitly and must not be reordered.
 * NOTE(review): the enumerators presumably correspond to
 * CVwParser::read_features, read_svmlight_features and
 * read_dense_features respectively - confirm against the callers.
 */
enum E_VW_PARSER_TYPE
{
	/** VW format */
	T_VW = 1,
	/** SVMLight format */
	T_SVMLIGHT = 2,
	/** dense format */
	T_DENSE = 3
};
34
46
class
CVwParser
:
public
CSGObject
47
{
48
public
:
52
CVwParser
();
53
59
CVwParser
(
CVwEnvironment
* env_to_use);
60
64
virtual
~CVwParser
();
65
71
CVwEnvironment
*
get_env
()
72
{
73
SG_REF
(
env
);
74
return
env
;
75
}
76
82
void
set_env
(
CVwEnvironment
* env_to_use)
83
{
84
env
= env_to_use;
85
SG_REF
(
env
);
86
}
87
94
void
set_cache_parameters
(
char
* fname,
EVwCacheType
type =
C_NATIVE
)
95
{
96
init_cache
(fname, type);
97
}
98
104
EVwCacheType
get_cache_type
()
105
{
106
return
cache_type
;
107
}
108
114
void
set_write_cache
(
bool
wr_cache)
115
{
116
write_cache
= wr_cache;
117
if
(wr_cache)
118
init_cache
(NULL);
119
else
120
if
(
cache_writer
)
121
SG_UNREF
(
cache_writer
);
122
}
123
129
bool
get_write_cache
()
130
{
131
return
write_cache
;
132
}
133
139
void
set_mm
(
float64_t
label)
140
{
141
env
->
min_label
=
CMath::min
(
env
->
min_label
, label);
142
if
(label != FLT_MAX)
143
env
->
max_label
=
CMath::max
(
env
->
max_label
, label);
144
}
145
152
void
noop_mm
(
float64_t
label) { }
153
160
void
set_minmax
(
float64_t
label)
161
{
162
set_mm
(label);
163
}
164
173
int32_t
read_features
(
CIOBuffer
* buf,
VwExample
*& ex);
174
183
int32_t
read_svmlight_features
(
CIOBuffer
* buf,
VwExample
*& ae);
184
193
int32_t
read_dense_features
(
CIOBuffer
* buf,
VwExample
*& ae);
194
200
virtual
const
char
*
get_name
()
const
{
return
"VwParser"
; }
201
202
protected
:
209
void
init_cache
(
char
* fname,
EVwCacheType
type =
C_NATIVE
);
210
219
void
feature_value
(
substring
&s,
v_array<substring>
& name,
float32_t
&v);
220
229
void
tokenize
(
char
delim,
substring
s,
v_array<substring>
&ret);
230
241
inline
char
*
safe_index
(
char
*start,
char
v,
char
*max)
242
{
243
while
(start != max && *start != v)
244
start++;
245
return
start;
246
}
247
248
public
:
250
hash_func_t
hasher
;
251
252
protected
:
254
CVwEnvironment
*
env
;
256
CVwCacheWriter
*
cache_writer
;
258
EVwCacheType
cache_type
;
260
bool
write_cache
;
261
262
private
:
264
v_array<substring>
channels;
265
v_array<substring>
words;
266
v_array<substring>
name;
267
};
268
269
}
270
#endif // _VW_PARSER_H__
SHOGUN
机器学习工具包 - 项目文档