SHOGUN
3.2.1
首页
相关页面
模块
类
文件
文件列表
文件成员
全部
类
命名空间
文件
函数
变量
类型定义
枚举
枚举值
友元
宏定义
组
页
src
shogun
lib
DelimiterTokenizer.h
浏览该文件的文档.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
11
#ifndef _DELIMITERTOKENIZER__H__
12
#define _DELIMITERTOKENIZER__H__
13
14
#include <
shogun/lib/Tokenizer.h
>
15
16
namespace
shogun
17
{
18
// Forward declaration of the base class that CDelimiterTokenizer derives from.
class
CTokenizer;
19
26
class
CDelimiterTokenizer
:
public
CTokenizer
27
{
28
public
:
33
CDelimiterTokenizer
(
bool
skip_delimiters =
false
);
34
39
CDelimiterTokenizer
(
const
CDelimiterTokenizer
& orig);
40
42
virtual
~CDelimiterTokenizer
() {}
43
48
virtual
void
set_text
(
SGVector<char>
txt);
49
55
virtual
bool
has_next
();
56
65
virtual
index_t
next_token_idx
(
index_t
& start);
66
72
virtual
const
char
*
get_name
()
const
;
73
77
void
init_for_whitespace
();
78
79
CDelimiterTokenizer
*
get_copy
();
80
82
void
clear_delimiters
();
83
88
bool
get_skip_delimiters
()
const
;
89
94
void
set_skip_delimiters
(
bool
skip_delimiters);
95
96
private
:
97
void
init();
98
99
public
:
101
SGVector<bool>
delimiters
;
102
103
protected
:
105
index_t
last_idx
;
106
108
bool
skip_consecutive_delimiters
;
109
};
110
}
111
#endif
/* _DELIMITERTOKENIZER__H__ */
112
SHOGUN
机器学习工具包 - 项目文档