ANTLR Support Libraries 2.7.1+
CharScanner.hpp
Go to the documentation of this file.
1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3 
4 /* ANTLR Translator Generator
5  * Project led by Terence Parr at http://www.jGuru.com
6  * Software rights: http://www.antlr.org/license.html
7  *
8  * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
9  */
10 
11 #include <antlr/config.hpp>
12 
13 #include <map>
14 #include <strings.h>
15 #include <cstdio>
16 
17 #ifdef HAS_NOT_CCTYPE_H
18 #include <ctype.h>
19 #else
20 #include <cctype>
21 #endif
22 
23 #if ( _MSC_VER == 1200 )
24 // VC6 seems to need this
25 // note that this is not a standard C++ include file.
26 # include <stdio.h>
27 #endif
28 
29 #include <antlr/TokenStream.hpp>
33 #include <antlr/InputBuffer.hpp>
34 #include <antlr/BitSet.hpp>
36 
37 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
38 namespace antlr {
39 #endif
40 
42 
43 ANTLR_C_USING(tolower)
44 
45 #ifdef ANTLR_REALLY_NO_STRCASECMP
46 // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
47 // on the mac has neither...
/** Case-insensitive comparison of two NUL-terminated C strings.
 *
 * Fallback for platforms that provide neither strcasecmp nor stricmp.
 * Each character is converted to unsigned char before it is lower-cased and
 * compared: handing a negative char value to tolower() is undefined
 * behaviour, and comparing the results as unsigned values orders bytes
 * >= 0x80 after the ASCII range, the way strcmp() orders them.
 *
 * @param s1 first string
 * @param s2 second string
 * @return -1 if s1 sorts before s2, 1 if it sorts after s2, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// both characters are equal here; stop once the terminator is reached
		if (c1 == 0) return 0;
	}
}
59 #else
60 #ifdef NO_STRCASECMP
61 ANTLR_C_USING(stricmp)
62 #else
63 ANTLR_C_USING(strcasecmp)
64 #endif
65 #endif
66 
69 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
70 private:
72 public:
73 #ifdef NO_TEMPLATE_PARTS
74  CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
75 #endif
77  : scanner(theScanner)
78  {
79  }
80  bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
81 // defaults are good enough..
82  // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
83  // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
84 };
85 
89 protected:
90  typedef RefToken (*factory_type)(); ///< Pointer to a function that takes no arguments and returns a RefToken.
91 public:
// The three constructors below are only declared here; their definitions are
// not part of this listing. NOTE(review): case_sensitive presumably
// initialises caseSensitive - confirm against the definitions.
92  CharScanner(InputBuffer& cb, bool case_sensitive ); ///< Construct from an input buffer passed by reference.
93  CharScanner(InputBuffer* cb, bool case_sensitive ); ///< Construct from an input buffer passed by pointer.
94  CharScanner(const LexerSharedInputState& state, bool case_sensitive ); ///< Construct from an existing shared input state.
95 
96  virtual ~CharScanner()
97  {
98  }
99 
100  virtual int LA(unsigned int i); ///< Lookahead character i of the input; defined inline after the class, where it is lower-cased via toLower() unless caseSensitive is set.
101 
102  virtual void append(char c)
103  {
104  if (saveConsumedInput)
105  {
106  size_t l = text.length();
107 
108  if ((l%256) == 0)
109  text.reserve(l+256);
110 
111  text.replace(l,0,&c,1);
112  }
113  }
114 
115  virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
116  {
117  if( saveConsumedInput )
118  text += s;
119  }
120 
121  virtual void commit()
122  {
123  inputState->getInput().commit();
124  }
125 
129  virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
130  {
131  consume();
132  consumeUntil(tokenSet);
133  }
134 
135  virtual void consume()
136  {
137  if (inputState->guessing == 0)
138  {
139  int c = LA(1);
140  if (caseSensitive)
141  {
142  append(c);
143  }
144  else
145  {
146  // use input.LA(), not LA(), to get original case
147  // CharScanner.LA() would toLower it.
148  append(inputState->getInput().LA(1));
149  }
150 
151  // RK: in a sense I don't like this automatic handling.
152  if (c == '\t')
153  tab();
154  else
155  inputState->column++;
156  }
157  inputState->getInput().consume();
158  }
159 
161  virtual void consumeUntil(int c)
162  {
163  for(;;)
164  {
165  int la_1 = LA(1);
166  if( la_1 == EOF_CHAR || la_1 == c )
167  break;
168  consume();
169  }
170  }
171 
173  virtual void consumeUntil(const BitSet& set)
174  {
175  for(;;)
176  {
177  int la_1 = LA(1);
178  if( la_1 == EOF_CHAR || set.member(la_1) )
179  break;
180  consume();
181  }
182  }
183 
185  virtual unsigned int mark()
186  {
187  return inputState->getInput().mark();
188  }
190  virtual void rewind(unsigned int pos)
191  {
192  inputState->getInput().rewind(pos);
193  }
194 
196  virtual void match(int c)
197  {
198  int la_1 = LA(1);
199  if ( la_1 != c )
200  throw MismatchedCharException(la_1, c, false, this);
201  consume();
202  }
203 
207  virtual void match(const BitSet& b)
208  {
209  int la_1 = LA(1);
210 
211  if ( !b.member(la_1) )
212  throw MismatchedCharException( la_1, b, false, this );
213  consume();
214  }
215 
219  virtual void match( const char* s )
220  {
221  while( *s != '\0' )
222  {
223  // the & 0xFF is here to prevent sign extension lateron
224  int la_1 = LA(1), c = (*s++ & 0xFF);
225 
226  if ( la_1 != c )
227  throw MismatchedCharException(la_1, c, false, this);
228 
229  consume();
230  }
231  }
235  virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
236  {
237  size_t len = s.length();
238 
239  for (size_t i = 0; i < len; i++)
240  {
241  // the & 0xFF is here to prevent sign extension lateron
242  int la_1 = LA(1), c = (s[i] & 0xFF);
243 
244  if ( la_1 != c )
245  throw MismatchedCharException(la_1, c, false, this);
246 
247  consume();
248  }
249  }
253  virtual void matchNot(int c)
254  {
255  int la_1 = LA(1);
256 
257  if ( la_1 == c )
258  throw MismatchedCharException(la_1, c, true, this);
259 
260  consume();
261  }
265  virtual void matchRange(int c1, int c2)
266  {
267  int la_1 = LA(1);
268 
269  if ( la_1 < c1 || la_1 > c2 )
270  throw MismatchedCharException(la_1, c1, c2, false, this);
271 
272  consume();
273  }
274 
275  virtual bool getCaseSensitive() const
276  {
277  return caseSensitive;
278  }
279 
280  virtual void setCaseSensitive(bool t)
281  {
282  caseSensitive = t;
283  }
284 
285  virtual bool getCaseSensitiveLiterals() const=0; ///< Pure virtual. CharScannerLiteralsLess::operator() calls it to pick a case-sensitive or case-insensitive string comparison.
286 
288  virtual int getLine() const
289  {
290  return inputState->line;
291  }
292 
294  virtual void setLine(int l)
295  {
296  inputState->line = l;
297  }
298 
300  virtual int getColumn() const
301  {
302  return inputState->column;
303  }
305  virtual void setColumn(int c)
306  {
307  inputState->column = c;
308  }
309 
311  virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
312  {
313  return inputState->filename;
314  }
316  virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
317  {
318  inputState->filename = f;
319  }
320 
321  virtual bool getCommitToPath() const
322  {
323  return commitToPath;
324  }
325 
326  virtual void setCommitToPath(bool commit)
327  {
328  commitToPath = commit;
329  }
330 
332  virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
333  {
334  return text;
335  }
336 
337  virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
338  {
339  text = s;
340  }
341 
342  virtual void resetText()
343  {
344  text = "";
345  inputState->tokenStartColumn = inputState->column;
346  inputState->tokenStartLine = inputState->line;
347  }
348 
349  virtual RefToken getTokenObject() const
350  {
351  return _returnToken;
352  }
353 
357  virtual void newline()
358  {
359  ++inputState->line;
360  inputState->column = 1;
361  }
362 
367  virtual void tab()
368  {
369  int c = getColumn();
370  int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
371  setColumn( nc );
372  }
374  int setTabsize( int size )
375  {
376  int oldsize = tabsize;
377  tabsize = size;
378  return oldsize;
379  }
381  int getTabSize() const
382  {
383  return tabsize;
384  }
385 
// The three reporting hooks below are only declared here; their definitions
// are not part of this listing.
387  virtual void reportError(const RecognitionException& e); ///< Report an error described by a RecognitionException.
388 
390  virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s); ///< Report an error given as a message string.
391 
393  virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s); ///< Report a warning given as a message string.
394 
396  {
397  return inputState->getInput();
398  }
399 
401  {
402  return inputState;
403  }
404 
408  {
409  inputState = state;
410  }
411 
413  virtual void setTokenObjectFactory(factory_type factory)
414  {
415  tokenFactory = factory;
416  }
417 
421  virtual int testLiteralsTable(int ttype) const
422  {
423  ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
424  if (i != literals.end())
425  ttype = (*i).second;
426  return ttype;
427  }
428 
434  virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
435  {
436  ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
437  if (i != literals.end())
438  ttype = (*i).second;
439  return ttype;
440  }
441 
443  virtual int toLower(int c) const
444  {
445  // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
446  // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
447  // this one is more structural. Maybe make this configurable.
448  return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
449  }
450 
466  virtual void uponEOF()
467  {
468  }
469 
// Tracing hooks, declared only in this header. The nested Tracer class calls
// traceIn() from its constructor and traceOut() from its destructor.
471  virtual void traceIndent(); ///< NOTE(review): presumably emits indentation for the current trace depth - confirm in the definition.
472  virtual void traceIn(const char* rname); ///< Called with the text handed to a Tracer when the Tracer is constructed.
473  virtual void traceOut(const char* rname); ///< Called with the same text when that Tracer is destroyed.
474 
// EOF_CHAR has the value of the C library's EOF. LA(), consumeUntil() and
// toLower() compare against it to detect the end of input.
475 #ifndef NO_STATIC_CONSTS
476  static const int EOF_CHAR = EOF;
477 #else
// Same constant spelled as an enumerator, used when NO_STATIC_CONSTS is defined.
478  enum {
479  EOF_CHAR = EOF
480  };
481 #endif
482 protected:
484  bool saveConsumedInput;
486  factory_type tokenFactory;
488  ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
489 
491 
494 
500 
501  int tabsize;
502 
504  virtual RefToken makeToken(int t)
505  {
506  RefToken tok = tokenFactory();
507  tok->setType(t);
508  tok->setColumn(inputState->tokenStartColumn);
509  tok->setLine(inputState->tokenStartLine);
510  return tok;
511  }
512 
515  class Tracer {
516  private:
518  const char* text;
519 
520  Tracer(const Tracer& other); // undefined
521  Tracer& operator=(const Tracer& other); // undefined
522  public:
523  Tracer( CharScanner* p,const char* t )
524  : parser(p), text(t)
525  {
526  parser->traceIn(text);
527  }
529  {
530  parser->traceOut(text);
531  }
532  };
533 
535 private:
536  CharScanner( const CharScanner& other ); // declared private and left undefined
537  CharScanner& operator=( const CharScanner& other ); // declared private and left undefined
538 
// NO_CHAR is the constant 0; nothing in this header listing refers to it.
539 #ifndef NO_STATIC_CONSTS
540  static const int NO_CHAR = 0;
541 #else
// Same constant spelled as an enumerator, used when NO_STATIC_CONSTS is defined.
542  enum {
543  NO_CHAR = 0
544  };
545 #endif
546 };
547 
548 inline int CharScanner::LA(unsigned int i)
549 {
550  int c = inputState->getInput().LA(i);
551 
552  if ( caseSensitive )
553  return c;
554  else
555  return toLower(c); // VC 6 tolower bug caught in toLower.
556 }
557 
558 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
559 {
560  if (scanner->getCaseSensitiveLiterals())
561  return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
562  else
563  {
564 #ifdef NO_STRCASECMP
565  return (stricmp(x.c_str(),y.c_str())<0);
566 #else
567  return (strcasecmp(x.c_str(),y.c_str())<0);
568 #endif
569  }
570 }
571 
572 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
573 }
574 #endif
575 
576 #endif //INC_CharScanner_hpp__
virtual void consumeUntil(const BitSet &set)
Definition: CharScanner.hpp:173
virtual RefToken getTokenObject() const
Definition: CharScanner.hpp:349
virtual int getLine() const
Get the line the scanner currently is in (starts at 1)
Definition: CharScanner.hpp:288
virtual void match(const char *s)
Definition: CharScanner.hpp:219
virtual void match(int c)
See if input contains character 'c'; throw MismatchedCharException if not.
Definition: CharScanner.hpp:196
virtual void newline()
Definition: CharScanner.hpp:357
Definition: ANTLRException.hpp:15
Definition: TokenRefCount.hpp:43
virtual void setColumn(int c)
set the column number
Definition: CharScanner.hpp:305
virtual void rewind(unsigned int pos)
Rewind the scanner to a previously marked position.
Definition: CharScanner.hpp:190
virtual bool getCaseSensitive() const
Definition: CharScanner.hpp:275
virtual void setTokenObjectFactory(factory_type factory)
Set the factory for created tokens.
Definition: CharScanner.hpp:413
virtual void match(const BitSet &b)
Definition: CharScanner.hpp:207
virtual int testLiteralsTable(const std ::string &txt, int ttype) const
Definition: CharScanner.hpp:434
virtual int toLower(int c) const
Override this method to get more specific case handling.
Definition: CharScanner.hpp:443
virtual bool getCaseSensitiveLiterals() const =0
Definition: InputBuffer.hpp:31
~Tracer()
Definition: CharScanner.hpp:528
virtual void setCommitToPath(bool commit)
Definition: CharScanner.hpp:326
factory_type tokenFactory
Factory for tokens.
Definition: CharScanner.hpp:486
virtual int getColumn() const
Get the column the scanner currently is in (starts at 1)
Definition: CharScanner.hpp:300
Tracer(CharScanner *p, const char *t)
Definition: CharScanner.hpp:523
virtual void tab()
Definition: CharScanner.hpp:367
virtual const std ::string & getText() const
Definition: CharScanner.hpp:332
virtual void consumeUntil(int c)
Definition: CharScanner.hpp:161
bool commitToPath
Definition: CharScanner.hpp:499
int getTabSize() const
Return the tabsize used by the scanner.
Definition: CharScanner.hpp:381
bool operator()(const std ::string &x, const std ::string &y) const
Definition: CharScanner.hpp:558
virtual LexerSharedInputState getInputState()
Definition: CharScanner.hpp:400
#define ANTLR_API
Definition: config.hpp:22
virtual void append(char c)
Definition: CharScanner.hpp:102
Definition: CharScanner.hpp:69
int tokenStartLine
Definition: LexerSharedInputState.hpp:128
std ::map< std ::string, int, CharScannerLiteralsLess > literals
Definition: CharScanner.hpp:488
virtual void match(const std ::string &s)
Definition: CharScanner.hpp:235
virtual void consume()
Definition: CharScanner.hpp:135
Definition: CharScanner.hpp:88
int tokenStartColumn
Definition: LexerSharedInputState.hpp:127
virtual const std ::string & getFilename() const
get the filename for the file currently used
Definition: CharScanner.hpp:311
virtual int LA(unsigned int i)
Definition: CharScanner.hpp:548
virtual void recover(const RecognitionException &ex, const BitSet &tokenSet)
Definition: CharScanner.hpp:129
virtual void setInputState(LexerSharedInputState state)
Definition: CharScanner.hpp:407
virtual void setFilename(const std ::string &f)
Set the filename the scanner is using (used in error messages)
Definition: CharScanner.hpp:316
Definition: MismatchedCharException.hpp:21
virtual unsigned int mark()
Mark the current position and return an id for it.
Definition: CharScanner.hpp:185
int setTabsize(int size)
set the tabsize. Returns the old tabsize
Definition: CharScanner.hpp:374
virtual ~CharScanner()
Definition: CharScanner.hpp:96
virtual bool getCommitToPath() const
Definition: CharScanner.hpp:321
virtual void setCaseSensitive(bool t)
Definition: CharScanner.hpp:280
CharScanner * parser
Definition: CharScanner.hpp:517
virtual void traceOut(const char *rname)
Definition: CharScanner.cpp:92
virtual void resetText()
Definition: CharScanner.hpp:342
TokenRefCount< Token > RefToken
Definition: TokenRefCount.hpp:92
const CharScanner * scanner
Definition: CharScanner.hpp:71
virtual void matchNot(int c)
Definition: CharScanner.hpp:253
virtual void uponEOF()
Definition: CharScanner.hpp:466
std ::string text
Definition: CharScanner.hpp:483
bool caseSensitive
Is this lexer case sensitive.
Definition: CharScanner.hpp:487
const char * text
Definition: CharScanner.hpp:518
virtual void commit()
Definition: CharScanner.hpp:121
Definition: TokenStream.hpp:22
virtual void matchRange(int c1, int c2)
Definition: CharScanner.hpp:265
Definition: BitSet.hpp:40
virtual RefToken makeToken(int t)
Create a new RefToken of type t.
Definition: CharScanner.hpp:504
bool member(unsigned int el) const
Definition: BitSet.cpp:40
Definition: RecognitionException.hpp:18
int traceDepth
Definition: CharScanner.hpp:534
virtual InputBuffer & getInputBuffer()
Definition: CharScanner.hpp:395
virtual int testLiteralsTable(int ttype) const
Definition: CharScanner.hpp:421
CharScannerLiteralsLess(const CharScanner *theScanner)
Definition: CharScanner.hpp:76
RefToken _returnToken
used to return tokens w/o using return val
Definition: CharScanner.hpp:490
virtual void setText(const std ::string &s)
Definition: CharScanner.hpp:337
#define ANTLR_USE_NAMESPACE(_x_)
Definition: config.hpp:18
virtual void append(const std ::string &s)
Definition: CharScanner.hpp:115
int tabsize
tab size the scanner uses.
Definition: CharScanner.hpp:501
LexerSharedInputState inputState
Input state, gives access to input stream, shared among different lexers.
Definition: CharScanner.hpp:493
#define ANTLR_C_USING(_x_)
Definition: config.hpp:21
virtual void setLine(int l)
set the line number
Definition: CharScanner.hpp:294
virtual void traceIn(const char *rname)
Definition: CharScanner.cpp:84
Definition: CharScanner.hpp:515