# Tokenizes the rest of the input and reports every token to +encoder+.
#
# The method is a small state machine:
#
# :initial             - ordinary code
# :string              - inside a double-quoted string literal
# :include_expected    - right after an "#include" directive
# :class_name_expected - right after the "class" keyword
#
# encoder - object receiving text_token(text, kind), begin_group(kind) and
#           end_group(kind) calls.
# options - accepted for interface compatibility; not read by this method.
#
# Returns +encoder+. Calls raise_inspect when the state is unknown or when
# no rule of the :string state applies.
def scan_tokens encoder, options

  state = :initial
  # True while an identifier directly followed by a single ':' is a label.
  label_expected = true
  # True after a 'case' / 'default' keyword, until the next operator token.
  case_expected = false
  # Value of label_expected at the moment the current directive started.
  label_expected_before_preproc_line = nil
  in_preproc_line = false

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # A newline that is not a line continuation ends the preprocessor
        # line, so the label state from before the directive is restored.
        if in_preproc_line && match != "\\\n" && match.index(?\n)
          in_preproc_line = false
          label_expected = label_expected_before_preproc_line
        end
        encoder.text_token match, :space

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        encoder.text_token match, :comment

      elsif match = scan(/ \# \s* if \s* 0 /x)
        # Everything up to the matching-looking #elif/#else/#endif line (or
        # the end of input) is treated as one comment.
        match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
        encoder.text_token match, :comment

      elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
        label_expected = match =~ /[;\{\}]/
        if case_expected
          # The ':' closing "case x" / "default" starts a new statement.
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      # String and character literals are tried before identifiers so that
      # the wide-literal prefix "L" is not consumed as an identifier first.
      elsif match = scan(/L?"/)
        encoder.begin_group :string
        if match.start_with?('L')
          encoder.text_token 'L', :modifier
          match = '"'
        end
        state = :string
        encoder.text_token match, :delimiter

      elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
        label_expected = false
        encoder.text_token match, :char

      elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        kind = IDENT_KIND[match]
        if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
          kind = :label
          match << matched
        else
          label_expected = false
          if kind == :keyword
            case match
            when 'class'
              state = :class_name_expected
            when 'case', 'default'
              case_expected = true
            end
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/\$/)
        encoder.text_token match, :ident

      elsif match = scan(/#[ \t]*(\w*)/)
        encoder.text_token match, :preprocessor
        in_preproc_line = true
        label_expected_before_preproc_line = label_expected
        state = :include_expected if self[1] == 'include'

      elsif match = scan(/0[xX][0-9A-Fa-f]+/)
        label_expected = false
        encoder.text_token match, :hex

      # The digit runs below are atomic groups: without them the lookahead
      # failure backtracks into the digits and splits literals such as
      # "12.5" into several tokens.
      elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
        label_expected = false
        encoder.text_token match, :octal

      elsif match = scan(/(?>\d+)(?![.eEfF])L?L?/)
        label_expected = false
        encoder.text_token match, :integer

      # Fractional form, then exponent form, then a digits-only fallback
      # that guarantees any remaining digit run is consumed as one token.
      elsif match = scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
        label_expected = false
        encoder.text_token match, :float

      else
        encoder.text_token getch, :error

      end

    when :string
      if match = scan(/[^\\"]+/)
        encoder.text_token match, :content
      elsif match = scan(/"/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        state = :initial
        label_expected = false
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ | $ /x)
        # A backslash that starts no known escape aborts the string.
        encoder.end_group :string
        encoder.text_token match, :error
        state = :initial
        label_expected = false
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    when :include_expected
      if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
        encoder.text_token match, :include
        state = :initial

      elsif match = scan(/\s+/)
        encoder.text_token match, :space
        state = :initial if match.index ?\n

      else
        # Nothing consumed here; the :initial rules handle the input.
        state = :initial

      end

    when :class_name_expected
      if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        encoder.text_token match, :class
        state = :initial

      elsif match = scan(/\s+/)
        encoder.text_token match, :space

      else
        encoder.text_token getch, :error
        state = :initial

      end

    else
      raise_inspect 'Unknown state', encoder

    end

  end

  # Close a string group left open by an unterminated literal.
  if state == :string
    encoder.end_group :string
  end

  encoder
end