# Tokenizes C-like source text and reports every token to +encoder+.
#
# The method drives a small state machine over the scanner's own input
# (consumed through scan / scan_until / getch / eos?):
#
#   :initial           ordinary code: whitespace, comments, operators,
#                      identifiers, literals and preprocessor directives
#   :string            inside a double-quoted string literal
#   :include_expected  right after an "#include" directive, where a
#                      <header> or "header" argument may follow
#
# Parameters:
#   encoder - receives text_token(text, kind), begin_group(kind) and
#             end_group(kind) calls in source order.
#   options - accepted for interface compatibility; not read here.
#
# Returns the +encoder+ that was passed in.
#
# Relies on IDENT_KIND, ESCAPE, UNICODE_ESCAPE and raise_inspect, which are
# defined outside this method.
def scan_tokens encoder, options

  state = :initial
  # True while an identifier directly followed by ':' should be read as a
  # label (start of input, or after ';', '{', '}', or the ':' of a case).
  label_expected = true
  # True between a 'case'/'default' keyword and the next operator token.
  case_expected = false
  # label_expected is saved here when a preprocessor line starts and is
  # restored when that line ends, so directives do not disturb label detection.
  label_expected_before_preproc_line = nil
  in_preproc_line = false

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # A real newline (not a backslash line continuation) ends the
        # current preprocessor line.
        if in_preproc_line && match != "\\\n" && match.index(?\n)
          in_preproc_line = false
          label_expected = label_expected_before_preproc_line
        end
        encoder.text_token match, :space

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        # Line comment (may continue over escaped newlines) or block comment;
        # an unterminated block comment runs to the end of the input.
        encoder.text_token match, :comment

      elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
        # A '.' followed by a digit is left for the float rule below.
        label_expected = match =~ /[;\{\}]/
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        kind = IDENT_KIND[match]
        if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
          # "name:" in label position; the colon becomes part of the token.
          kind = :label
          match << matched
        else
          label_expected = false
          if kind == :keyword
            case match
            when 'case', 'default'
              case_expected = true
            end
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/L?"/)
        encoder.begin_group :string
        if match[0] == ?L
          # Wide-string prefix is reported separately from the quote.
          encoder.text_token 'L', :modifier
          match = '"'
        end
        encoder.text_token match, :delimiter
        state = :string

      elsif match = scan(/ \# \s* if \s* 0 /x)
        # Everything up to the matching-looking #elif/#else/#endif line (or
        # the end of input) is reported as one comment token.
        match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
        encoder.text_token match, :comment

      elsif match = scan(/#[ \t]*(\w*)/)
        encoder.text_token match, :preprocessor
        in_preproc_line = true
        label_expected_before_preproc_line = label_expected
        state = :include_expected if self[1] == 'include'

      elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
        label_expected = false
        encoder.text_token match, :char

      elsif match = scan(/\$/)
        encoder.text_token match, :ident

      elsif match = scan(/0[xX][0-9A-Fa-f]+/)
        label_expected = false
        encoder.text_token match, :hex

      # The digit runs below are atomic groups: without them the regexp
      # engine backtracks to a shorter run to satisfy the lookahead, which
      # split "12.5" into "1" + "2" + ".5" and "0128" into "01" + "28".
      elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
        label_expected = false
        encoder.text_token match, :octal

      elsif match = scan(/(?>\d+)(?![.eEfF])L?L?/)
        label_expected = false
        encoder.text_token match, :integer

      # Longest alternatives first, so "1.5" and "1e5" are single tokens.
      # The last alternative is the fallback for digits followed by a '.',
      # 'e' or 'f' that does not form a longer literal (e.g. "1." or "3f").
      elsif match = scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
        label_expected = false
        encoder.text_token match, :float

      else
        # Unrecognized character: consume exactly one so the loop advances.
        encoder.text_token getch, :error

      end

    when :string
      if match = scan(/[^\\\n"]+/)
        encoder.text_token match, :content
      elsif match = scan(/"/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        state = :initial
        label_expected = false
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ | $ /x)
        # Stray backslash or end of line inside the literal: close the
        # group and flag the problem.
        encoder.end_group :string
        encoder.text_token match, :error
        state = :initial
        label_expected = false
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    when :include_expected
      if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
        encoder.text_token match, :include
        state = :initial

      elsif match = scan(/\s+/)
        encoder.text_token match, :space
        if match.index ?\n
          state = :initial
          # The newline ends the directive, so close the preprocessor line
          # here exactly as the :initial whitespace branch does; otherwise
          # label detection stays disabled on the following line.
          if in_preproc_line
            in_preproc_line = false
            label_expected = label_expected_before_preproc_line
          end
        end

      else
        # Anything else (e.g. a macro name) is handled as ordinary code.
        state = :initial

      end

    else
      raise_inspect 'Unknown state', encoder

    end

  end

  # Input ended inside a string literal: close the still-open group.
  if state == :string
    encoder.end_group :string
  end

  encoder
end