# Tokenizes the remaining input and reports every token to +encoder+.
#
# The scanner is a small state machine. +:initial+ handles ordinary code;
# +:string+, +:regexp+ and +:multiline_string+ handle the inside of the
# corresponding literal. A "${" inside a literal switches back to +:initial+
# and pushes the literal's state onto +inline_block_stack+, so the literal is
# resumed at the matching "}".
#
# Triple-quoted strings are opened as a +:string+ group, so every path that
# closes one (normal delimiter, error recovery, end of input) must end a
# +:string+ group as well.
#
# encoder - object receiving +text_token+, +begin_group+ and +end_group+.
# options - not used by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  # Saved [state, string_delimiter, paren_depth] triples of the literals that
  # enclose the inline block currently being scanned.
  inline_block_stack = []
  inline_block_paren_depth = nil
  string_delimiter = nil
  import_clause = class_name_follows = last_token = after_def = false
  # Decides whether a "/" starts a regexp literal or is a division operator.
  value_expected = true

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        if match.index ?\n
          # A line break resets the import/def context.
          import_clause = after_def = false
          # Keeps an already truthy value (such as :regexp) untouched.
          value_expected = true unless value_expected
        end
        next

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        after_def = false
        encoder.text_token match, :comment

      elsif bol? && match = scan(/ \#!.* /x)
        encoder.text_token match, :doctype

      elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
        after_def = value_expected = false
        encoder.text_token match, :include

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if last_token == '.'
          # Anything after a dot is a member name, even if it looks like a keyword.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        elsif after_def && check(/\s*[({]/)
          kind = :method
          after_def = false
        elsif kind == :ident && last_token != '?' && check(/:/)
          # "name:" is a key, unless the colon belongs to a "? :" ternary.
          kind = :key
        else
          class_name_follows = true if match == 'class' || (import_clause && match == 'as')
          import_clause = match == 'import'
          after_def = true if match == 'def'
        end
        encoder.text_token match, kind

      elsif match = scan(/;/)
        import_clause = after_def = false
        value_expected = true
        encoder.text_token match, :operator

      elsif match = scan(/\{/)
        class_name_follows = after_def = false
        value_expected = true
        encoder.text_token match, :operator
        # Track brace nesting so the "}" closing the inline block can be found.
        if !inline_block_stack.empty?
          inline_block_paren_depth += 1
        end

      elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
          && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
        value_expected = true
        value_expected = :regexp if match == '~'
        after_def = false
        encoder.text_token match, :operator

      elsif match = scan(/ [)\]}] /x)
        value_expected = after_def = false
        if !inline_block_stack.empty? && match == '}'
          inline_block_paren_depth -= 1
          if inline_block_paren_depth == 0
            # End of the inline block: resume the enclosing literal.
            encoder.text_token match, :inline_delimiter
            encoder.end_group :inline
            state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
            next
          end
        end
        encoder.text_token match, :operator

      elsif check(/[\d.]/)
        after_def = value_expected = false
        if match = scan(/0[xX][0-9A-Fa-f]+/)
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lLgG]?/)
          encoder.text_token match, :integer
        end

      elsif match = scan(/'''|"""/)
        after_def = value_expected = false
        state = :multiline_string
        # Opened as :string; all closing paths must end a :string group too.
        encoder.begin_group :string
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif match = scan(/["']/)
        after_def = value_expected = false
        # Only quotes can match here, so this is always a plain string.
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif value_expected && match = scan(/\//)
        after_def = value_expected = false
        encoder.begin_group :regexp
        state = :regexp
        string_delimiter = '/'
        encoder.text_token match, :delimiter

      elsif match = scan(/ @ #{IDENT} /ox)
        after_def = value_expected = false
        encoder.text_token match, :annotation

      elsif match = scan(/\//)
        # No value expected at this point, so "/" is the division operator.
        after_def = false
        value_expected = true
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :string, :regexp, :multiline_string
      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content

      elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
        encoder.text_token match, :delimiter
        if state == :regexp
          modifiers = scan(/[ix]+/)
          encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
        end
        state = :string if state == :multiline_string
        encoder.end_group state
        string_delimiter = nil
        after_def = value_expected = false
        state = :initial
        next

      elsif (state == :string || state == :multiline_string) &&
          (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted strings only \\ and \' are reported as escapes.
        if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end
      elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char

      elsif match = scan(/ \$ #{IDENT} /mox)
        # "$name" interpolation: a complete inline group of its own.
        encoder.begin_group :inline
        encoder.text_token '$', :inline_delimiter
        match = match[1..-1]
        encoder.text_token match, IDENT_KIND[match]
        encoder.end_group :inline
        next
      elsif match = scan(/ \$ \{ /x)
        # "${": remember the current literal and scan the block as code.
        encoder.begin_group :inline
        encoder.text_token match, :inline_delimiter
        inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
        inline_block_paren_depth = 1
        state = :initial
        next

      elsif match = scan(/ \$ /mx)
        encoder.text_token match, :content

      elsif match = scan(/ \\. /mx)
        encoder.text_token match, :content

      elsif match = scan(/ \\ | \n /x)
        # Broken literal: close its group and fall back to code. A multiline
        # string was opened as :string, so it is closed as :string.
        encoder.end_group(state == :regexp ? :regexp : :string)
        encoder.text_token match, :error
        after_def = value_expected = false
        state = :initial

      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

      end

    else
      raise_inspect 'Unknown state', encoder

    end

    # NOTE(review): kind is assigned only in the identifier branch and keeps
    # its value across iterations, so this filter does not see :comment or
    # :doctype tokens; space tokens never get here because they use next.
    last_token = match unless [:space, :comment, :doctype].include? kind

  end

  # Input ended inside a literal: close the group that was opened for it
  # (:string for both plain and multiline strings).
  if [:multiline_string, :string, :regexp].include? state
    encoder.end_group(state == :regexp ? :regexp : :string)
  end

  encoder
end