# Tokenizes the remaining input and reports every token to +encoder+.
#
# The loop is a small state machine with two states:
#
# * <tt>:initial</tt> - ordinary source text: whitespace, comments,
#   identifiers, operators, numeric literals, annotations, and the opening
#   quote of a string or character literal.
# * <tt>:string</tt>  - inside a quoted literal; runs until the closing
#   delimiter, a dangling backslash, or the end of the input.
#
# encoder:: object receiving +text_token+, +begin_group+ and +end_group+.
# options:: accepted for interface compatibility; not read by this method.
#
# Returns +encoder+. Calls +raise_inspect+ when the state machine reaches a
# state or input it has no rule for.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  # Either false, or the token kind (:include / :namespace) to use for the
  # dotted name that follows an 'import' / 'package' keyword.
  package_name_expected = false
  class_name_follows = false
  last_token_dot = false

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        next  # skip the last_token_dot update: whitespace is transparent to it

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        encoder.text_token match, :comment
        next  # comments are transparent to last_token_dot as well

      elsif package_name_expected && (match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox))
        # Stays armed until the terminating ';' is seen.
        encoder.text_token match, package_name_expected

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        if last_token_dot
          # A name right after '.' is a member access, whatever it spells.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        else
          case match
          when 'import'
            package_name_expected = :include
          when 'package'
            package_name_expected = :namespace
          when 'class', 'interface'
            class_name_follows = true
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
        encoder.text_token match, :operator

      elsif match = scan(/;/)
        package_name_expected = false
        encoder.text_token match, :operator

      elsif match = scan(/\{/)
        class_name_follows = false
        encoder.text_token match, :operator

      elsif check(/[\d.]/)
        # Reached with a digit, or with a '.' that is followed by a digit
        # (a lone '.' was already consumed as an operator above), so one of
        # the patterns below always matches.
        if match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
          # The l/L integer type suffix is part of the literal (0xFFL).
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfFdD])[lL]?/)
          # Not octal when followed by 8/9, '.', an exponent or a float
          # suffix (07d, 07f): those are decimal or floating-point literals.
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lL]?/)
          encoder.text_token match, :integer
        end

      elsif match = scan(/["']/)
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif match = scan(/ @ #{IDENT} /ox)
        encoder.text_token match, :annotation

      else
        # Unknown character: consume exactly one so the loop always advances.
        encoder.text_token getch, :error

      end

    when :string
      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content
      elsif match = scan(/["'\/]/)
        encoder.text_token match, :delimiter
        encoder.end_group state
        state = :initial
        string_delimiter = nil
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        # Inside '...' only \\ and \' are reported as :char; every other
        # recognized escape is reported as plain content.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end
      elsif match = scan(/\\./m)
        # Backslash followed by a character that is not a known escape.
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Dangling backslash or end of line: close the group and flag it.
        encoder.end_group state
        state = :initial
        encoder.text_token match, :error
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    else
      raise_inspect 'Unknown state', encoder

    end

    last_token_dot = match == '.'

  end

  # Input ended inside an unterminated literal: close the open group.
  encoder.end_group state if state == :string

  encoder
end