# Tokenizes the rest of the input as HTML/XML markup and reports every
# token to +encoder+.
#
# The method is a state machine driven by the local +state+:
#
# * :initial                - outside of any tag
# * :attribute              - inside a start tag, expecting an attribute name
#                             or the end of the tag
# * :attribute_equal        - just after an attribute name, expecting "="
# * :attribute_value        - just after "=", expecting a value
# * :attribute_value_string - inside a quoted attribute value
# * :in_special_tag         - inside the body of a <script> element
#
# encoder - receives text_token / begin_group / end_group calls.
# options - Hash; :state overrides the stored start state, and a truthy
#           :keep_state stores the final state (and the active string
#           content pattern) back into instance variables.
#
# Returns the encoder.
# Calls raise_inspect when an unknown state is reached and raises a
# RuntimeError for an unknown special tag.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # Resuming inside a quoted attribute value: re-open its group so the
  # end_group calls below stay balanced.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a plain :space token everywhere except inside a script
    # body, where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          # Group 1 is only set for <script>; group 2 is the optional ">".
          in_tag = self[1]
          if self[2]
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          state = in_tag ? :in_special_tag : :initial
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
          # Attribute without a value: only look ahead (check does not
          # advance the scan pointer) and let the :attribute state emit the
          # token. Consuming the text here would drop it from the output
          # and, for a tag end, leave the tag unterminated.
          state = :attribute
          next
        else
          encoder.text_token getch, :error
          state = :attribute
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Script-valued attribute: the quoted text is handed to
            # scan_java_script instead of being emitted as a string.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Read up to (not including) the matching quote, or to the end
            # of input when the quote is never closed.
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          # Tag closed right after "=": handle it like the tag end in the
          # :attribute state, so the body of a <script> tag is still
          # scanned as script.
          encoder.text_token match, :tag
          in_attribute = nil
          state = in_tag ? :in_special_tag : :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        # NOTE(review): there is no else branch; this presumably relies on
        # the PLAIN_STRING_CONTENT patterns matching every character not
        # handled below - confirm against their definition.
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          if match = scan(/[ \t]*\n/)
            encoder.text_token match, :space
          end
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script wrapped in an HTML comment: the comment markers are
            # :comment tokens, the text between them is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  # Close a string group that is still open at the end of the input.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end