Class CodeRay::Scanners::HTML
In: lib/coderay/scanners/html.rb
Parent: Scanner

HTML Scanner

Alias: xhtml

See also: Scanners::XML

Methods

Constants

# Names of the HTML event handler attributes (all of the form "on...").
# The list is frozen so the shared constant cannot be modified by accident.
EVENT_ATTRIBUTES = %w[
  onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
  oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
  ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange onemptied onended onerror onfocus
  onformchange onforminput onhashchange oninput oninvalid onkeydown
  onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
  onplay onplaying onpopstate onprogress onratechange onreadystatechange
  onredo onreset onresize onscroll onseeked onseeking onselect onshow
  onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
  onvolumechange onwaiting
].freeze
# Lookup table that registers every event attribute name with the value
# :script. Presumably lookups ignore case and unknown names yield the nil
# passed to +new+ - confirm against WordList::CaseIgnoring.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)

Public Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 61
    #
    # Resets the scanner: runs the parent class's reset first, then puts
    # the HTML state machine back into its starting configuration.
    def reset
      super
      @state = :initial            # start scanning outside of any tag
      @plain_string_content = nil  # no quoted attribute value is open
    end

Protected Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 74
    #
    # Tokenizes +code+ with a JavaScript scanner, passing +encoder+ as the
    # :tokens option of its +tokenize+ call. Does nothing and returns nil
    # when +code+ is nil or empty. The JavaScript scanner is created on
    # first use and kept in @java_script_scanner for later calls.
    def scan_java_script encoder, code
      return if !code || code.empty?

      js_scanner = (@java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true))
      # Group markers are left to the caller:
      # encoder.begin_group :inline
      js_scanner.tokenize(code, :tokens => encoder)
      # encoder.end_group :inline
    end

[Source]

     # File lib/coderay/scanners/html.rb, line 83
     #
     # Scans the remaining input as HTML and sends the resulting tokens to
     # +encoder+ (via +text_token+, +begin_group+ and +end_group+).
     #
     # The method is a state machine; the states are :initial, :attribute,
     # :attribute_equal, :attribute_value, :attribute_value_string and
     # :in_special_tag (the body of a <script> element).
     #
     # encoder:: receiver of the tokens; also the return value.
     # options:: a Hash. :state overrides the state to start in (otherwise
     #           @state is used). If :keep_state is truthy, the final state
     #           and string-content pattern are stored in @state and
     #           @plain_string_content when scanning ends.
     #
     # Raises (through +raise_inspect+ / +raise+) on an unknown state or an
     # unknown special tag.
     def scan_tokens encoder, options
       state = options[:state] || @state
       plain_string_content = @plain_string_content
       in_tag = in_attribute = nil

       # Starting inside a quoted attribute value: open the :string group
       # that is closed again below.
       encoder.begin_group :string if state == :attribute_value_string

       until eos?

         # Whitespace is emitted as :space in every state except inside a
         # special tag, where it belongs to the embedded code.
         if state != :in_special_tag && match = scan(/\s+/m)
           encoder.text_token match, :space

         else

           case state

           when :initial
             if match = scan(/<!--(?:.*?-->|.*)/m)
               encoder.text_token match, :comment
             elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
               encoder.text_token match, :doctype
             elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
               encoder.text_token match, :preprocessor
             elsif match = scan(/<\?(?:.*?\?>|.*)/m)
               encoder.text_token match, :comment
             elsif match = scan(/<\/[-\w.:]*>?/m)
               in_tag = nil
               encoder.text_token match, :tag
             elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
               encoder.text_token match, :tag
               # Group 1 is set only for <script>, group 2 only when the tag
               # is closed right after its name.
               in_tag = self[1]
               if self[2]
                 state = :in_special_tag if in_tag
               else
                 state = :attribute
               end
             elsif match = scan(/[^<>&]+/)
               encoder.text_token match, :plain
             elsif match = scan(/#{ENTITY}/ox)
               encoder.text_token match, :entity
             elsif match = scan(/[<>&]/)
               in_tag = nil
               encoder.text_token match, :error
             else
               raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
             end

           when :attribute
             if match = scan(/#{TAG_END}/o)
               encoder.text_token match, :tag
               in_attribute = nil
               if in_tag
                 state = :in_special_tag
               else
                 state = :initial
               end
             elsif match = scan(/#{ATTR_NAME}/o)
               # :script here makes a following quoted value get scanned as
               # JavaScript (see :attribute_value).
               in_attribute = IN_ATTRIBUTE[match]
               encoder.text_token match, :attribute_name
               state = :attribute_equal
             else
               in_tag = nil
               encoder.text_token getch, :error
             end

           when :attribute_equal
             if match = scan(/=/)  #/
               encoder.text_token match, :operator
               state = :attribute_value
             elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
               # Attribute without a value (e.g. <input disabled>). Only peek
               # here: the text must stay in the input so that the :attribute
               # state consumes it and emits its token. Consuming it here
               # would silently drop it from the output.
               state = :attribute
               next
             else
               encoder.text_token getch, :error
               state = :attribute
             end

           when :attribute_value
             if match = scan(/#{ATTR_NAME}/o)
               # Unquoted attribute value.
               encoder.text_token match, :attribute_value
               state = :attribute
             elsif match = scan(/["']/)
               if in_attribute == :script
                 # Quoted value of an event attribute: scan it as JavaScript
                 # inside an :inline group.
                 encoder.begin_group :inline
                 encoder.text_token match, :inline_delimiter
                 if scan(/javascript:[ \t]*/)
                   encoder.text_token matched, :comment
                 end
                 # Everything up to the matching quote (or end of input).
                 code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
                 scan_java_script encoder, code
                 match = scan(/["']/)
                 encoder.text_token match, :inline_delimiter if match
                 encoder.end_group :inline
                 state = :attribute
                 in_attribute = nil
               else
                 encoder.begin_group :string
                 state = :attribute_value_string
                 # Content pattern that depends on the opening quote.
                 plain_string_content = PLAIN_STRING_CONTENT[match]
                 encoder.text_token match, :delimiter
               end
             elsif match = scan(/#{TAG_END}/o)
               # NOTE(review): unlike the :attribute state, this goes to
               # :initial even when in_tag is set (open <script>) - confirm
               # whether that is intended.
               encoder.text_token match, :tag
               state = :initial
             else
               encoder.text_token getch, :error
             end

           when :attribute_value_string
             if match = scan(plain_string_content)
               encoder.text_token match, :content
             elsif match = scan(/['"]/)
               encoder.text_token match, :delimiter
               encoder.end_group :string
               state = :attribute
             elsif match = scan(/#{ENTITY}/ox)
               encoder.text_token match, :entity
             elsif match = scan(/&/)
               encoder.text_token match, :content
             elsif match = scan(/[\n>]/)
               # Unterminated string: close the group and report an error.
               encoder.end_group :string
               state = :initial
               encoder.text_token match, :error
             end

           when :in_special_tag
             case in_tag
             when 'script'
               encoder.text_token match, :space if match = scan(/[ \t]*\n/)
               if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                 # Script body wrapped in an HTML comment: the comment
                 # markers become :comment tokens, the inside is the code.
                 code = self[2] || self[4]
                 closing = self[3]
                 encoder.text_token self[1], :comment
               else
                 # Everything up to the closing </script> (or end of input).
                 code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
                 closing = false
               end
               unless code.empty?
                 encoder.begin_group :inline
                 scan_java_script encoder, code
                 encoder.end_group :inline
               end
               encoder.text_token closing, :comment if closing
               state = :initial
             else
               raise 'unknown special tag: %p' % [in_tag]
             end

           else
             raise_inspect 'Unknown state: %p' % [state], encoder

           end

         end

       end

       if options[:keep_state]
         @state = state
         @plain_string_content = plain_string_content
       end

       # Input ended inside a quoted attribute value: close the open group.
       encoder.end_group :string if state == :attribute_value_string

       encoder
     end

[Source]

    # File lib/coderay/scanners/html.rb, line 69
    #
    # Puts the HTML state machine into its starting configuration.
    def setup
      @state = :initial            # start scanning outside of any tag
      @plain_string_content = nil  # no quoted attribute value is open
    end

[Validate]