# Tokenizes PHP source (optionally embedded in HTML) and feeds the tokens to
# +encoder+.
#
# The scanner is a stack-based state machine. The top of +states+ selects the
# active mode:
#
# * +:initial+           - text outside of PHP tags; handed to @html_scanner
# * +:php+               - PHP code
# * +:sqstring+          - single-quoted string, or heredoc opened as <<<'ID'
# * +:dqstring+          - double-quoted / backtick string, or other heredoc
# * +:class_expected+    - directly after the +class+ keyword
# * +:function_expected+ - directly after the +function+ keyword
#
# While a <tt>{$...}</tt> interpolation is open, the string state below the
# pushed +:php+ state is temporarily replaced by <tt>[state, delimiter]</tt>
# so the string delimiter can be restored at the closing brace.
#
# @param encoder receives +text_token+, +begin_group+ and +end_group+ calls
# @param options not used by this method
# @return the +encoder+ that was passed in
def scan_tokens encoder, options

  if check(RE::PHP_START) ||
     (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) ||
     check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
     check(/.{1,100}#{RE::PHP_START}/om)
    # A PHP start tag or an HTML indicator was found near the beginning:
    # start outside of PHP.
    states = [:initial]
  else
    # Nothing of the kind was found: start directly in PHP mode.
    states = [:initial, :php]
  end

  label_expected = true
  case_expected = false

  heredoc_delimiter = nil  # Regexp matching the closing heredoc line, or nil
  delimiter = nil          # opening quote character of the current string
  modifier = nil           # pending 'b' prefix of a binary string literal

  until eos?

    case states.last

    when :initial
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # Everything up to the next PHP start tag (or the rest of the input)
        # is delegated to the HTML scanner.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end

    when :php
      if match = scan(/\s+/)
        encoder.text_token match, :space

      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)
            # Hold the 'b' back; it is emitted as :modifier inside the string
            # group once the opening quote has been scanned.
            modifier = match
            next
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float

      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex

      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer

      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push match == "'" ? :sqstring : :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]

      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php

      elsif match = scan(/\}/)
        if states.size == 1
          encoder.text_token match, :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # Closing brace of a {$...} interpolation: restore the string
            # state and its delimiter saved when the interpolation opened.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            label_expected = true
          end
        end

      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception

      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        encoder.begin_group :string
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        # Only the single-quoted form <<<'ID' is scanned as :sqstring.
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :sqstring
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The newline belongs to the content; the identifier after it
          # closes the heredoc.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        states.pop
      end

    when :dqstring
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        # Simple interpolation: $var, $var[key] or $var->prop.
        if check(/\[#{RE::IDENTIFIER}\]/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.end_group :inline
        elsif check(/->/)
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # {$...} interpolation: remember the string state together with its
          # delimiter, then scan the braces' content as PHP code.
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        encoder.text_token match, :content
      else
        states.pop
      end

    when :class_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end

    when :function_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end

    else
      raise_inspect 'Unknown state!', encoder, states
    end

  end

  # The input may end inside a string or an interpolation. Close every group
  # that is still represented on the state stack so the encoder receives a
  # balanced begin_group/end_group sequence.
  while (state = states.pop)
    if state.is_a?(::Array)
      # An interrupted {$...} interpolation: close the :inline group first,
      # then treat the saved string state like any other open string.
      encoder.end_group :inline
      state = state.first
    end
    encoder.end_group :string if state == :sqstring || state == :dqstring
  end

  encoder
end