Class CodeRay::Scanners::PHP
In: lib/coderay/scanners/php.rb
Parent: Scanner

Scanner for PHP.

Original by Stefan Walk.

Methods

Constants

KINDS_NOT_LOC = HTML::KINDS_NOT_LOC

Protected Instance methods

[Source]

    # File lib/coderay/scanners/php.rb, line 23
    #
    # Resets this scanner by delegating to the parent implementation and then
    # resetting the nested HTML scanner held in @html_scanner, so that both
    # scanners are reset together.
23:     def reset_instance
24:       super
25:       @html_scanner.reset
26:     end

[Source]

     # File lib/coderay/scanners/php.rb, line 234
     #
     # Tokenizes the input as PHP, optionally embedded in HTML, and feeds the
     # resulting tokens to +encoder+.
     #
     # The method is a state machine driven by the +states+ stack; the top
     # entry selects the active branch:
     #
     #   :initial            - HTML; text is handed to @html_scanner until
     #                         RE::PHP_START is found
     #   :php                - PHP code
     #   :sqstring           - single-quoted string, or a heredoc opened with
     #                         a single-quoted identifier
     #   :dqstring           - double-quoted or backtick string, or any other
     #                         heredoc; variables and {$...} are recognized
     #   :class_expected     - directly after the +class+ keyword
     #   :function_expected  - directly after the +function+ keyword
     #
     # encoder:: receives text_token / begin_group / end_group calls.
     # options:: accepted for the scanner interface; not read in this method.
     #
     # Returns +encoder+. Calls raise_inspect if the state stack ever holds an
     # unknown state.
234:     def scan_tokens encoder, options
235:       
236:       if check(RE::PHP_START) ||  # starts with <?
237:        (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
238:        check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
239:        check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
240:         # is HTML with embedded PHP, so start with HTML
241:         states = [:initial]
242:       else
243:         # is just PHP, so start with PHP surrounded by HTML
244:         states = [:initial, :php]
245:       end
246:       
           # label_expected: while true, an identifier followed by a single ':'
           # is emitted as :label. case_expected: set after 'case'/'default' so
           # that the next ':' operator turns label_expected back on.
247:       label_expected = true
248:       case_expected = false
249:       
           # heredoc_delimiter: nil outside heredocs; inside one it holds the
           # Regexp matching the closing identifier.
           # delimiter: the quote character that opened the current string.
           # modifier: a pending 'b' prefix seen right before a quote.
250:       heredoc_delimiter = nil
251:       delimiter = nil
252:       modifier = nil
253:       
254:       until eos?
255:         
256:         case states.last
257:         
258:         when :initial  # HTML
259:           if match = scan(RE::PHP_START)
260:             encoder.text_token match, :inline_delimiter
261:             label_expected = true
262:             states << :php
263:           else
                   # Everything up to the next PHP start tag (or the rest of
                   # the input) is delegated to the nested HTML scanner.
264:             match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
265:             @html_scanner.tokenize match unless match.empty?
266:           end
267:         
268:         when :php
269:           if match = scan(/\s+/)
270:             encoder.text_token match, :space
271:           
272:           elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
273:             encoder.text_token match, :comment
274:           
275:           elsif match = scan(RE::IDENTIFIER)
276:             kind = Words::IDENT_KIND[match]
277:             if kind == :ident && label_expected && check(/:(?!:)/)
278:               kind = :label
279:               label_expected = true
280:             else
281:               label_expected = false
282:               if kind == :ident && match =~ /^[A-Z]/
283:                 kind = :constant
284:               elsif kind == :keyword
285:                 case match
286:                 when 'class'
287:                   states << :class_expected
288:                 when 'function'
289:                   states << :function_expected
290:                 when 'case', 'default'
291:                   case_expected = true
292:                 end
293:               elsif match == 'b' && check(/['"]/)  # binary string literal
                       # No token is emitted here: the 'b' is remembered and
                       # emitted as :modifier inside the string group that the
                       # quote branch below opens.
294:                 modifier = match
295:                 next
296:               end
297:             end
298:             encoder.text_token match, kind
299:           
300:           elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
301:             label_expected = false
302:             encoder.text_token match, :float
303:           
304:           elsif match = scan(/0x[0-9a-fA-F]+/)
305:             label_expected = false
306:             encoder.text_token match, :hex
307:           
308:           elsif match = scan(/\d+/)
309:             label_expected = false
310:             encoder.text_token match, :integer
311:           
312:           elsif match = scan(/['"`]/)
313:             encoder.begin_group :string
314:             if modifier
315:               encoder.text_token modifier, :modifier
316:               modifier = nil
317:             end
318:             delimiter = match
319:             encoder.text_token match, :delimiter
320:             states.push match == "'" ? :sqstring : :dqstring
321:           
322:           elsif match = scan(RE::VARIABLE)
323:             label_expected = false
324:             encoder.text_token match, Words::VARIABLE_KIND[match]
325:           
326:           elsif match = scan(/\{/)
327:             encoder.text_token match, :operator
328:             label_expected = true
329:             states.push :php
330:           
331:           elsif match = scan(/\}/)
332:             if states.size == 1
333:               encoder.text_token match, :error
334:             else
335:               states.pop
                     # An Array entry is a string state that was wrapped as
                     # [state, delimiter] when a {$...} interpolation started
                     # (see the :dqstring branch); unwrap it and restore the
                     # saved delimiter to continue the string.
336:               if states.last.is_a?(::Array)
337:                 delimiter = states.last[1]
338:                 states[-1] = states.last[0]
339:                 encoder.text_token match, :delimiter
340:                 encoder.end_group :inline
341:               else
342:                 encoder.text_token match, :operator
343:                 label_expected = true
344:               end
345:             end
346:           
347:           elsif match = scan(/@/)
348:             label_expected = false
349:             encoder.text_token match, :exception
350:           
351:           elsif match = scan(RE::PHP_END)
352:             encoder.text_token match, :inline_delimiter
                   # Leaving PHP replaces the whole stack, discarding any
                   # nested states that were still open.
353:             states = [:initial]
354:           
355:           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
356:             encoder.begin_group :string
357:             # warn 'heredoc in heredoc?' if heredoc_delimiter
358:             heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
359:             encoder.text_token match, :delimiter
                   # Group 3 is the single-quoted identifier form, which is
                   # scanned with the single-quoted string rules.
360:             states.push self[3] ? :sqstring : :dqstring
361:             heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
362:           
363:           elsif match = scan(/#{RE::OPERATOR}/o)
364:             label_expected = match == ';'
365:             if case_expected
366:               label_expected = true if match == ':'
367:               case_expected = false
368:             end
369:             encoder.text_token match, :operator
370:           
371:           else
372:             encoder.text_token getch, :error
373:           
374:           end
375:         
376:         when :sqstring
377:           if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
378:             encoder.text_token match, :content
379:           elsif !heredoc_delimiter && match = scan(/'/)
380:             encoder.text_token match, :delimiter
381:             encoder.end_group :string
382:             delimiter = nil
383:             label_expected = false
384:             states.pop
385:           elsif heredoc_delimiter && match = scan(/\n/)
                   # In a heredoc the closing identifier is only looked for
                   # directly after a newline.
386:             if scan heredoc_delimiter
387:               encoder.text_token "\n", :content
388:               encoder.text_token matched, :delimiter
389:               encoder.end_group :string
390:               heredoc_delimiter = nil
391:               label_expected = false
392:               states.pop
393:             else
394:               encoder.text_token match, :content
395:             end
396:           elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
397:             encoder.text_token match, :char
398:           elsif match = scan(/\\./m)
399:             encoder.text_token match, :content
400:           elsif match = scan(/\\/)
401:             encoder.text_token match, :error
402:           else
403:             states.pop
404:           end
405:         
406:         when :dqstring
407:           if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
408:             encoder.text_token match, :content
409:           elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
410:             encoder.text_token match, :delimiter
411:             encoder.end_group :string
412:             delimiter = nil
413:             label_expected = false
414:             states.pop
415:           elsif heredoc_delimiter && match = scan(/\n/)
416:             if scan heredoc_delimiter
417:               encoder.text_token "\n", :content
418:               encoder.text_token matched, :delimiter
419:               encoder.end_group :string
420:               heredoc_delimiter = nil
421:               label_expected = false
422:               states.pop
423:             else
424:               encoder.text_token match, :content
425:             end
426:           elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
427:             encoder.text_token match, :char
428:           elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
429:             encoder.text_token match, :char
430:           elsif match = scan(/\\./m)
431:             encoder.text_token match, :content
432:           elsif match = scan(/\\/)
433:             encoder.text_token match, :error
434:           elsif match = scan(/#{RE::VARIABLE}/o)
                   # Simple interpolation: $var, $var[ident] and $var->ident
                   # are recognized; other forms starting with '[' or '->'
                   # are emitted as :error.
435:             if check(/\[#{RE::IDENTIFIER}\]/o)
436:               encoder.begin_group :inline
437:               encoder.text_token match, :local_variable
438:               encoder.text_token scan(/\[/), :operator
439:               encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
440:               encoder.text_token scan(/\]/), :operator
441:               encoder.end_group :inline
442:             elsif check(/\[/)
443:               match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
444:               encoder.text_token match, :error
445:             elsif check(/->#{RE::IDENTIFIER}/o)
446:               encoder.begin_group :inline
447:               encoder.text_token match, :local_variable
448:               encoder.text_token scan(/->/), :operator
449:               encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
450:               encoder.end_group :inline
451:             elsif check(/->/)
452:               match << scan(/->/)
453:               encoder.text_token match, :error
454:             else
455:               encoder.text_token match, :local_variable
456:             end
457:           elsif match = scan(/\{/)
458:             if check(/\$/)
                     # {$ starts embedded PHP: remember the current string
                     # state together with its delimiter so the matching '}'
                     # in the :php branch can restore both.
459:               encoder.begin_group :inline
460:               states[-1] = [states.last, delimiter]
461:               delimiter = nil
462:               states.push :php
463:               encoder.text_token match, :delimiter
464:             else
465:               encoder.text_token match, :content
466:             end
467:           elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
468:             encoder.text_token match, :local_variable
469:           elsif match = scan(/\$/)
470:             encoder.text_token match, :content
471:           else
472:             states.pop
473:           end
474:         
475:         when :class_expected
                 # The first identifier after 'class' is emitted as :class;
                 # anything other than whitespace or an identifier just leaves
                 # this state without consuming input.
476:           if match = scan(/\s+/)
477:             encoder.text_token match, :space
478:           elsif match = scan(/#{RE::IDENTIFIER}/o)
479:             encoder.text_token match, :class
480:             states.pop
481:           else
482:             states.pop
483:           end
484:         
485:         when :function_expected
                 # Same as :class_expected, but a '&' before the name is
                 # accepted as an operator and the name is emitted as :function.
486:           if match = scan(/\s+/)
487:             encoder.text_token match, :space
488:           elsif match = scan(/&/)
489:             encoder.text_token match, :operator
490:           elsif match = scan(/#{RE::IDENTIFIER}/o)
491:             encoder.text_token match, :function
492:             states.pop
493:           else
494:             states.pop
495:           end
496:         
497:         else
498:           raise_inspect 'Unknown state!', encoder, states
499:         end
500:         
501:       end
502:       
503:       encoder
504:     end

[Source]

    # File lib/coderay/scanners/php.rb, line 19
    #
    # Creates the nested HTML scanner used for the non-PHP parts of the input
    # and stores it in @html_scanner. It is handed this scanner's @tokens and
    # is created with :keep_tokens and :keep_state enabled.
19:     def setup
20:       @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
21:     end

[Validate]