Class CodeRay::Scanners::Ruby
In: lib/coderay/scanners/ruby/string_state.rb
lib/coderay/scanners/ruby.rb
Parent: Object

This scanner is really complex, since Ruby is a complex language!

It tries to highlight 100% of all common code, and 90% of strange code.

It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.

Methods

Public Instance methods

[Source]

    # File lib/coderay/scanners/ruby.rb, line 19
19:     def interpreted_string_state
          # Returns a freshly built StringState of type :string whose delimiter is
          # a double quote.
          # NOTE(review): the second argument (true) is presumably the
          # "interpreted" flag (escapes and #{} interpolation are honoured) --
          # StringState is defined in string_state.rb, confirm there.
20:       StringState.new :string, true, '"'
21:     end

Protected Instance methods

[Source]

     # File lib/coderay/scanners/ruby.rb, line 29
 29:     def scan_tokens encoder, options
           # Scans the rest of the input and reports every token to +encoder+
           # through text_token / begin_group / end_group. Returns +encoder+.
           #
           # options[:state]      - optional [state, heredocs] pair to resume from;
           #                        when absent, @state is used instead.
           # options[:keep_state] - when truthy, the final [state, heredocs] pair
           #                        is stored in @state after scanning.
           #
           # +state+ is either a Symbol (scanning ordinary code: :initial,
           # :def_expected, :dot_expected, :module_expected, :undef_expected,
           # :undef_comma_expected, :alias_expected) or a StringState (scanning
           # the inside of a string-like literal). +heredocs+ is nil or an Array
           # of StringStates for heredocs that were opened but whose bodies have
           # not been scanned yet.
 30:       state, heredocs = options[:state] || @state
           # Work on a copy so the caller's heredoc list is not mutated by shift.
 31:       heredocs = heredocs.dup if heredocs.is_a?(Array)
 32:       
           # Resuming in the middle of a string literal: re-open its token group.
 33:       if state && state.instance_of?(StringState)
 34:         encoder.begin_group state.type
 35:       end
 36:       
           # State to switch back to after exactly one pass through the :initial
           # branch (see the "if last_state" block below).
 37:       last_state = nil
 38:       
           # method_call_expected holds the '.' or '::' that was just scanned (or
           # false/nil); value_expected tells whether the next token may begin a
           # value, which decides how '/', '<<', '%', '?' and a leading sign are
           # interpreted below.
 39:       method_call_expected = false
 40:       value_expected = true
 41:       
           # Stack of [state, curly_depth, heredocs] triples saved when entering a
           # #{...} interpolation; nil while no interpolation is open.
 42:       inline_block_stack = nil
 43:       inline_block_curly_depth = 0
 44:       
           # Pending heredocs from a previous run: continue with the first body.
 45:       if heredocs
 46:         state = heredocs.shift
 47:         encoder.begin_group state.type
 48:         heredocs = nil if heredocs.empty?
 49:       end
 50:       
 51:       # def_object_stack = nil
 52:       # def_object_paren_depth = 0
 53:       
 54:       patterns = Patterns  # avoid constant lookup
 55:       
           # Selects the /u variants of the identifier-like patterns below.
 56:       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
 57:       
 58:       until eos?
 59:         
             # ---- Symbol state: scanning ordinary Ruby code ----
 60:         if state.instance_of? ::Symbol
 61:           
 62:           if match = scan(/[ \t\f\v]+/)
 63:             encoder.text_token match, :space
 64:             
 65:           elsif match = scan(/\n/)
               # A newline starts the body of the first pending heredoc, if any.
 66:             if heredocs
 67:               unscan  # heredoc scanning needs \n at start
 68:               state = heredocs.shift
 69:               encoder.begin_group state.type
 70:               heredocs = nil if heredocs.empty?
 71:             else
                 # A newline terminates an "undef a, b" list.
 72:               state = :initial if state == :undef_comma_expected
 73:               encoder.text_token match, :space
 74:               value_expected = true
 75:             end
 76:             
               # Comments. At the beginning of a line the pattern additionally
               # captures a '!' directly after '#' (group 1, reported as :doctype)
               # and accepts patterns::RUBYDOC_OR_DATA.
 77:           elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
 78:             encoder.text_token match, self[1] ? :doctype : :comment
 79:             
               # Backslash + newline (line continuation).
 80:           elsif match = scan(/\\\n/)
 81:             if heredocs
                 # Emit only the backslash; the newline is left for the heredoc.
 82:               unscan  # heredoc scanning needs \n at start
 83:               encoder.text_token scan(/\\/), :space
 84:               state = heredocs.shift
 85:               encoder.begin_group state.type
 86:               heredocs = nil if heredocs.empty?
 87:             else
 88:               encoder.text_token match, :space
 89:             end
 90:             
 91:           elsif state == :initial
 92:             
 93:             # IDENTS #
 94:             if !method_call_expected &&
 95:                match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
 96:                                       /#{patterns::METHOD_NAME}/o)
 97:               value_expected = false
 98:               kind = patterns::IDENT_KIND[match]
 99:               if kind == :ident
                   # Capitalized, no trailing !/? and not followed by "(":
                   # treat it as a constant rather than a method call.
100:                 if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
101:                   kind = :constant
102:                 end
103:               elsif kind == :keyword
                   # Some keywords switch to a dedicated follow-up state.
104:                 state = patterns::KEYWORD_NEW_STATE[match]
105:                 value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
106:               end
107:               value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108:               encoder.text_token match, kind
109:               
                 # Name directly after '.' or '::'.
110:             elsif method_call_expected &&
111:                match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112:                                       /#{patterns::METHOD_AFTER_DOT}/o)
113:               if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
114:                 encoder.text_token match, :constant
115:               else
116:                 encoder.text_token match, :ident
117:               end
118:               method_call_expected = false
119:               value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120:               
121:             # OPERATORS #
                 # Group 1 captures '.' or '::'. Group 2 is an empty capture that
                 # only participates for the "..", "=", ",", "(", "[", "{" family,
                 # i.e. operators after which a value is expected. Closing
                 # brackets set neither group.
122:             elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
123:               method_call_expected = self[1]
124:               value_expected = !method_call_expected && self[2]
                   # Inside #{...}: track curly nesting to find the closing brace.
125:               if inline_block_stack
126:                 case match
127:                 when '{'
128:                   inline_block_curly_depth += 1
129:                 when '}'
130:                   inline_block_curly_depth -= 1
131:                   if inline_block_curly_depth == 0  # closing brace of inline block reached
                       # Restore the string state saved when "#{" was seen.
132:                     state, inline_block_curly_depth, heredocs = inline_block_stack.pop
133:                     inline_block_stack = nil if inline_block_stack.empty?
134:                     heredocs = nil if heredocs && heredocs.empty?
135:                     encoder.text_token match, :inline_delimiter
136:                     encoder.end_group :inline
137:                     next
138:                   end
139:                 end
140:               end
141:               encoder.text_token match, :operator
142:               
                 # Symbols. match[1] is the character following the ':'.
143:             elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
144:                                          /#{patterns::SYMBOL}/o)
145:               case delim = match[1]
146:               when ?', ?"
                   # Quoted symbol (:'...' / :"..."): scan the body as a string.
147:                 encoder.begin_group :symbol
148:                 encoder.text_token ':', :symbol
149:                 match = delim.chr
150:                 encoder.text_token match, :delimiter
151:                 state = self.class::StringState.new :symbol, delim == ?", match
152:               else
153:                 encoder.text_token match, :symbol
154:                 value_expected = false
155:               end
156:               
                 # Quoted strings. The pattern tries to swallow a complete simple
                 # string (no backslash; for "..." also no '#') in one go. If that
                 # fails only the opening quote is matched (size 1) and the body
                 # is handled in the StringState branch.
157:             elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
158:               encoder.begin_group :string
159:               if match.size == 1
160:                 encoder.text_token match, :delimiter
161:                 state = self.class::StringState.new :string, match == '"', match  # important for streaming
162:               else
163:                 encoder.text_token match[0,1], :delimiter
164:                 encoder.text_token match[1..-2], :content if match.size > 2
165:                 encoder.text_token match[-1,1], :delimiter
166:                 encoder.end_group :string
167:                 value_expected = false
168:               end
169:               
170:             elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171:                                          /#{patterns::INSTANCE_VARIABLE}/o)
172:               value_expected = false
173:               encoder.text_token match, :instance_variable
174:               
                 # '/' opens a regexp only where a value may start; otherwise it
                 # is picked up by the operator pattern further down.
175:             elsif value_expected && match = scan(/\//)
176:               encoder.begin_group :regexp
177:               encoder.text_token match, :delimiter
178:               state = self.class::StringState.new :regexp, true, '/'
179:               
                 # Numbers; a leading sign is only part of the literal where a
                 # value is expected.
180:             elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181:               if method_call_expected
                   # A number directly after '.' or '::' is flagged as an error.
182:                 encoder.text_token match, :error
183:                 method_call_expected = false
184:               else
                   # NOTE(review): group 1 of patterns::NUMERIC presumably marks
                   # the float-only part -- confirm in Patterns.
185:                 encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
186:               end
187:               value_expected = false
188:               
189:             elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
190:               value_expected = true
191:               encoder.text_token match, :operator
192:               
                 # Heredoc opener. Only the opener is emitted here; a StringState
                 # for the body is queued in +heredocs+ and picked up at the next
                 # newline. Groups used: 1 = '-' (indented terminator allowed),
                 # 3 = quote character, 4 or 2 = delimiter (quoted / unquoted).
193:             elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
194:               quote = self[3]
195:               delim = self[quote ? 4 : 2]
196:               kind = patterns::QUOTE_TO_TYPE[quote]
197:               encoder.begin_group kind
198:               encoder.text_token match, :delimiter
199:               encoder.end_group kind
200:               heredocs ||= []  # create heredocs if empty
201:               heredocs << self.class::StringState.new(kind, quote != "'", delim,
202:                 self[1] == '-' ? :indented : :linestart)
203:               value_expected = false
204:               
                 # NOTE(review): FANCY_STRING_START presumably matches %-literals
                 # (%q, %w, %r, ...) -- group 1 selects kind and interpretation,
                 # group 2 is the delimiter; confirm in Patterns.
205:             elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206:               kind = patterns::FANCY_STRING_KIND[self[1]]
207:               encoder.begin_group kind
208:               state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
209:               encoder.text_token match, :delimiter
210:               
                 # Whatever patterns::CHARACTER matches is reported as :integer.
211:             elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
212:               value_expected = false
213:               encoder.text_token match, :integer
214:               
215:             elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
216:               value_expected = true
217:               encoder.text_token match, :operator
218:               
219:             elsif match = scan(/`/)
220:               encoder.begin_group :shell
221:               encoder.text_token match, :delimiter
222:               state = self.class::StringState.new :shell, true, match
223:               
224:             elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
225:                                          /#{patterns::GLOBAL_VARIABLE}/o)
226:               encoder.text_token match, :global_variable
227:               value_expected = false
228:               
229:             elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
230:                                          /#{patterns::CLASS_VARIABLE}/o)
231:               encoder.text_token match, :class_variable
232:               value_expected = false
233:               
                 # A lone backslash at the very end of the input.
234:             elsif match = scan(/\\\z/)
235:               encoder.text_token match, :space
236:               
237:             else
                   # Nothing matched. If a method name was expected, drop that
                   # expectation and retry at the same position.
238:               if method_call_expected
239:                 method_call_expected = false
240:                 next
241:               end
242:               unless unicode
243:                 # check for unicode
                     # $DEBUG is switched off while probing and restored in the
                     # ensure clause.
244:                 $DEBUG_BEFORE, $DEBUG = $DEBUG, false
245:                 begin
246:                   if check(/./mu).size > 1
247:                     # seems like we should try again with unicode
248:                     unicode = true
249:                   end
250:                 rescue
251:                   # bad unicode char; use getch
252:                 ensure
253:                   $DEBUG = $DEBUG_BEFORE
254:                 end
                     # Retry the same position with the /u patterns enabled.
255:                 next if unicode
256:               end
257:               
                   # Give up on this character: report it as an error token.
258:               encoder.text_token getch, :error
259:               
260:             end
261:             
                 # One token has been handled in :initial on behalf of another
                 # state (set by the '#$' / '#@' interpolation and by
                 # :def_expected): switch back to it now.
262:             if last_state
263:               state = last_state
264:               last_state = nil
265:             end
266:             
               # After "def": a method name not followed by '.' or '::' is the
               # definition's name. Otherwise one token is scanned in :initial
               # and :dot_expected follows.
267:           elsif state == :def_expected
268:             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
269:                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
270:               encoder.text_token match, :method
271:               state = :initial
272:             else
273:               last_state = :dot_expected
274:               state = :initial
275:             end
276:             
277:           elsif state == :dot_expected
278:             if match = scan(/\.|::/)
279:               # invalid definition
280:               state = :def_expected
281:               encoder.text_token match, :operator
282:             else
283:               state = :initial
284:             end
285:             
               # Either "<<" (the state is kept) or an optionally
               # '::'-qualified name reported as :class.
286:           elsif state == :module_expected
287:             if match = scan(/<</)
288:               encoder.text_token match, :operator
289:             else
290:               state = :initial
291:               if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
292:                                         / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
293:                 encoder.text_token match, :class
294:               end
295:             end
296:             
               # One item of an undef list: a method name or a symbol.
297:           elsif state == :undef_expected
298:             state = :undef_comma_expected
299:             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
300:                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
301:               encoder.text_token match, :method
302:             elsif match = scan(/#{patterns::SYMBOL}/o)
303:               case delim = match[1]
304:               when ?', ?"
305:                 encoder.begin_group :symbol
306:                 encoder.text_token ':', :symbol
307:                 match = delim.chr
308:                 encoder.text_token match, :delimiter
309:                 state = self.class::StringState.new :symbol, delim == ?", match
                   # Return to the undef list once the quoted symbol is closed.
310:                 state.next_state = :undef_comma_expected
311:               else
312:                 encoder.text_token match, :symbol
313:               end
314:             else
315:               state = :initial
316:             end
317:             
318:           elsif state == :undef_comma_expected
319:             if match = scan(/,/)
320:               encoder.text_token match, :operator
321:               state = :undef_expected
322:             else
323:               state = :initial
324:             end
325:             
               # "alias new old": both names and the separating blanks are
               # matched at once. A name starting with ':' is reported as
               # :symbol, any other as :method.
326:           elsif state == :alias_expected
327:             match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328:                                    /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
329:             
330:             if match
331:               encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
332:               encoder.text_token self[2], :space
333:               encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334:             end
335:             state = :initial
336:             
337:           else
338:             #:nocov:
339:             raise_inspect 'Unknown state: %p' % [state], encoder
340:             #:nocov:
341:           end
342:           
             # ---- StringState: scanning the inside of a string-like literal ----
343:         else  # StringState
344:           
               # Emit plain content up to the next position state.pattern finds,
               # or all remaining input if it finds none.
               # NOTE(review): this assumes state.pattern leaves the special
               # character itself unconsumed (it is fetched with getch below) --
               # confirm in string_state.rb.
345:           match = scan_until(state.pattern) || scan_rest
346:           unless match.empty?
347:             encoder.text_token match, :content
348:             break if eos?
349:           end
350:           
               # NOTE(review): group 1 of state.pattern presumably signals the
               # heredoc terminator -- confirm in string_state.rb.
351:           if state.heredoc && self[1]  # end of heredoc
                 # One character plus the rest of the line form the delimiter.
352:             match = getch
353:             match << scan_until(/$/) unless eos?
354:             encoder.text_token match, :delimiter unless match.empty?
355:             encoder.end_group state.type
356:             state = state.next_state
357:             next
358:           end
359:           
360:           case match = getch
361:           
362:           when state.delim
                 # When paren_depth is set, a closing delimiter only ends the
                 # literal once the depth has dropped to zero.
363:             if state.paren_depth
364:               state.paren_depth -= 1
365:               if state.paren_depth > 0
366:                 encoder.text_token match, :content
367:                 next
368:               end
369:             end
370:             encoder.text_token match, :delimiter
                 # Modifiers may directly follow a regexp's closing delimiter.
371:             if state.type == :regexp && !eos?
372:               match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373:               encoder.text_token match, :modifier unless match.empty?
374:             end
375:             encoder.end_group state.type
376:             value_expected = false
377:             state = state.next_state
378:             
379:           when '\\'
380:             if state.interpreted
                   # Interpreted literal: the backslash must start a sequence
                   # matching patterns::ESCAPE, otherwise it is an error.
381:               if esc = scan(/#{patterns::ESCAPE}/o)
382:                 encoder.text_token match + esc, :char
383:               else
384:                 encoder.text_token match, :error
385:               end
386:             else
                   # Non-interpreted literal: only an escaped delimiter or an
                   # escaped backslash is reported as :char.
387:               case esc = getch
388:               when nil
389:                 encoder.text_token match, :content
390:               when state.delim, '\\'
391:                 encoder.text_token match + esc, :char
392:               else
393:                 encoder.text_token match + esc, :content
394:               end
395:             end
396:             
397:           when '#'
398:             case peek(1)
399:             when '{'
                   # "#{": save the current string context and scan the embedded
                   # code in :initial until the matching '}' (operator branch).
400:               inline_block_stack ||= []
401:               inline_block_stack << [state, inline_block_curly_depth, heredocs]
402:               value_expected = true
403:               state = :initial
404:               inline_block_curly_depth = 1
405:               encoder.begin_group :inline
406:               encoder.text_token match + getch, :inline_delimiter
407:             when '$', '@'
                   # "#$" / "#@": emit '#' as :escape, scan one token in
                   # :initial, then return to this string via last_state.
408:               encoder.text_token match, :escape
409:               last_state = state
410:               state = :initial
411:             else
412:               #:nocov:
413:               raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414:               #:nocov:
415:             end
416:             
417:           when state.opening_paren
                 # A nested opening bracket raises the depth and is plain content.
418:             state.paren_depth += 1
419:             encoder.text_token match, :content
420:             
421:           else
422:             #:nocov:
423:             raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424:             #:nocov:
425:             
426:           end
427:           
428:         end
429:         
430:       end
431:       
432:       # cleaning up
           # Input ended inside a string-like literal: close its group.
433:       if state.is_a? StringState
434:         encoder.end_group state.type
435:       end
436:       
437:       if options[:keep_state]
             # An unfinished heredoc goes back to the front of the pending list
             # so the next run resumes it through the "if heredocs" block at
             # the top of this method.
438:         if state.is_a?(StringState) && state.heredoc
439:           (heredocs ||= []).unshift state
440:           state = :initial
441:         elsif heredocs && heredocs.empty?
442:           heredocs = nil
443:         end
444:         @state = state, heredocs
445:       end
446:       
           # Close every interpolation still open at the end of the input,
           # together with the string group that contains it.
447:       if inline_block_stack
448:         until inline_block_stack.empty?
449:           state, = *inline_block_stack.pop
450:           encoder.end_group :inline
451:           encoder.end_group state.type
452:         end
453:       end
454:       
455:       encoder
456:     end

[Source]

    # File lib/coderay/scanners/ruby.rb, line 25
25:     def setup
          # Sets the scanner state to :initial. scan_tokens falls back to @state
          # when options[:state] is not given.
          # NOTE(review): presumably invoked by the scanner base class before
          # scanning starts -- the caller is not visible here.
26:       @state = :initial
27:     end

[Validate]