Class | CodeRay::Scanners::Ruby |
In: |
lib/coderay/scanners/ruby/string_state.rb
lib/coderay/scanners/ruby.rb |
Parent: | Object |
# File lib/coderay/scanners/ruby.rb, line 19
#
# Returns a new StringState of type +:string+ whose delimiter is a double
# quote. The second constructor argument is +true+ (presumably the
# "interpreted" flag, i.e. escapes and interpolation are recognised --
# confirm against StringState).
def interpreted_string_state
  StringState.new(:string, true, '"')
end
# File lib/coderay/scanners/ruby.rb, line 29
#
# Scans the input until end of string and reports every token to +encoder+.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: Hash. Keys read here:
#           <tt>:state</tt>      - a <tt>[state, heredocs]</tt> pair to start
#                                  from; falls back to <tt>@state</tt>.
#           <tt>:keep_state</tt> - when truthy, the final state and the pending
#                                  heredocs are stored in <tt>@state</tt>.
#
# +state+ is either a Symbol (ordinary code: <tt>:initial</tt>,
# <tt>:def_expected</tt>, <tt>:dot_expected</tt>, <tt>:module_expected</tt>,
# <tt>:undef_expected</tt>, <tt>:undef_comma_expected</tt>,
# <tt>:alias_expected</tt>) or a StringState while inside a string-like
# literal (string, symbol, regexp, shell, heredoc).
#
# Returns +encoder+.
def scan_tokens encoder, options
  state, heredocs = options[:state] || @state
  # Work on a copy so the caller's heredoc list is not mutated by #shift.
  heredocs = heredocs.dup if heredocs.is_a?(Array)

  # Resuming inside a string literal: reopen its token group.
  if state && state.instance_of?(StringState)
    encoder.begin_group state.type
  end

  # State to return to after a single token scanned in :initial
  # (used for "#$var" / "#@var" inside strings and for def handling).
  last_state = nil

  # false, or the operator ('.' / '::') that was just scanned.
  method_call_expected = false
  # Truthy when the next token may start a value (affects '/', '?', '%', '<<').
  value_expected = true

  # Stack of [string_state, curly_depth, heredocs] for nested "#{...}" blocks.
  inline_block_stack = nil
  inline_block_curly_depth = 0

  # Pending heredoc bodies take precedence: continue with the first one.
  if heredocs
    state = heredocs.shift
    encoder.begin_group state.type
    heredocs = nil if heredocs.empty?
  end

  # def_object_stack = nil
  # def_object_paren_depth = 0

  patterns = Patterns  # avoid constant lookup

  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

  until eos?

    if state.instance_of? ::Symbol

      if match = scan(/[ \t\f\v]+/)
        encoder.text_token match, :space

      elsif match = scan(/\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          state = :initial if state == :undef_comma_expected
          encoder.text_token match, :space
          value_expected = true
        end

      elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
        # Group 1 is only set for a "#!" line at the beginning of a line.
        encoder.text_token match, self[1] ? :doctype : :comment

      elsif match = scan(/\\\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          encoder.text_token scan(/\\/), :space
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          encoder.text_token match, :space
        end

      elsif state == :initial

        # IDENTS #
        if !method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                  /#{patterns::METHOD_NAME}/o)
          value_expected = false
          kind = patterns::IDENT_KIND[match]
          if kind == :ident
            # Capitalised, no trailing !/? and not followed by "(" => constant.
            if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
              kind = :constant
            end
          elsif kind == :keyword
            state = patterns::KEYWORD_NEW_STATE[match]
            value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
          end
          value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
          encoder.text_token match, kind

        elsif method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                  /#{patterns::METHOD_AFTER_DOT}/o)
          if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
            encoder.text_token match, :constant
          else
            encoder.text_token match, :ident
          end
          method_call_expected = false
          value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
          # Group 1: '.' or '::' (a method call follows).
          # Group 2: empty marker group, set for operators that expect a value.
          method_call_expected = self[1]
          value_expected = !method_call_expected && self[2]
          if inline_block_stack
            case match
            when '{'
              inline_block_curly_depth += 1
            when '}'
              inline_block_curly_depth -= 1
              if inline_block_curly_depth == 0  # closing brace of inline block reached
                state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                inline_block_stack = nil if inline_block_stack.empty?
                heredocs = nil if heredocs && heredocs.empty?
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                next
              end
            end
          end
          encoder.text_token match, :operator

        elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                     /#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            # Quoted symbol (:'..' or :".."): continue as a string-like state.
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
          else
            encoder.text_token match, :symbol
            value_expected = false
          end

        elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
          encoder.begin_group :string
          if match.size == 1
            # Only the opening quote matched: the string contains escapes or
            # interpolation (or is unterminated), so switch to a StringState.
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :string, match == '"', match  # important for streaming
          else
            # Simple string matched in one go: emit delimiter/content/delimiter.
            encoder.text_token match[0,1], :delimiter
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :string
            value_expected = false
          end

        elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                     /#{patterns::INSTANCE_VARIABLE}/o)
          value_expected = false
          encoder.text_token match, :instance_variable

        elsif value_expected && match = scan(/\//)
          encoder.begin_group :regexp
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :regexp, true, '/'

        elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
          if method_call_expected
            encoder.text_token match, :error
            method_call_expected = false
          else
            encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
          end
          value_expected = false

        elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          encoder.begin_group kind
          encoder.text_token match, :delimiter
          encoder.end_group kind
          # The heredoc body starts at the next newline; queue its state.
          heredocs ||= []  # create heredocs if empty
          heredocs << self.class::StringState.new(kind, quote != "'", delim,
            self[1] == '-' ? :indented : :linestart)
          value_expected = false

        elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
          kind = patterns::FANCY_STRING_KIND[self[1]]
          encoder.begin_group kind
          state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
          encoder.text_token match, :delimiter

        elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
          value_expected = false
          encoder.text_token match, :integer

        elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif match = scan(/`/)
          encoder.begin_group :shell
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :shell, true, match

        elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                     /#{patterns::GLOBAL_VARIABLE}/o)
          encoder.text_token match, :global_variable
          value_expected = false

        elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                     /#{patterns::CLASS_VARIABLE}/o)
          encoder.text_token match, :class_variable
          value_expected = false

        elsif match = scan(/\\\z/)
          encoder.text_token match, :space

        else
          # Nothing matched after '.' / '::': drop the expectation and retry.
          if method_call_expected
            method_call_expected = false
            next
          end
          unless unicode
            # check for unicode
            # $DEBUG is switched off while probing (presumably so the rescued
            # exception is not reported in debug mode); the previous value is
            # kept in a local and restored in the ensure clause.
            debug_before = $DEBUG
            $DEBUG = false
            begin
              if check(/./mu).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = debug_before
            end
            next if unicode
          end

          encoder.text_token getch, :error

        end

        # One token was scanned in :initial on behalf of another state.
        if last_state
          state = last_state
          last_state = nil
        end

      elsif state == :def_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
          state = :initial
        else
          # Receiver before the method name ("def obj.meth"): scan one token
          # in :initial, then expect the dot.
          last_state = :dot_expected
          state = :initial
        end

      elsif state == :dot_expected
        if match = scan(/\.|::/)
          # invalid definition
          state = :def_expected
          encoder.text_token match, :operator
        else
          state = :initial
        end

      elsif state == :module_expected
        if match = scan(/<</)
          encoder.text_token match, :operator
        else
          state = :initial
          if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                    / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            encoder.text_token match, :class
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
        elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                     /#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
            # After the quoted symbol ends, go on looking for a comma.
            state.next_state = :undef_comma_expected
          else
            encoder.text_token match, :symbol
          end
        else
          state = :initial
        end

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          encoder.text_token match, :operator
          state = :undef_expected
        else
          state = :initial
        end

      elsif state == :alias_expected
        match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                               /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

        if match
          # Each name is a symbol when it starts with ':', a method otherwise.
          encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
          encoder.text_token self[2], :space
          encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
        end
        state = :initial

      else
        #:nocov:
        raise_inspect 'Unknown state: %p' % [state], encoder
        #:nocov:
      end

    else  # StringState

      # Everything up to the next character of interest is plain content.
      match = scan_until(state.pattern) || scan_rest
      unless match.empty?
        encoder.text_token match, :content
        break if eos?
      end

      if state.heredoc && self[1]  # end of heredoc
        match = getch
        match << scan_until(/$/) unless eos?
        encoder.text_token match, :delimiter unless match.empty?
        encoder.end_group state.type
        state = state.next_state
        next
      end

      case match = getch

      when state.delim
        if state.paren_depth
          # Nested bracket pair inside e.g. %w(...): only the outermost
          # closing delimiter ends the literal.
          state.paren_depth -= 1
          if state.paren_depth > 0
            encoder.text_token match, :content
            next
          end
        end
        encoder.text_token match, :delimiter
        if state.type == :regexp && !eos?
          match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
          encoder.text_token match, :modifier unless match.empty?
        end
        encoder.end_group state.type
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/#{patterns::ESCAPE}/o)
            encoder.text_token match + esc, :char
          else
            encoder.text_token match, :error
          end
        else
          # Non-interpreted literal: only \delimiter and \\ are escapes.
          case esc = getch
          when nil
            encoder.text_token match, :content
          when state.delim, '\\'
            encoder.text_token match + esc, :char
          else
            encoder.text_token match + esc, :content
          end
        end

      when '#'
        case peek(1)
        when '{'
          # "#{": remember the string state and scan the block as code.
          inline_block_stack ||= []
          inline_block_stack << [state, inline_block_curly_depth, heredocs]
          value_expected = true
          state = :initial
          inline_block_curly_depth = 1
          encoder.begin_group :inline
          encoder.text_token match + getch, :inline_delimiter
        when '$', '@'
          # "#$var" / "#@var": scan one token as code, then come back.
          encoder.text_token match, :escape
          last_state = state
          state = :initial
        else
          #:nocov:
          raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
          #:nocov:
        end

      when state.opening_paren
        state.paren_depth += 1
        encoder.text_token match, :content

      else
        #:nocov:
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
        #:nocov:

      end

    end

  end

  # cleaning up
  if state.is_a? StringState
    encoder.end_group state.type
  end

  if options[:keep_state]
    if state.is_a?(StringState) && state.heredoc
      # An unfinished heredoc goes back to the front of the pending list.
      (heredocs ||= []).unshift state
      state = :initial
    elsif heredocs && heredocs.empty?
      heredocs = nil
    end
    @state = state, heredocs
  end

  # Close every inline block (and its enclosing string) that is still open.
  if inline_block_stack
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state.type
    end
  end

  encoder
end