Class | CodeRay::Scanners::Python |
In: |
lib/coderay/scanners/python.rb
|
Parent: | Scanner |
Scanner for Python. Supports Python 3.
Based on pygments’ PythonLexer, see dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
# File lib/coderay/scanners/python.rb, line 103
#
# Tokenizes the scanner's input and sends every token to +encoder+.
#
# The loop is a state machine over +state+:
#
# [:initial]           operators, string openers, names, decorators, numbers
# [:string]            inside a string literal, until the closing delimiter
# [:def_expected]      the next name is emitted as +:method+
# [:class_expected]    the next name is emitted as +:class+
# [:include_expected]  names after an import-style keyword are +:include+
#
# Whitespace and comments are handled before the state dispatch (except
# inside strings) and do not touch +last_token_dot+.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: accepted for interface compatibility; not read by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  # A string literal at the very start of the input may be a docstring.
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Stack of :from / :import / :as markers seen while in :include_expected.
  from_import_state = []

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may span lines.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Unterminated string: close the group and leave string state.
        # `$` matches without consuming anything, so only emit an error
        # token when a stray backslash was actually scanned.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error unless match.empty?
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      if match == "\n"
        # A bare newline (not a backslash continuation) ends an import list.
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # The opener is matched case-insensitively, so the raw prefix
          # may be written as `r` or `R`; detect both.
          string_raw = !!modifiers[/r/i]
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # Anything directly after a dot is an attribute, never a keyword.
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is used as a function, so treat it as a name.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # A "j" suffix is only taken when no l/L suffix was scanned.
        if self[1].nil? && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import list; push it back so the
          # :initial branch scans it on the next iteration.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    last_token_dot = match == '.'

  end

  # Input ended inside a string: close the group that is still open.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end