Class CodeRay::Scanners::Python
In: lib/coderay/scanners/python.rb
Parent: Scanner

Scanner for Python. Supports Python 3.

Based on Pygments’ PythonLexer; see dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.

Methods

Protected Instance methods

[Source]

     # File lib/coderay/scanners/python.rb, line 103
103:     def scan_tokens encoder, options
104:       # Tokenizes the input with a small state machine, feeds every token to +encoder+ and returns +encoder+; +options+ is not read here.
105:       state = :initial
106:       string_delimiter = nil
107:       string_raw = false
108:       string_type = nil
109:       docstring_coming = match?(/#{DOCSTRING_COMING}/o)
110:       last_token_dot = false
111:       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
112:       from_import_state = []
113:       # Main loop: each pass emits one token and/or changes +state+, until the end of the input is reached.
114:       until eos?
115:         # Inside a string literal: closing delimiter, content, escapes, or an unterminated string.
116:         if state == :string
117:           if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
118:             encoder.text_token match, :delimiter
119:             encoder.end_group string_type
120:             string_type = nil
121:             state = :initial
122:             next
123:           elsif string_delimiter.size == 3 && match = scan(/\n/)  # a raw newline is content only for triple-quoted delimiters
124:             encoder.text_token match, :content
125:           elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
126:             encoder.text_token match, :content
127:           elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)  # ESCAPE sequences are only recognized when the r prefix is absent
128:             encoder.text_token match, :char
129:           elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
130:             encoder.text_token match, :char
131:           elsif match = scan(/ \\ . /x)
132:             encoder.text_token match, :content
133:           elsif match = scan(/ \\ | $ /x)  # lone backslash or end of line: close the group and emit an :error token
134:             encoder.end_group string_type
135:             string_type = nil
136:             encoder.text_token match, :error
137:             state = :initial
138:           else
139:             raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
140:           end
141:         # Blanks/tabs or an (optionally backslash-prefixed) newline are :space in every non-string state; a bare "\n" leaves :include_expected and re-checks DOCSTRING_COMING.
142:         elsif match = scan(/ [ \t]+ | \\?\n /x)
143:           encoder.text_token match, :space
144:           if match == "\n"
145:             state = :initial if state == :include_expected
146:             docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
147:           end
148:           next
149:         # Comments run from '#' up to (not including) the next newline.
150:         elsif match = scan(/ \# [^\n]* /mx)
151:           encoder.text_token match, :comment
152:           next
153:         # Default state: operators, string openers, names, decorators and numeric literals, tried in this order.
154:         elsif state == :initial
155:           
156:           if match = scan(/#{OPERATOR}/o)
157:             encoder.text_token match, :operator
158:           # String opener: optional prefix (u, r, ur or b; case-insensitive) followed by a single or triple quote.
159:           elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
160:             string_delimiter = self[2]
161:             string_type = docstring_coming ? :docstring : :string
162:             docstring_coming = false if docstring_coming
163:             encoder.begin_group string_type
164:             string_raw = false
165:             modifiers = self[1]
166:             unless modifiers.empty?
167:               string_raw = !!modifiers.index(?r)
168:               encoder.text_token modifiers, :modifier
169:               match = string_delimiter  # the prefix was emitted as :modifier, so only the quotes become the :delimiter token
170:             end
171:             state = :string
172:             encoder.text_token match, :delimiter
173:           
174:           # TODO: backticks
175:           # Names: IDENT_KIND supplies the token kind; a name directly after a '.' token is always :ident.
176:           elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
177:             kind = IDENT_KIND[match]
178:             # TODO: keyword arguments
179:             kind = :ident if last_token_dot
180:             if kind == :old_keyword
181:               kind = check(/\(/) ? :ident : :keyword  # directly followed by '(' => :ident, otherwise :keyword
182:             elsif kind == :predefined && check(/ *=/)  # followed by optional spaces and '=' => plain :ident
183:               kind = :ident
184:             elsif kind == :keyword
185:               state = DEF_NEW_STATE[match]
186:               from_import_state << match.to_sym if state == :include_expected
187:             end
188:             encoder.text_token match, kind
189:           # Decorator: '@' followed by letters, digits, '_' or '.' (the pattern also accepts one trailing l/L).
190:           elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
191:             encoder.text_token match, :decorator
192:           # Hexadecimal literal with optional l/L suffix.
193:           elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
194:             encoder.text_token match, :hex
195:           # Binary literal with optional l/L suffix.
196:           elsif match = scan(/0[bB][01]+[lL]?/)
197:             encoder.text_token match, :binary
198:           # Float literal; a directly following j/J is appended and the token becomes :imaginary.
199:           elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
200:             if scan(/[jJ]/)
201:               match << matched
202:               encoder.text_token match, :imaginary
203:             else
204:               encoder.text_token match, :float
205:             end
206:           # Octal literal: 0o... or the leading-zero form; as written, the lookahead and l/L suffix belong to the second alternative only.
207:           elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
208:             encoder.text_token match, :octal
209:           # Decimal integer; becomes :imaginary when j/J follows and no l/L suffix was captured.
210:           elsif match = scan(/\d+([lL])?/)
211:             if self[1] == nil && scan(/[jJ]/)
212:               match << matched
213:               encoder.text_token match, :imaginary
214:             else
215:               encoder.text_token match, :integer
216:             end
217:           # Nothing matched: emit the next single character as :error.
218:           else
219:             encoder.text_token getch, :error
220:           
221:           end
222:             # :def_expected — the state always falls back to :initial; a name found here is tokenized as :method.
223:         elsif state == :def_expected
224:           state = :initial
225:           if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
226:             encoder.text_token match, :method
227:           else
228:             next
229:           end
230:         # :class_expected — same as above, but the name is tokenized as :class.
231:         elsif state == :class_expected
232:           state = :initial
233:           if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
234:             encoder.text_token match, :class
235:           else
236:             next
237:           end
238:           # :include_expected — 'as' and ('from' ...) 'import' are keywords, the name after 'as' is :ident, other names are :include; from_import_state records the keywords seen.
239:         elsif state == :include_expected
240:           if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
241:             if match == 'as'
242:               encoder.text_token match, :keyword
243:               from_import_state << :as
244:             elsif from_import_state.first == :from && match == 'import'
245:               encoder.text_token match, :keyword
246:               from_import_state << :import
247:             elsif from_import_state.last == :as
248:               # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
249:               encoder.text_token match, :ident
250:               from_import_state.pop
251:             elsif IDENT_KIND[match] == :keyword
252:               unscan  # any other keyword: put it back so it is rescanned in the :initial state
253:               match = nil
254:               state = :initial
255:               next
256:             else
257:               encoder.text_token match, :include
258:             end
259:           elsif match = scan(/,/)
260:             from_import_state.pop if from_import_state.last == :as
261:             encoder.text_token match, :operator
262:           else
263:             from_import_state = []
264:             state = :initial
265:             next
266:           end
267:           # Any other state value is unexpected and is reported through raise_inspect.
268:         else
269:           raise_inspect 'Unknown state', encoder, state
270:           
271:         end
272:         # Remember whether the token just scanned was '.'; branches that call +next+ (spaces, comments, ...) skip this update.
273:         last_token_dot = match == '.'
274:         
275:       end
276:       # The input ended inside a string: close the group that is still open.
277:       if state == :string
278:         encoder.end_group string_type
279:       end
280:       
281:       encoder
282:     end

[Validate]