patchedast.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. import collections
  2. import re
  3. import warnings
  4. from rope.base import ast, codeanalyze, exceptions
  5. def get_patched_ast(source, sorted_children=False):
  6. """Adds ``region`` and ``sorted_children`` fields to nodes
  7. Adds ``sorted_children`` field only if `sorted_children` is True.
  8. """
  9. return patch_ast(ast.parse(source), source, sorted_children)
  10. def patch_ast(node, source, sorted_children=False):
  11. """Patches the given node
  12. After calling, each node in `node` will have a new field named
  13. `region` that is a tuple containing the start and end offsets
  14. of the code that generated it.
  15. If `sorted_children` is true, a `sorted_children` field will
  16. be created for each node, too. It is a list containing child
  17. nodes as well as whitespaces and comments that occur between
  18. them.
  19. """
  20. if hasattr(node, 'region'):
  21. return node
  22. walker = _PatchingASTWalker(source, children=sorted_children)
  23. ast.call_for_nodes(node, walker)
  24. return node
  25. def node_region(patched_ast_node):
  26. """Get the region of a patched ast node"""
  27. return patched_ast_node.region
  28. def write_ast(patched_ast_node):
  29. """Extract source form a patched AST node with `sorted_children` field
  30. If the node is patched with sorted_children turned off you can use
  31. `node_region` function for obtaining code using module source code.
  32. """
  33. result = []
  34. for child in patched_ast_node.sorted_children:
  35. if isinstance(child, ast.AST):
  36. result.append(write_ast(child))
  37. else:
  38. result.append(child)
  39. return ''.join(result)
  40. class MismatchedTokenError(exceptions.RopeError):
  41. pass
  42. class _PatchingASTWalker(object):
  43. def __init__(self, source, children=False):
  44. self.source = _Source(source)
  45. self.children = children
  46. self.lines = codeanalyze.SourceLinesAdapter(source)
  47. self.children_stack = []
  48. Number = object()
  49. String = object()
  50. def __call__(self, node):
  51. method = getattr(self, '_' + node.__class__.__name__, None)
  52. if method is not None:
  53. return method(node)
  54. # ???: Unknown node; what should we do here?
  55. warnings.warn('Unknown node type <%s>; please report!'
  56. % node.__class__.__name__, RuntimeWarning)
  57. node.region = (self.source.offset, self.source.offset)
  58. if self.children:
  59. node.sorted_children = ast.get_children(node)
  60. def _handle(self, node, base_children, eat_parens=False, eat_spaces=False):
  61. if hasattr(node, 'region'):
  62. # ???: The same node was seen twice; what should we do?
  63. warnings.warn(
  64. 'Node <%s> has been already patched; please report!' %
  65. node.__class__.__name__, RuntimeWarning)
  66. return
  67. base_children = collections.deque(base_children)
  68. self.children_stack.append(base_children)
  69. children = collections.deque()
  70. formats = []
  71. suspected_start = self.source.offset
  72. start = suspected_start
  73. first_token = True
  74. while base_children:
  75. child = base_children.popleft()
  76. if child is None:
  77. continue
  78. offset = self.source.offset
  79. if isinstance(child, ast.AST):
  80. ast.call_for_nodes(child, self)
  81. token_start = child.region[0]
  82. else:
  83. if child is self.String:
  84. region = self.source.consume_string(
  85. end=self._find_next_statement_start())
  86. elif child is self.Number:
  87. region = self.source.consume_number()
  88. elif child == '!=':
  89. # INFO: This has been added to handle deprecated ``<>``
  90. region = self.source.consume_not_equal()
  91. else:
  92. region = self.source.consume(child)
  93. child = self.source[region[0]:region[1]]
  94. token_start = region[0]
  95. if not first_token:
  96. formats.append(self.source[offset:token_start])
  97. if self.children:
  98. children.append(self.source[offset:token_start])
  99. else:
  100. first_token = False
  101. start = token_start
  102. if self.children:
  103. children.append(child)
  104. start = self._handle_parens(children, start, formats)
  105. if eat_parens:
  106. start = self._eat_surrounding_parens(
  107. children, suspected_start, start)
  108. if eat_spaces:
  109. if self.children:
  110. children.appendleft(self.source[0:start])
  111. end_spaces = self.source[self.source.offset:]
  112. self.source.consume(end_spaces)
  113. if self.children:
  114. children.append(end_spaces)
  115. start = 0
  116. if self.children:
  117. node.sorted_children = children
  118. node.region = (start, self.source.offset)
  119. self.children_stack.pop()
  120. def _handle_parens(self, children, start, formats):
  121. """Changes `children` and returns new start"""
  122. opens, closes = self._count_needed_parens(formats)
  123. old_end = self.source.offset
  124. new_end = None
  125. for i in range(closes):
  126. new_end = self.source.consume(')')[1]
  127. if new_end is not None:
  128. if self.children:
  129. children.append(self.source[old_end:new_end])
  130. new_start = start
  131. for i in range(opens):
  132. new_start = self.source.rfind_token('(', 0, new_start)
  133. if new_start != start:
  134. if self.children:
  135. children.appendleft(self.source[new_start:start])
  136. start = new_start
  137. return start
  138. def _eat_surrounding_parens(self, children, suspected_start, start):
  139. index = self.source.rfind_token('(', suspected_start, start)
  140. if index is not None:
  141. old_start = start
  142. old_offset = self.source.offset
  143. start = index
  144. if self.children:
  145. children.appendleft(self.source[start + 1:old_start])
  146. children.appendleft('(')
  147. token_start, token_end = self.source.consume(')')
  148. if self.children:
  149. children.append(self.source[old_offset:token_start])
  150. children.append(')')
  151. return start
  152. def _count_needed_parens(self, children):
  153. start = 0
  154. opens = 0
  155. for child in children:
  156. if not isinstance(child, basestring):
  157. continue
  158. if child == '' or child[0] in '\'"':
  159. continue
  160. index = 0
  161. while index < len(child):
  162. if child[index] == ')':
  163. if opens > 0:
  164. opens -= 1
  165. else:
  166. start += 1
  167. if child[index] == '(':
  168. opens += 1
  169. if child[index] == '#':
  170. try:
  171. index = child.index('\n', index)
  172. except ValueError:
  173. break
  174. index += 1
  175. return start, opens
  176. def _find_next_statement_start(self):
  177. for children in reversed(self.children_stack):
  178. for child in children:
  179. if isinstance(child, ast.stmt):
  180. return child.col_offset \
  181. + self.lines.get_line_start(child.lineno)
  182. return len(self.source.source)
  183. _operators = {'And': 'and', 'Or': 'or', 'Add': '+', 'Sub': '-', 'Mult': '*',
  184. 'Div': '/', 'Mod': '%', 'Pow': '**', 'LShift': '<<',
  185. 'RShift': '>>', 'BitOr': '|', 'BitAnd': '&', 'BitXor': '^',
  186. 'FloorDiv': '//', 'Invert': '~', 'Not': 'not', 'UAdd': '+',
  187. 'USub': '-', 'Eq': '==', 'NotEq': '!=', 'Lt': '<',
  188. 'LtE': '<=', 'Gt': '>', 'GtE': '>=', 'Is': 'is',
  189. 'IsNot': 'is not', 'In': 'in', 'NotIn': 'not in'}
  190. def _get_op(self, node):
  191. return self._operators[node.__class__.__name__].split(' ')
  192. def _Attribute(self, node):
  193. self._handle(node, [node.value, '.', node.attr])
  194. def _Assert(self, node):
  195. children = ['assert', node.test]
  196. if node.msg:
  197. children.append(',')
  198. children.append(node.msg)
  199. self._handle(node, children)
  200. def _Assign(self, node):
  201. children = self._child_nodes(node.targets, '=')
  202. children.append('=')
  203. children.append(node.value)
  204. self._handle(node, children)
  205. def _AugAssign(self, node):
  206. children = [node.target]
  207. children.extend(self._get_op(node.op))
  208. children.extend(['=', node.value])
  209. self._handle(node, children)
  210. def _Repr(self, node):
  211. self._handle(node, ['`', node.value, '`'])
  212. def _BinOp(self, node):
  213. children = [node.left] + self._get_op(node.op) + [node.right]
  214. self._handle(node, children)
  215. def _BoolOp(self, node):
  216. self._handle(node, self._child_nodes(node.values,
  217. self._get_op(node.op)[0]))
  218. def _Break(self, node):
  219. self._handle(node, ['break'])
  220. def _Call(self, node):
  221. children = [node.func, '(']
  222. args = list(node.args) + node.keywords
  223. children.extend(self._child_nodes(args, ','))
  224. if node.starargs is not None:
  225. if args:
  226. children.append(',')
  227. children.extend(['*', node.starargs])
  228. if node.kwargs is not None:
  229. if args or node.starargs is not None:
  230. children.append(',')
  231. children.extend(['**', node.kwargs])
  232. children.append(')')
  233. self._handle(node, children)
  234. def _ClassDef(self, node):
  235. children = []
  236. if getattr(node, 'decorator_list', None):
  237. for decorator in node.decorator_list:
  238. children.append('@')
  239. children.append(decorator)
  240. children.extend(['class', node.name])
  241. if node.bases:
  242. children.append('(')
  243. children.extend(self._child_nodes(node.bases, ','))
  244. children.append(')')
  245. children.append(':')
  246. children.extend(node.body)
  247. self._handle(node, children)
  248. def _Compare(self, node):
  249. children = []
  250. children.append(node.left)
  251. for op, expr in zip(node.ops, node.comparators):
  252. children.extend(self._get_op(op))
  253. children.append(expr)
  254. self._handle(node, children)
  255. def _Delete(self, node):
  256. self._handle(node, ['del'] + self._child_nodes(node.targets, ','))
  257. def _Num(self, node):
  258. self._handle(node, [self.Number])
  259. def _Str(self, node):
  260. self._handle(node, [self.String])
  261. def _Continue(self, node):
  262. self._handle(node, ['continue'])
  263. def _Dict(self, node):
  264. children = []
  265. children.append('{')
  266. if node.keys:
  267. for index, (key, value) in enumerate(zip(node.keys, node.values)):
  268. children.extend([key, ':', value])
  269. if index < len(node.keys) - 1:
  270. children.append(',')
  271. children.append('}')
  272. self._handle(node, children)
  273. def _Ellipsis(self, node):
  274. self._handle(node, ['...'])
  275. def _Expr(self, node):
  276. self._handle(node, [node.value])
  277. def _Exec(self, node):
  278. children = []
  279. children.extend(['exec', node.body])
  280. if node.globals:
  281. children.extend(['in', node.globals])
  282. if node.locals:
  283. children.extend([',', node.locals])
  284. self._handle(node, children)
  285. def _ExtSlice(self, node):
  286. children = []
  287. for index, dim in enumerate(node.dims):
  288. if index > 0:
  289. children.append(',')
  290. children.append(dim)
  291. self._handle(node, children)
  292. def _For(self, node):
  293. children = ['for', node.target, 'in', node.iter, ':']
  294. children.extend(node.body)
  295. if node.orelse:
  296. children.extend(['else', ':'])
  297. children.extend(node.orelse)
  298. self._handle(node, children)
  299. def _ImportFrom(self, node):
  300. children = ['from']
  301. if node.level:
  302. children.append('.' * node.level)
  303. children.extend([node.module or '', # see comment at rope.base.ast.walk
  304. 'import'])
  305. children.extend(self._child_nodes(node.names, ','))
  306. self._handle(node, children)
  307. def _alias(self, node):
  308. children = [node.name]
  309. if node.asname:
  310. children.extend(['as', node.asname])
  311. self._handle(node, children)
  312. def _FunctionDef(self, node):
  313. children = []
  314. try:
  315. decorators = getattr(node, 'decorator_list')
  316. except AttributeError:
  317. decorators = getattr(node, 'decorators', None)
  318. if decorators:
  319. for decorator in decorators:
  320. children.append('@')
  321. children.append(decorator)
  322. children.extend(['def', node.name, '(', node.args])
  323. children.extend([')', ':'])
  324. children.extend(node.body)
  325. self._handle(node, children)
  326. def _arguments(self, node):
  327. children = []
  328. args = list(node.args)
  329. defaults = [None] * (len(args) - len(node.defaults)) + list(node.defaults)
  330. for index, (arg, default) in enumerate(zip(args, defaults)):
  331. if index > 0:
  332. children.append(',')
  333. self._add_args_to_children(children, arg, default)
  334. if node.vararg is not None:
  335. if args:
  336. children.append(',')
  337. children.extend(['*', node.vararg])
  338. if node.kwarg is not None:
  339. if args or node.vararg is not None:
  340. children.append(',')
  341. children.extend(['**', node.kwarg])
  342. self._handle(node, children)
  343. def _add_args_to_children(self, children, arg, default):
  344. if isinstance(arg, (list, tuple)):
  345. self._add_tuple_parameter(children, arg)
  346. else:
  347. children.append(arg)
  348. if default is not None:
  349. children.append('=')
  350. children.append(default)
  351. def _add_tuple_parameter(self, children, arg):
  352. children.append('(')
  353. for index, token in enumerate(arg):
  354. if index > 0:
  355. children.append(',')
  356. if isinstance(token, (list, tuple)):
  357. self._add_tuple_parameter(children, token)
  358. else:
  359. children.append(token)
  360. children.append(')')
  361. def _GeneratorExp(self, node):
  362. children = [node.elt]
  363. children.extend(node.generators)
  364. self._handle(node, children, eat_parens=True)
  365. def _comprehension(self, node):
  366. children = ['for', node.target, 'in', node.iter]
  367. if node.ifs:
  368. for if_ in node.ifs:
  369. children.append('if')
  370. children.append(if_)
  371. self._handle(node, children)
  372. def _Global(self, node):
  373. children = self._child_nodes(node.names, ',')
  374. children.insert(0, 'global')
  375. self._handle(node, children)
  376. def _If(self, node):
  377. if self._is_elif(node):
  378. children = ['elif']
  379. else:
  380. children = ['if']
  381. children.extend([node.test, ':'])
  382. children.extend(node.body)
  383. if node.orelse:
  384. if len(node.orelse) == 1 and self._is_elif(node.orelse[0]):
  385. pass
  386. else:
  387. children.extend(['else', ':'])
  388. children.extend(node.orelse)
  389. self._handle(node, children)
  390. def _is_elif(self, node):
  391. if not isinstance(node, ast.If):
  392. return False
  393. offset = self.lines.get_line_start(node.lineno) + node.col_offset
  394. word = self.source[offset:offset + 4]
  395. # XXX: This is a bug; the offset does not point to the first
  396. alt_word = self.source[offset - 5:offset - 1]
  397. return 'elif' in (word, alt_word)
  398. def _IfExp(self, node):
  399. return self._handle(node, [node.body, 'if', node.test,
  400. 'else', node.orelse])
  401. def _Import(self, node):
  402. children = ['import']
  403. children.extend(self._child_nodes(node.names, ','))
  404. self._handle(node, children)
  405. def _keyword(self, node):
  406. self._handle(node, [node.arg, '=', node.value])
  407. def _Lambda(self, node):
  408. self._handle(node, ['lambda', node.args, ':', node.body])
  409. def _List(self, node):
  410. self._handle(node, ['['] + self._child_nodes(node.elts, ',') + [']'])
  411. def _ListComp(self, node):
  412. children = ['[', node.elt]
  413. children.extend(node.generators)
  414. children.append(']')
  415. self._handle(node, children)
  416. def _Module(self, node):
  417. self._handle(node, list(node.body), eat_spaces=True)
  418. def _Name(self, node):
  419. self._handle(node, [node.id])
  420. def _Pass(self, node):
  421. self._handle(node, ['pass'])
  422. def _Print(self, node):
  423. children = ['print']
  424. if node.dest:
  425. children.extend(['>>', node.dest])
  426. if node.values:
  427. children.append(',')
  428. children.extend(self._child_nodes(node.values, ','))
  429. if not node.nl:
  430. children.append(',')
  431. self._handle(node, children)
  432. def _Raise(self, node):
  433. children = ['raise']
  434. if node.type:
  435. children.append(node.type)
  436. if node.inst:
  437. children.append(',')
  438. children.append(node.inst)
  439. if node.tback:
  440. children.append(',')
  441. children.append(node.tback)
  442. self._handle(node, children)
  443. def _Return(self, node):
  444. children = ['return']
  445. if node.value:
  446. children.append(node.value)
  447. self._handle(node, children)
  448. def _Sliceobj(self, node):
  449. children = []
  450. for index, slice in enumerate(node.nodes):
  451. if index > 0:
  452. children.append(':')
  453. if slice:
  454. children.append(slice)
  455. self._handle(node, children)
  456. def _Index(self, node):
  457. self._handle(node, [node.value])
  458. def _Subscript(self, node):
  459. self._handle(node, [node.value, '[', node.slice, ']'])
  460. def _Slice(self, node):
  461. children = []
  462. if node.lower:
  463. children.append(node.lower)
  464. children.append(':')
  465. if node.upper:
  466. children.append(node.upper)
  467. if node.step:
  468. children.append(':')
  469. children.append(node.step)
  470. self._handle(node, children)
  471. def _TryFinally(self, node):
  472. children = []
  473. if len(node.body) != 1 or not isinstance(node.body[0], ast.TryExcept):
  474. children.extend(['try', ':'])
  475. children.extend(node.body)
  476. children.extend(['finally', ':'])
  477. children.extend(node.finalbody)
  478. self._handle(node, children)
  479. def _TryExcept(self, node):
  480. children = ['try', ':']
  481. children.extend(node.body)
  482. children.extend(node.handlers)
  483. if node.orelse:
  484. children.extend(['else', ':'])
  485. children.extend(node.orelse)
  486. self._handle(node, children)
  487. def _ExceptHandler(self, node):
  488. self._excepthandler(node)
  489. def _excepthandler(self, node):
  490. children = ['except']
  491. if node.type:
  492. children.append(node.type)
  493. if node.name:
  494. children.extend([',', node.name])
  495. children.append(':')
  496. children.extend(node.body)
  497. self._handle(node, children)
  498. def _Tuple(self, node):
  499. if node.elts:
  500. self._handle(node, self._child_nodes(node.elts, ','),
  501. eat_parens=True)
  502. else:
  503. self._handle(node, ['(', ')'])
  504. def _UnaryOp(self, node):
  505. children = self._get_op(node.op)
  506. children.append(node.operand)
  507. self._handle(node, children)
  508. def _Yield(self, node):
  509. children = ['yield']
  510. if node.value:
  511. children.append(node.value)
  512. self._handle(node, children)
  513. def _While(self, node):
  514. children = ['while', node.test, ':']
  515. children.extend(node.body)
  516. if node.orelse:
  517. children.extend(['else', ':'])
  518. children.extend(node.orelse)
  519. self._handle(node, children)
  520. def _With(self, node):
  521. children = ['with', node.context_expr]
  522. if node.optional_vars:
  523. children.extend(['as', node.optional_vars])
  524. children.append(':')
  525. children.extend(node.body)
  526. self._handle(node, children)
  527. def _child_nodes(self, nodes, separator):
  528. children = []
  529. for index, child in enumerate(nodes):
  530. children.append(child)
  531. if index < len(nodes) - 1:
  532. children.append(separator)
  533. return children
  534. class _Source(object):
  535. def __init__(self, source):
  536. self.source = source
  537. self.offset = 0
  538. def consume(self, token):
  539. try:
  540. while True:
  541. new_offset = self.source.index(token, self.offset)
  542. if self._good_token(token, new_offset):
  543. break
  544. else:
  545. self._skip_comment()
  546. except (ValueError, TypeError):
  547. raise MismatchedTokenError(
  548. 'Token <%s> at %s cannot be matched' %
  549. (token, self._get_location()))
  550. self.offset = new_offset + len(token)
  551. return (new_offset, self.offset)
  552. def consume_string(self, end=None):
  553. if _Source._string_pattern is None:
  554. original = codeanalyze.get_string_pattern()
  555. pattern = r'(%s)((\s|\\\n|#[^\n]*\n)*(%s))*' % \
  556. (original, original)
  557. _Source._string_pattern = re.compile(pattern)
  558. repattern = _Source._string_pattern
  559. return self._consume_pattern(repattern, end)
  560. def consume_number(self):
  561. if _Source._number_pattern is None:
  562. _Source._number_pattern = re.compile(
  563. self._get_number_pattern())
  564. repattern = _Source._number_pattern
  565. return self._consume_pattern(repattern)
  566. def consume_not_equal(self):
  567. if _Source._not_equals_pattern is None:
  568. _Source._not_equals_pattern = re.compile(r'<>|!=')
  569. repattern = _Source._not_equals_pattern
  570. return self._consume_pattern(repattern)
  571. def _good_token(self, token, offset, start=None):
  572. """Checks whether consumed token is in comments"""
  573. if start is None:
  574. start = self.offset
  575. try:
  576. comment_index = self.source.rindex('#', start, offset)
  577. except ValueError:
  578. return True
  579. try:
  580. new_line_index = self.source.rindex('\n', start, offset)
  581. except ValueError:
  582. return False
  583. return comment_index < new_line_index
  584. def _skip_comment(self):
  585. self.offset = self.source.index('\n', self.offset + 1)
  586. def _get_location(self):
  587. lines = self.source[:self.offset].split('\n')
  588. return (len(lines), len(lines[-1]))
  589. def _consume_pattern(self, repattern, end=None):
  590. while True:
  591. if end is None:
  592. end = len(self.source)
  593. match = repattern.search(self.source, self.offset, end)
  594. if self._good_token(match.group(), match.start()):
  595. break
  596. else:
  597. self._skip_comment()
  598. self.offset = match.end()
  599. return match.start(), match.end()
  600. def till_token(self, token):
  601. new_offset = self.source.index(token, self.offset)
  602. return self[self.offset:new_offset]
  603. def rfind_token(self, token, start, end):
  604. index = start
  605. while True:
  606. try:
  607. index = self.source.rindex(token, start, end)
  608. if self._good_token(token, index, start=start):
  609. return index
  610. else:
  611. end = index
  612. except ValueError:
  613. return None
  614. def from_offset(self, offset):
  615. return self[offset:self.offset]
  616. def find_backwards(self, pattern, offset):
  617. return self.source.rindex(pattern, 0, offset)
  618. def __getitem__(self, index):
  619. return self.source[index]
  620. def __getslice__(self, i, j):
  621. return self.source[i:j]
  622. def _get_number_pattern(self):
  623. # HACK: It is merely an approaximation and does the job
  624. integer = r'(0|0x)?[\da-fA-F]+[lL]?'
  625. return r'(%s(\.\d*)?|(\.\d+))([eE][-+]?\d*)?[jJ]?' % integer
  626. _string_pattern = None
  627. _number_pattern = None
  628. _not_equals_pattern = None