worder.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. import bisect
  2. import keyword
  3. import rope.base.simplify
  4. def get_name_at(resource, offset):
  5. source_code = resource.read()
  6. word_finder = Worder(source_code)
  7. return word_finder.get_word_at(offset)
  8. class Worder(object):
  9. """A class for finding boundaries of words and expressions
  10. Note that in these methods, offset should be the index of the
  11. character not the index of the character after it.
  12. """
  13. def __init__(self, code, handle_ignores=False):
  14. simplified = rope.base.simplify.real_code(code)
  15. self.code_finder = _RealFinder(simplified, code)
  16. self.handle_ignores = handle_ignores
  17. self.code = code
  18. def _init_ignores(self):
  19. ignores = rope.base.simplify.ignored_regions(self.code)
  20. self.dumb_finder = _RealFinder(self.code, self.code)
  21. self.starts = [ignored[0] for ignored in ignores]
  22. self.ends = [ignored[1] for ignored in ignores]
  23. def _context_call(self, name, offset):
  24. if self.handle_ignores:
  25. if not hasattr(self, 'starts'):
  26. self._init_ignores()
  27. start = bisect.bisect(self.starts, offset)
  28. if start > 0 and offset < self.ends[start - 1]:
  29. return getattr(self.dumb_finder, name)(offset)
  30. return getattr(self.code_finder, name)(offset)
  31. def get_primary_at(self, offset):
  32. return self._context_call('get_primary_at', offset)
  33. def get_word_at(self, offset):
  34. return self._context_call('get_word_at', offset)
  35. def get_primary_range(self, offset):
  36. return self._context_call('get_primary_range', offset)
  37. def get_splitted_primary_before(self, offset):
  38. return self._context_call('get_splitted_primary_before', offset)
  39. def get_word_range(self, offset):
  40. return self._context_call('get_word_range', offset)
  41. def is_function_keyword_parameter(self, offset):
  42. return self.code_finder.is_function_keyword_parameter(offset)
  43. def is_a_class_or_function_name_in_header(self, offset):
  44. return self.code_finder.is_a_class_or_function_name_in_header(offset)
  45. def is_from_statement_module(self, offset):
  46. return self.code_finder.is_from_statement_module(offset)
  47. def is_from_aliased(self, offset):
  48. return self.code_finder.is_from_aliased(offset)
  49. def find_parens_start_from_inside(self, offset):
  50. return self.code_finder.find_parens_start_from_inside(offset)
  51. def is_a_name_after_from_import(self, offset):
  52. return self.code_finder.is_a_name_after_from_import(offset)
  53. def is_from_statement(self, offset):
  54. return self.code_finder.is_from_statement(offset)
  55. def get_from_aliased(self, offset):
  56. return self.code_finder.get_from_aliased(offset)
  57. def is_import_statement(self, offset):
  58. return self.code_finder.is_import_statement(offset)
  59. def is_assigned_here(self, offset):
  60. return self.code_finder.is_assigned_here(offset)
  61. def is_a_function_being_called(self, offset):
  62. return self.code_finder.is_a_function_being_called(offset)
  63. def get_word_parens_range(self, offset):
  64. return self.code_finder.get_word_parens_range(offset)
  65. def is_name_assigned_in_class_body(self, offset):
  66. return self.code_finder.is_name_assigned_in_class_body(offset)
  67. def is_on_function_call_keyword(self, offset):
  68. return self.code_finder.is_on_function_call_keyword(offset)
  69. def _find_parens_start(self, offset):
  70. return self.code_finder._find_parens_start(offset)
  71. def get_parameters(self, first, last):
  72. return self.code_finder.get_parameters(first, last)
  73. def get_from_module(self, offset):
  74. return self.code_finder.get_from_module(offset)
  75. def is_assigned_in_a_tuple_assignment(self, offset):
  76. return self.code_finder.is_assigned_in_a_tuple_assignment(offset)
  77. def get_assignment_type(self, offset):
  78. return self.code_finder.get_assignment_type(offset)
  79. def get_function_and_args_in_header(self, offset):
  80. return self.code_finder.get_function_and_args_in_header(offset)
  81. def get_lambda_and_args(self, offset):
  82. return self.code_finder.get_lambda_and_args(offset)
  83. def find_function_offset(self, offset):
  84. return self.code_finder.find_function_offset(offset)
  85. class _RealFinder(object):
  86. def __init__(self, code, raw):
  87. self.code = code
  88. self.raw = raw
  89. def _find_word_start(self, offset):
  90. current_offset = offset
  91. while current_offset >= 0 and self._is_id_char(current_offset):
  92. current_offset -= 1
  93. return current_offset + 1
  94. def _find_word_end(self, offset):
  95. while offset + 1 < len(self.code) and self._is_id_char(offset + 1):
  96. offset += 1
  97. return offset
  98. def _find_last_non_space_char(self, offset):
  99. while offset >= 0 and self.code[offset].isspace():
  100. if self.code[offset] == '\n':
  101. return offset
  102. offset -= 1
  103. return max(-1, offset)
  104. def get_word_at(self, offset):
  105. offset = self._get_fixed_offset(offset)
  106. return self.raw[self._find_word_start(offset):
  107. self._find_word_end(offset) + 1]
  108. def _get_fixed_offset(self, offset):
  109. if offset >= len(self.code):
  110. return offset - 1
  111. if not self._is_id_char(offset):
  112. if offset > 0 and self._is_id_char(offset - 1):
  113. return offset - 1
  114. if offset < len(self.code) - 1 and self._is_id_char(offset + 1):
  115. return offset + 1
  116. return offset
  117. def _is_id_char(self, offset):
  118. return self.code[offset].isalnum() or self.code[offset] == '_'
  119. def _find_string_start(self, offset):
  120. kind = self.code[offset]
  121. try:
  122. return self.code.rindex(kind, 0, offset)
  123. except ValueError:
  124. return 0
  125. def _find_parens_start(self, offset):
  126. offset = self._find_last_non_space_char(offset - 1)
  127. while offset >= 0 and self.code[offset] not in '[({':
  128. if self.code[offset] not in ':,':
  129. offset = self._find_primary_start(offset)
  130. offset = self._find_last_non_space_char(offset - 1)
  131. return offset
  132. def _find_atom_start(self, offset):
  133. old_offset = offset
  134. if self.code[offset] == '\n':
  135. return offset + 1
  136. if self.code[offset].isspace():
  137. offset = self._find_last_non_space_char(offset)
  138. if self.code[offset] in '\'"':
  139. return self._find_string_start(offset)
  140. if self.code[offset] in ')]}':
  141. return self._find_parens_start(offset)
  142. if self._is_id_char(offset):
  143. return self._find_word_start(offset)
  144. return old_offset
  145. def _find_primary_without_dot_start(self, offset):
  146. """It tries to find the undotted primary start
  147. It is different from `self._get_atom_start()` in that it
  148. follows function calls, too; such as in ``f(x)``.
  149. """
  150. last_atom = offset
  151. offset = self._find_last_non_space_char(last_atom)
  152. while offset > 0 and self.code[offset] in ')]':
  153. last_atom = self._find_parens_start(offset)
  154. offset = self._find_last_non_space_char(last_atom - 1)
  155. if offset >= 0 and (self.code[offset] in '"\'})]' or
  156. self._is_id_char(offset)):
  157. atom_start = self._find_atom_start(offset)
  158. if not keyword.iskeyword(self.code[atom_start:offset + 1]):
  159. return atom_start
  160. return last_atom
  161. def _find_primary_start(self, offset):
  162. if offset >= len(self.code):
  163. offset = len(self.code) - 1
  164. if self.code[offset] != '.':
  165. offset = self._find_primary_without_dot_start(offset)
  166. else:
  167. offset = offset + 1
  168. while offset > 0:
  169. prev = self._find_last_non_space_char(offset - 1)
  170. if offset <= 0 or self.code[prev] != '.':
  171. break
  172. offset = self._find_primary_without_dot_start(prev - 1)
  173. if not self._is_id_char(offset):
  174. break
  175. return offset
  176. def get_primary_at(self, offset):
  177. offset = self._get_fixed_offset(offset)
  178. start, end = self.get_primary_range(offset)
  179. return self.raw[start:end].strip()
  180. def get_splitted_primary_before(self, offset):
  181. """returns expression, starting, starting_offset
  182. This function is used in `rope.codeassist.assist` function.
  183. """
  184. if offset == 0:
  185. return ('', '', 0)
  186. end = offset - 1
  187. word_start = self._find_atom_start(end)
  188. real_start = self._find_primary_start(end)
  189. if self.code[word_start:offset].strip() == '':
  190. word_start = end
  191. if self.code[end].isspace():
  192. word_start = end
  193. if self.code[real_start:word_start].strip() == '':
  194. real_start = word_start
  195. if real_start == word_start == end and not self._is_id_char(end):
  196. return ('', '', offset)
  197. if real_start == word_start:
  198. return ('', self.raw[word_start:offset], word_start)
  199. else:
  200. if self.code[end] == '.':
  201. return (self.raw[real_start:end], '', offset)
  202. last_dot_position = word_start
  203. if self.code[word_start] != '.':
  204. last_dot_position = self._find_last_non_space_char(word_start - 1)
  205. last_char_position = self._find_last_non_space_char(last_dot_position - 1)
  206. if self.code[word_start].isspace():
  207. word_start = offset
  208. return (self.raw[real_start:last_char_position + 1],
  209. self.raw[word_start:offset], word_start)
  210. def _get_line_start(self, offset):
  211. try:
  212. return self.code.rindex('\n', 0, offset + 1)
  213. except ValueError:
  214. return 0
  215. def _get_line_end(self, offset):
  216. try:
  217. return self.code.index('\n', offset)
  218. except ValueError:
  219. return len(self.code)
  220. def is_name_assigned_in_class_body(self, offset):
  221. word_start = self._find_word_start(offset - 1)
  222. word_end = self._find_word_end(offset) + 1
  223. if '.' in self.code[word_start:word_end]:
  224. return False
  225. line_start = self._get_line_start(word_start)
  226. line = self.code[line_start:word_start].strip()
  227. return not line and self.get_assignment_type(offset) == '='
  228. def is_a_class_or_function_name_in_header(self, offset):
  229. word_start = self._find_word_start(offset - 1)
  230. line_start = self._get_line_start(word_start)
  231. prev_word = self.code[line_start:word_start].strip()
  232. return prev_word in ['def', 'class']
  233. def _find_first_non_space_char(self, offset):
  234. if offset >= len(self.code):
  235. return len(self.code)
  236. while offset < len(self.code) and self.code[offset].isspace():
  237. if self.code[offset] == '\n':
  238. return offset
  239. offset += 1
  240. return offset
  241. def is_a_function_being_called(self, offset):
  242. word_end = self._find_word_end(offset) + 1
  243. next_char = self._find_first_non_space_char(word_end)
  244. return next_char < len(self.code) and \
  245. self.code[next_char] == '(' and \
  246. not self.is_a_class_or_function_name_in_header(offset)
  247. def _find_import_end(self, start):
  248. return self._get_line_end(start)
  249. def is_import_statement(self, offset):
  250. try:
  251. last_import = self.code.rindex('import ', 0, offset)
  252. except ValueError:
  253. return False
  254. return self._find_import_end(last_import + 7) >= offset
  255. def is_from_statement(self, offset):
  256. try:
  257. last_from = self.code.rindex('from ', 0, offset)
  258. from_import = self.code.index(' import ', last_from)
  259. from_names = from_import + 8
  260. except ValueError:
  261. return False
  262. from_names = self._find_first_non_space_char(from_names)
  263. return self._find_import_end(from_names) >= offset
  264. def is_from_statement_module(self, offset):
  265. if offset >= len(self.code) - 1:
  266. return False
  267. stmt_start = self._find_primary_start(offset)
  268. line_start = self._get_line_start(stmt_start)
  269. prev_word = self.code[line_start:stmt_start].strip()
  270. return prev_word == 'from'
  271. def is_a_name_after_from_import(self, offset):
  272. try:
  273. if len(self.code) > offset and self.code[offset] == '\n':
  274. line_start = self._get_line_start(offset - 1)
  275. else:
  276. line_start = self._get_line_start(offset)
  277. last_from = self.code.rindex('from ', line_start, offset)
  278. from_import = self.code.index(' import ', last_from)
  279. from_names = from_import + 8
  280. except ValueError:
  281. return False
  282. if from_names - 1 > offset:
  283. return False
  284. return self._find_import_end(from_names) >= offset
  285. def get_from_module(self, offset):
  286. try:
  287. last_from = self.code.rindex('from ', 0, offset)
  288. import_offset = self.code.index(' import ', last_from)
  289. end = self._find_last_non_space_char(import_offset)
  290. return self.get_primary_at(end)
  291. except ValueError:
  292. pass
  293. def is_from_aliased(self, offset):
  294. if not self.is_a_name_after_from_import(offset):
  295. return False
  296. try:
  297. end = self._find_word_end(offset)
  298. as_end = min(self._find_word_end(end + 1), len(self.code))
  299. as_start = self._find_word_start(as_end)
  300. if self.code[as_start:as_end + 1] == 'as':
  301. return True
  302. except ValueError:
  303. return False
  304. def get_from_aliased(self, offset):
  305. try:
  306. end = self._find_word_end(offset)
  307. as_ = self._find_word_end(end + 1)
  308. alias = self._find_word_end(as_ + 1)
  309. start = self._find_word_start(alias)
  310. return self.raw[start:alias + 1]
  311. except ValueError:
  312. pass
  313. def is_function_keyword_parameter(self, offset):
  314. word_end = self._find_word_end(offset)
  315. if word_end + 1 == len(self.code):
  316. return False
  317. next_char = self._find_first_non_space_char(word_end + 1)
  318. equals = self.code[next_char:next_char + 2]
  319. if equals == '==' or not equals.startswith('='):
  320. return False
  321. word_start = self._find_word_start(offset)
  322. prev_char = self._find_last_non_space_char(word_start - 1)
  323. return prev_char - 1 >= 0 and self.code[prev_char] in ',('
  324. def is_on_function_call_keyword(self, offset):
  325. stop = self._get_line_start(offset)
  326. if self._is_id_char(offset):
  327. offset = self._find_word_start(offset) - 1
  328. offset = self._find_last_non_space_char(offset)
  329. if offset <= stop or self.code[offset] not in '(,':
  330. return False
  331. parens_start = self.find_parens_start_from_inside(offset)
  332. return stop < parens_start
  333. def find_parens_start_from_inside(self, offset):
  334. stop = self._get_line_start(offset)
  335. opens = 1
  336. while offset > stop:
  337. if self.code[offset] == '(':
  338. break
  339. if self.code[offset] != ',':
  340. offset = self._find_primary_start(offset)
  341. offset -= 1
  342. return max(stop, offset)
  343. def is_assigned_here(self, offset):
  344. return self.get_assignment_type(offset) is not None
  345. def get_assignment_type(self, offset):
  346. # XXX: does not handle tuple assignments
  347. word_end = self._find_word_end(offset)
  348. next_char = self._find_first_non_space_char(word_end + 1)
  349. single = self.code[next_char:next_char + 1]
  350. double = self.code[next_char:next_char + 2]
  351. triple = self.code[next_char:next_char + 3]
  352. if double not in ('==', '<=', '>=', '!='):
  353. for op in [single, double, triple]:
  354. if op.endswith('='):
  355. return op
  356. def get_primary_range(self, offset):
  357. start = self._find_primary_start(offset)
  358. end = self._find_word_end(offset) + 1
  359. return (start, end)
  360. def get_word_range(self, offset):
  361. offset = max(0, offset)
  362. start = self._find_word_start(offset)
  363. end = self._find_word_end(offset) + 1
  364. return (start, end)
  365. def get_word_parens_range(self, offset, opening='(', closing=')'):
  366. end = self._find_word_end(offset)
  367. start_parens = self.code.index(opening, end)
  368. index = start_parens
  369. open_count = 0
  370. while index < len(self.code):
  371. if self.code[index] == opening:
  372. open_count += 1
  373. if self.code[index] == closing:
  374. open_count -= 1
  375. if open_count == 0:
  376. return (start_parens, index + 1)
  377. index += 1
  378. return (start_parens, index)
  379. def get_parameters(self, first, last):
  380. keywords = []
  381. args = []
  382. current = self._find_last_non_space_char(last - 1)
  383. while current > first:
  384. primary_start = current
  385. current = self._find_primary_start(current)
  386. while current != first and self.code[current] not in '=,':
  387. current = self._find_last_non_space_char(current - 1)
  388. primary = self.raw[current + 1:primary_start + 1].strip()
  389. if self.code[current] == '=':
  390. primary_start = current - 1
  391. current -= 1
  392. while current != first and self.code[current] not in ',':
  393. current = self._find_last_non_space_char(current - 1)
  394. param_name = self.raw[current + 1:primary_start + 1].strip()
  395. keywords.append((param_name, primary))
  396. else:
  397. args.append(primary)
  398. current = self._find_last_non_space_char(current - 1)
  399. args.reverse()
  400. keywords.reverse()
  401. return args, keywords
  402. def is_assigned_in_a_tuple_assignment(self, offset):
  403. start = self._get_line_start(offset)
  404. end = self._get_line_end(offset)
  405. primary_start = self._find_primary_start(offset)
  406. primary_end = self._find_word_end(offset)
  407. prev_char_offset = self._find_last_non_space_char(primary_start - 1)
  408. next_char_offset = self._find_first_non_space_char(primary_end + 1)
  409. next_char = prev_char = ''
  410. if prev_char_offset >= start:
  411. prev_char = self.code[prev_char_offset]
  412. if next_char_offset < end:
  413. next_char = self.code[next_char_offset]
  414. try:
  415. equals_offset = self.code.index('=', start, end)
  416. except ValueError:
  417. return False
  418. if prev_char not in '(,' and next_char not in ',)':
  419. return False
  420. parens_start = self.find_parens_start_from_inside(offset)
  421. # XXX: only handling (x, y) = value
  422. return offset < equals_offset and \
  423. self.code[start:parens_start].strip() == ''
  424. def get_function_and_args_in_header(self, offset):
  425. offset = self.find_function_offset(offset)
  426. lparens, rparens = self.get_word_parens_range(offset)
  427. return self.raw[offset:rparens + 1]
  428. def find_function_offset(self, offset, definition='def '):
  429. while True:
  430. offset = self.code.index(definition, offset)
  431. if offset == 0 or not self._is_id_char(offset - 1):
  432. break
  433. offset += 1
  434. def_ = offset + 4
  435. return self._find_first_non_space_char(def_)
  436. def get_lambda_and_args(self, offset):
  437. offset = self.find_function_offset(offset, definition = 'lambda ')
  438. lparens, rparens = self.get_word_parens_range(offset, opening=' ', closing=':')
  439. return self.raw[offset:rparens + 1]