| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358 |
- import bisect
- import re
- import token
- import tokenize
class ChangeCollector(object):
    """Accumulate text replacements and apply them to a string in one pass.

    Each change is a ``(start, end, new_text)`` triple that replaces
    ``text[start:end]``.  Changes may be added in any order; they are
    sorted by ``(start, end)`` before being applied.  The code does not
    check for overlapping ranges -- callers are assumed to supply
    non-overlapping changes.
    """

    def __init__(self, text):
        self.text = text
        self.changes = []

    def add_change(self, start, end, new_text=None):
        """Record that ``text[start:end]`` should become `new_text`.

        When `new_text` is ``None`` the original slice is recorded, i.e.
        the change is a no-op placeholder.
        """
        if new_text is None:
            new_text = self.text[start:end]
        self.changes.append((start, end, new_text))

    def get_changed(self):
        """Return the text with all changes applied.

        Returns ``None`` when no change was recorded or when applying the
        recorded changes yields the original text.
        """
        if not self.changes:
            return None
        # A key function orders exactly like the old cmp-based comparison on
        # ``change[:2]`` but also works where the ``cmp`` builtin and the
        # positional comparison argument of ``list.sort`` no longer exist.
        self.changes.sort(key=lambda change: change[:2])
        pieces = []
        last_changed = 0
        for start, end, text in self.changes:
            pieces.append(self.text[last_changed:start])
            pieces.append(text)
            last_changed = end
        if last_changed < len(self.text):
            pieces.append(self.text[last_changed:])
        result = ''.join(pieces)
        if result != self.text:
            return result
        return None
class SourceLinesAdapter(object):
    """Expose a source string through the line-oriented interface.

    Line numbers are 1-based.  The constructor scans the whole string for
    newlines, so building an instance costs time proportional to the
    length of the source.
    """

    def __init__(self, source_code):
        self.code = source_code
        self.starts = None
        self._initialize_line_starts()

    def _initialize_line_starts(self):
        """Fill ``self.starts`` with the offset at which each line begins.

        A final sentinel of ``len(code) + 1`` is appended so that the end
        of line ``n`` can always be computed as ``starts[n] - 1``.
        """
        offsets = [0]
        newline_at = self.code.find('\n')
        while newline_at != -1:
            offsets.append(newline_at + 1)
            newline_at = self.code.find('\n', newline_at + 1)
        offsets.append(len(self.code) + 1)
        self.starts = offsets

    def get_line(self, lineno):
        """Return the text of line `lineno` without its trailing newline."""
        begin = self.starts[lineno - 1]
        finish = self.starts[lineno] - 1
        return self.code[begin:finish]

    def length(self):
        """Return the number of lines (the sentinel entry is not counted)."""
        return len(self.starts) - 1

    def get_line_number(self, offset):
        """Return the 1-based number of the line containing `offset`."""
        return bisect.bisect(self.starts, offset)

    def get_line_start(self, lineno):
        """Return the offset of the first character of line `lineno`."""
        return self.starts[lineno - 1]

    def get_line_end(self, lineno):
        """Return the offset just past the last character of line `lineno`."""
        return self.starts[lineno] - 1
class ArrayLinesAdapter(object):
    """Expose an indexable sequence of lines through the lines interface."""

    def __init__(self, lines):
        self.lines = lines

    def get_line(self, line_number):
        """Return the line with the given 1-based `line_number`."""
        index = line_number - 1
        return self.lines[index]

    def length(self):
        """Return how many lines the wrapped sequence holds."""
        count = len(self.lines)
        return count
class LinesToReadline(object):
    """Serve a lines object through a ``readline``-style callable.

    Reading begins at the 1-based line `start`.  Every call returns the
    next line with ``'\\n'`` appended; once the lines are exhausted an
    empty string is returned.
    """

    def __init__(self, lines, start):
        self.lines = lines
        self.current = start

    def readline(self):
        """Return the next line plus a newline, or ``''`` at the end."""
        if self.current > self.lines.length():
            return ''
        lineno = self.current
        # Advance before fetching, so the cursor moves even if the fetch
        # fails.
        self.current = lineno + 1
        return self.lines.get_line(lineno) + '\n'

    def __call__(self):
        """Make the instance itself usable as the readline callable."""
        return self.readline()
class _CustomGenerator(object):
    """Split lines into logical-line regions with a hand-written scanner.

    The scanner tracks three pieces of state across physical lines: the
    quote sequence of a string that is still open, the nesting depth of
    brackets, and whether the previous line ended with a backslash.  A
    logical line continues while any of them is set.
    """

    def __init__(self, lines):
        self.lines = lines
        self.in_string = ''
        self.open_count = 0
        self.continuation = False

    def __call__(self):
        """Return a list of ``(start, end)`` 1-based line-number pairs."""
        total = self.lines.length()
        regions = []
        lineno = 1
        while lineno <= total:
            # Blank lines between logical lines belong to no region.
            if not self.lines.get_line(lineno).strip():
                lineno += 1
                continue
            first = lineno
            while True:
                self._analyze_line(self.lines.get_line(lineno))
                unfinished = (self.continuation or self.open_count or
                              self.in_string)
                # The last physical line always closes the region, even
                # when the statement is still incomplete.
                if not unfinished or lineno == total:
                    break
                lineno += 1
            regions.append((first, lineno))
            lineno += 1
        return regions

    # Characters the scanner cares about.  The ``|`` separators sit inside
    # the character class and are therefore matched literally; no branch
    # in `_analyze_line` reacts to them.
    _main_chars = re.compile(r'[\'|"|#|\\|\[|\]|\{|\}|\(|\)]')

    def _analyze_line(self, line):
        """Update string, bracket and continuation state for `line`."""
        last_char = None
        for found in self._main_chars.finditer(line):
            last_char = found.group()
            pos = found.start()
            if last_char in '\'"':
                if not self.in_string:
                    # Opening quote: a run of three opens a triple-quoted
                    # string, anything else a single-quoted one.
                    triple = last_char * 3
                    if line[pos:pos + 3] == triple:
                        self.in_string = triple
                    else:
                        self.in_string = last_char
                elif self.in_string == line[pos:pos + len(self.in_string)]:
                    # A quote preceded by exactly one backslash is escaped
                    # and does not close the string.
                    escaped = (pos > 0 and line[pos - 1] == '\\' and
                               not (pos > 1 and line[pos - 2] == '\\'))
                    if not escaped:
                        self.in_string = ''
            if self.in_string:
                continue
            if last_char == '#':
                # The rest of the line is a comment.
                break
            if last_char in '([{':
                self.open_count += 1
            elif last_char in ')]}':
                self.open_count -= 1
        self.continuation = bool(
            line and last_char != '#' and line.endswith('\\'))


def custom_generator(lines):
    """Return the logical-line regions of `lines` as ``(start, end)`` pairs."""
    scanner = _CustomGenerator(lines)
    return scanner()
class LogicalLineFinder(object):
    """Find logical lines by running the ``tokenize`` module over the lines.

    All line numbers are 1-based.
    """

    def __init__(self, lines):
        self.lines = lines

    def logical_line_in(self, line_number):
        """Return ``(start, end)`` of the logical line holding `line_number`.

        Tokenizing starts at an approximate block start.  If the tokenizer
        raises ``IndentationError`` the block start is recomputed using the
        indentation of the offending line; after five failed attempts the
        error is re-raised.
        """
        indents = count_line_indents(self.lines.get_line(line_number))
        tries = 0
        while True:
            block_start = get_block_start(self.lines, line_number, indents)
            try:
                return self._block_logical_line(block_start, line_number)
            except IndentationError as e:
                tries += 1
                if tries == 5:
                    raise
                # The tokenizer counts lines from `block_start`; convert the
                # reported line back to a line number in `self.lines`.
                lineno = e.lineno + block_start - 1
                indents = count_line_indents(self.lines.get_line(lineno))

    def generate_starts(self, start_line=1, end_line=None):
        """Yield the first line number of each logical line in the range."""
        for start, end in self.generate_regions(start_line, end_line):
            yield start

    def generate_regions(self, start_line=1, end_line=None):
        """Yield ``(start, end)`` for logical lines starting in the range.

        Regions whose start is before `start_line` are skipped; iteration
        stops at the first region starting at or after `end_line` (when
        given).  Tokenizing always begins at line 1.
        """
        # XXX: `block_start` should be at a better position!
        block_start = 1
        readline = LinesToReadline(self.lines, block_start)
        try:
            for start, end in self._logical_lines(readline):
                real_start = start + block_start - 1
                real_start = self._first_non_blank(real_start)
                if end_line is not None and real_start >= end_line:
                    break
                real_end = end + block_start - 1
                if real_start >= start_line:
                    yield (real_start, real_end)
        except tokenize.TokenError:
            # Best effort: a tokenizer failure simply ends the iteration
            # after the regions already produced.
            pass

    def _block_logical_line(self, block_start, line_number):
        """Return the logical line around `line_number`, tokenizing from
        `block_start`."""
        readline = LinesToReadline(self.lines, block_start)
        shifted = line_number - block_start + 1
        region = self._calculate_logical(readline, shifted)
        start = self._first_non_blank(region[0] + block_start - 1)
        if region[1] is None:
            end = self.lines.length()
        else:
            end = region[1] + block_start - 1
        return start, end

    def _calculate_logical(self, readline, line_number):
        """Return the ``(start, end)`` region that contains `line_number`.

        Line numbers are relative to the first line `readline` returns.
        `end` is ``None`` when the tokens run out before `line_number` is
        reached.  When the tokenizer raises ``TokenError`` the region from
        the end of the last complete logical line up to the line before
        the reported position is returned.
        """
        last_end = 1
        try:
            for start, end in self._logical_lines(readline):
                if line_number <= end:
                    return (start, end)
                last_end = end + 1
        except tokenize.TokenError as e:
            # args[1] is the (row, column) position reported by tokenize.
            current = e.args[1][0]
            return (last_end, max(last_end, current - 1))
        return (last_end, None)

    def _logical_lines(self, readline):
        """Yield ``(start, end)`` for each NEWLINE token read via `readline`."""
        last_end = 1
        for current_token in tokenize.generate_tokens(readline):
            current = current_token[2][0]
            if current_token[0] == token.NEWLINE:
                yield (last_end, current)
                last_end = current + 1

    def _first_non_blank(self, line_number):
        """Return the first line at or after `line_number` that is neither
        blank nor a comment; the last line is never inspected."""
        current = line_number
        while current < self.lines.length():
            line = self.lines.get_line(current).strip()
            if line and not line.startswith('#'):
                return current
            current += 1
        return current
def tokenizer_generator(lines):
    """Return a generator of ``(start, end)`` logical-line regions of
    `lines`, computed by `LogicalLineFinder`."""
    finder = LogicalLineFinder(lines)
    return finder.generate_regions()
class CachingLogicalLineFinder(object):
    """Logical-line finder that computes every region once and caches it.

    `generate` is called with `lines` and must yield ``(start, end)``
    line-number pairs.  The result is stored in two flag lists indexed by
    line number; index 0 is unused.
    """

    def __init__(self, lines, generate=custom_generator):
        self.lines = lines
        self._generate = generate

    # Both caches stay None until the first access of `starts` or `ends`.
    _starts = None
    _ends = None

    @property
    def starts(self):
        """List whose entry is True at every line that starts a region."""
        if self._starts is None:
            self._init_logicals()
        return self._starts

    @property
    def ends(self):
        """List whose entry is True at every line that ends a region."""
        if self._ends is None:
            self._init_logicals()
        return self._ends

    def _init_logicals(self):
        """Populate the `_starts` and `_ends` flag lists."""
        slots = self.lines.length() + 1
        self._starts = [None] * slots
        self._ends = [None] * slots
        for first, last in self._generate(self.lines):
            self._starts[first] = True
            self._ends[last] = True

    def logical_line_in(self, line_number):
        """Return ``(start, end)`` of the logical line around `line_number`.

        The nearest region start at or before `line_number` is used.  If
        there is none, the first start at or after it is used instead, and
        if that does not exist either the line itself is returned as a
        one-line region.
        """
        flags = self.starts
        first = line_number
        while first > 0 and not flags[first]:
            first -= 1
        if first == 0:
            try:
                first = flags.index(True, line_number)
            except ValueError:
                return (line_number, line_number)
        return (first, self.ends.index(True, first))

    def generate_starts(self, start_line=1, end_line=None):
        """Yield region start lines in ``range(start_line, end_line)``.

        `end_line` is exclusive and defaults to the number of lines, so a
        region starting on the very last line is not reported by default.
        NOTE(review): confirm that excluding the last line is intended.
        """
        if end_line is None:
            stop = self.lines.length()
        else:
            stop = end_line
        for lineno in range(start_line, stop):
            if self.starts[lineno]:
                yield lineno
def get_block_start(lines, lineno, maximum_indents=80):
    """Approximate the line at which the block containing `lineno` starts.

    Lines are scanned backwards from `lineno`.  The first line that looks
    like a block header and is indented by at most `maximum_indents`
    columns is returned; line 1 is returned when nothing qualifies.
    """
    block_re = get_block_start_patterns()
    for candidate in range(lineno, 0, -1):
        text = lines.get_line(candidate)
        if block_re.search(text) is None:
            continue
        if count_line_indents(text) > maximum_indents:
            continue
        head = text.lstrip()
        # Maybe we're in a list comprehension or generator expression.
        # The grouping below is how ``and``/``or`` precedence has always
        # parsed this test: the ``candidate > 1`` guard covers ``if`` only.
        if (candidate > 1 and head.startswith('if')) or \
           head.startswith('for'):
            depth = 0
            stop = min(candidate + 5, lines.length() + 1)
            for following in range(candidate, stop):
                for ch in lines.get_line(following):
                    if ch == '#':
                        break
                    if ch in '[(':
                        depth += 1
                    elif ch in ')]':
                        depth -= 1
                        if depth < 0:
                            break
                if depth < 0:
                    break
            # A closer with no matching opener means this line sits inside
            # a bracket opened earlier, so it is not a block header.
            if depth < 0:
                continue
        return candidate
    return 1
# Compiled lazily by `get_block_start_patterns` and reused afterwards.
_block_start_pattern = None


def get_block_start_patterns():
    """Return the compiled regex recognising lines that open a block.

    It matches either a keyword followed by whitespace or a keyword
    followed by optional whitespace and a colon.  The pattern is compiled
    on first use and cached in a module-level variable.
    """
    global _block_start_pattern
    if _block_start_pattern:
        return _block_start_pattern
    keyword_then_space = '((def|class|if|elif|except|for|while|with)\\s)'
    keyword_then_colon = '((try|else|finally|except)\\s*:)'
    source = '^\\s*(' + keyword_then_space + '|' + keyword_then_colon + ')'
    _block_start_pattern = re.compile(source, re.M)
    return _block_start_pattern
def count_line_indents(line):
    """Return the indentation width of `line`.

    A space counts as one column and a tab as eight.  A line that contains
    nothing but spaces and tabs (or nothing at all) has width 0.
    """
    body = line.lstrip(' \t')
    if not body:
        return 0
    leading = line[:len(line) - len(body)]
    return leading.count(' ') + 8 * leading.count('\t')
def get_string_pattern():
    """Return regex source that matches a Python string literal.

    The alternatives are, in order: triple double-quoted, triple
    single-quoted, double-quoted and single-quoted strings, each with an
    optional ``u``/``r`` prefix.
    """
    prefix = r'(\b[uU]?[rR]?)?'
    triple_quoted = prefix + r'"""(\\.|"(?!"")|\\\n|[^"\\])*"""'
    one_line = prefix + r'"(\\.|[^"\\\n])*"'
    alternatives = []
    for double_quoted in (triple_quoted, one_line):
        alternatives.append(double_quoted)
        # The single-quote variant is the same pattern with quotes swapped.
        alternatives.append(double_quoted.replace('"', "'"))
    return '|'.join(alternatives)
def get_comment_pattern():
    """Return regex source matching a ``#`` comment up to the end of the
    line (the newline itself is not included)."""
    comment_source = r'#[^\n]*'
    return comment_source
|