# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LogicalLine primitive for formatting.

A logical line is the containing data structure produced by the parser. It
collects all nodes (stored in FormatToken objects) that could appear on a single
line if there were no line length restrictions. It's then used by the parser to
perform the wrapping required to comply with the style guide.
"""

from yapf_third_party._ylib2to3.fixer_util import syms as python_symbols

from yapf.pytree import pytree_utils
from yapf.pytree import split_penalty
from yapf.yapflib import format_token
from yapf.yapflib import style
from yapf.yapflib import subtypes


class LogicalLine(object):
  """Represents a single logical line in the output.

  Attributes:
    depth: indentation depth of this line. This is just a numeric value used to
      distinguish lines that are more deeply nested than others. It is not the
      actual amount of spaces, which is style-dependent.
    disable: initialized to False; when True, Split() returns the line
      unchanged and the value is forwarded to the spacing computation.
  """

  def __init__(self, depth, tokens=None):
    """Constructor.

    Creates a new logical line with the given depth and an initial list of
    tokens. Constructs the doubly-linked lists for format tokens using their
    built-in next_token and previous_token attributes.

    Arguments:
      depth: indentation depth of this line
      tokens: initial list of tokens
    """
    self.depth = depth
    self._tokens = tokens or []
    self.disable = False

    # Set up a doubly linked list by pairing each token with its successor.
    for prev_tok, tok in zip(self._tokens, self._tokens[1:]):
      tok.previous_token = prev_tok
      prev_tok.next_token = tok

  def CalculateFormattingInformation(self):
    """Calculate the split penalty and total length for the tokens.

    For every token after the first this updates, in place:
    spaces_required_before, total_length, split_penalty, must_break_before
    and can_break_before.
    """
    # Say that the first token in the line should have a space before it. This
    # means only that if this logical line is joined with a predecessor line,
    # then there will be a space between them.
    self.first.spaces_required_before = 1
    self.first.total_length = len(self.first.value)

    prev_token = self.first
    prev_length = self.first.total_length
    for token in self._tokens[1:]:
      if (token.spaces_required_before == 0 and
          _SpaceRequiredBetween(prev_token, token, self.disable)):
        token.spaces_required_before = 1

      # Pseudo tokens do not contribute to the line length.
      tok_len = len(token.value) if not token.is_pseudo else 0

      spaces_required_before = token.spaces_required_before
      if isinstance(spaces_required_before, list):
        assert token.is_comment, token
        # If here, we are looking at a comment token that appears on a line
        # with other tokens (but because it is a comment, it is always the last
        # token). Rather than specifying the actual number of spaces here,
        # hard code a value of 0 and then set it later. This logic only works
        # because this comment token is guaranteed to be the last token in the
        # list.
        spaces_required_before = 0

      token.total_length = prev_length + tok_len + spaces_required_before

      # The split penalty has to be computed before {must|can}_break_before,
      # because these may use it for their decision.
      token.split_penalty += _SplitPenalty(prev_token, token)
      token.must_break_before = _MustBreakBefore(prev_token, token)
      token.can_break_before = (
          token.must_break_before or _CanBreakBefore(prev_token, token))

      prev_length = token.total_length
      prev_token = token

  def Split(self):
    """Split the line at semicolons.

    Returns:
      [self] if the line has no semicolon or is disabled. Otherwise a list of
      new LogicalLine objects at the same depth, one per non-empty run of
      tokens between semicolons. The semicolon tokens themselves are dropped
      and the linked list is cut at the ends of every returned line.
    """
    if not self.has_semicolon or self.disable:
      return [self]

    llines = []
    lline = LogicalLine(self.depth)
    for tok in self._tokens:
      if tok.value == ';':
        # Only emit non-empty segments: an empty LogicalLine has no first/last
        # token, so accessing them below would raise IndexError.
        if lline.tokens:
          llines.append(lline)
          lline = LogicalLine(self.depth)
      else:
        lline.AppendToken(tok)

    if lline.tokens:
      llines.append(lline)

    # Detach each new line from its former neighbours.
    for lline in llines:
      lline.first.previous_token = None
      lline.last.next_token = None

    return llines

  ############################################################################
  # Token Access and Manipulation Methods                                    #
  ############################################################################

  def AppendToken(self, token):
    """Append a new FormatToken to the tokens contained in this line."""
    if self._tokens:
      # Link the new token to the current tail before appending it.
      token.previous_token = self.last
      self.last.next_token = token
    self._tokens.append(token)

  @property
  def first(self):
    """Returns the first token of the line (IndexError if the line is empty)."""
    return self._tokens[0]

  @property
  def last(self):
    """Returns the last token of the line (IndexError if the line is empty)."""
    return self._tokens[-1]

  ############################################################################
  # Token -> String Methods                                                  #
  ############################################################################

  def AsCode(self, indent_per_depth=2):
    """Return a "code" representation of this line.

    The code representation shows how the line would be printed out as code.

    TODO(eliben): for now this is rudimentary for debugging - once we add
    formatting capabilities, this method will have other uses (not all tokens
    have spaces around them, for example).

    Arguments:
      indent_per_depth: how much spaces to indent per depth level.

    Returns:
      A string representing the line as code.
    """
    indent = ' ' * indent_per_depth * self.depth
    tokens_str = ' '.join(tok.value for tok in self._tokens)
    return indent + tokens_str

  def __str__(self):  # pragma: no cover
    return self.AsCode()

  def __repr__(self):  # pragma: no cover
    tokens_repr = ','.join(
        '{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens)
    return 'LogicalLine(depth={0}, tokens=[{1}])'.format(
        self.depth, tokens_repr)

  ############################################################################
  # Properties                                                               #
  ############################################################################

  @property
  def tokens(self):
    """Access the tokens contained within this line.

    The caller must not modify the tokens list returned by this method.

    Returns:
      List of tokens in this line.
    """
    return self._tokens

  @property
  def lineno(self):
    """Return the line number of this logical line.

    Returns:
      The line number of the first token in this logical line.
    """
    return self.first.lineno

  @property
  def start(self):
    """The start of the logical line.

    Returns:
      A tuple of the starting line number and column.
    """
    return (self.first.lineno, self.first.column)

  @property
  def end(self):
    """The end of the logical line.

    Returns:
      A tuple of the ending line number and column.
    """
    return (self.last.lineno, self.last.column + len(self.last.value))

  @property
  def is_comment(self):
    """Whether the first token of this line is a comment."""
    return self.first.is_comment

  @property
  def has_semicolon(self):
    """Whether any token in this line has the value ';'."""
    return any(tok.value == ';' for tok in self._tokens)


def _IsIdNumberStringToken(tok):
  """Whether tok is a keyword, name, number, or string token."""
  return tok.is_keyword or tok.is_name or tok.is_number or tok.is_string


def _IsUnaryOperator(tok):
  """Whether tok carries the UNARY_OPERATOR subtype."""
  return subtypes.UNARY_OPERATOR in tok.subtypes


def _HasPrecedence(tok):
  """Whether a binary operation has precedence within its context."""
  node = tok.node

  # We let ancestor be the statement surrounding the operation that tok is the
  # operator in.
  ancestor = node.parent.parent

  while ancestor is not None:
    # Search through the ancestor nodes in the parse tree for operators with
    # lower precedence.
    predecessor_type = pytree_utils.NodeName(ancestor)
    if predecessor_type in ['arith_expr', 'term']:
      # An ancestor "arith_expr" or "term" means we have found an operator
      # with lower precedence than our tok.
      return True
    if predecessor_type != 'atom':
      # We understand the context to look for precedence within as an
      # arbitrary nesting of "arith_expr", "term", and "atom" nodes. If we
      # leave this context we have not found a lower precedence operator.
      return False
    # Under normal usage we expect a complete parse tree to be available and
    # we will return before we get an AttributeError from the root.
    ancestor = ancestor.parent

  # Ran out of ancestors without finding a lower precedence operator.
  return False


def _PriorityIndicatingNoSpace(tok):
  """Whether to remove spaces around an operator due to precedence."""
  if not tok.is_arithmetic_op or not tok.is_simple_expr:
    # Limit space removal to highest priority arithmetic operators
    return False
  return _HasPrecedence(tok)


def _IsSubscriptColonAndValuePair(token1, token2):
  """Whether token1 is a number or name and token2 is a subscript colon."""
  return (token1.is_number or token1.is_name) and token2.is_subscript_colon


def _SpaceRequiredBetween(left, right, is_line_disabled):
  """Return True if a space is required between the left and right token.

  The rules are checked in order and the first one that matches decides, so the
  order of the checks below is significant.

  Arguments:
    left: the token on the left.
    right: the token immediately following `left`.
    is_line_disabled: when true, the SPACES_AROUND_{DICT,LIST,TUPLE}_DELIMITERS
      knobs are not consulted.

  Returns:
    A truthy value if a space is required. Several branches return the raw
    style setting instead of a strict bool.
  """
  lval = left.value
  rval = right.value
  if (left.is_pseudo and _IsIdNumberStringToken(right) and
      left.previous_token and _IsIdNumberStringToken(left.previous_token)):
    # Space between keyword... tokens and pseudo parens.
    return True
  if left.is_pseudo or right.is_pseudo:
    # There should be a space after the ':' in a dictionary.
    if left.OpensScope():
      return True
    # The closing pseudo-paren shouldn't affect spacing.
    return False
  if left.is_continuation or right.is_continuation:
    # The continuation node's value has all of the spaces it needs.
    return False
  if right.name in pytree_utils.NONSEMANTIC_TOKENS:
    # No space before a non-semantic token.
    return False
  if _IsIdNumberStringToken(left) and _IsIdNumberStringToken(right):
    # Spaces between keyword, string, number, and identifier tokens.
    return True
  if lval == ',' and rval == ':':
    # We do want a space between a comma and colon.
    return True
  if style.Get('SPACE_INSIDE_BRACKETS'):
    # Supersede the "no space before a colon or comma" check.
    if left.OpensScope() and rval == ':':
      return True
    if right.ClosesScope() and lval == ':':
      return True
  if (style.Get('SPACES_AROUND_SUBSCRIPT_COLON') and
      (_IsSubscriptColonAndValuePair(left, right) or
       _IsSubscriptColonAndValuePair(right, left))):
    # Supersede the "never want a space before a colon or comma" check.
    return True
  if rval in ':,':
    # Otherwise, we never want a space before a colon or comma.
    return False
  if lval == ',' and rval in ']})':
    # Add a space between ending ',' and closing bracket if requested.
    return style.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET')
  if lval == ',':
    # We want a space after a comma.
    return True
  if lval == 'from' and rval == '.':
    # Space before the '.' in an import statement.
    return True
  if lval == '.' and rval == 'import':
    # Space after the '.' in an import statement.
    return True
  if (lval == '=' and rval in {'.', '...'} and
      subtypes.DEFAULT_OR_NAMED_ASSIGN not in left.subtypes):
    # Space between equal and '.' as in "X = ...".
    return True
  if lval == ':' and rval in {'.', '...'}:
    # Space between : and ...
    return True
  if ((right.is_keyword or right.is_name) and
      (left.is_keyword or left.is_name)):
    # Don't merge two keywords/identifiers.
    return True
  if (subtypes.SUBSCRIPT_COLON in left.subtypes or
      subtypes.SUBSCRIPT_COLON in right.subtypes):
    # A subscript shouldn't have spaces separating its colons.
    return False
  if (subtypes.TYPED_NAME in left.subtypes or
      subtypes.TYPED_NAME in right.subtypes):
    # A typed argument should have a space after the colon.
    return True
  if left.is_string:
    if (rval == '=' and
        subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in right.subtypes):
      # If there is a type hint, then we don't want to add a space between the
      # equal sign and the hint.
      return False
    if rval not in '[)]}.' and not right.is_binary_op:
      # A string followed by something other than a subscript, closing bracket,
      # dot, or a binary op should have a space after it.
      return True
    if right.ClosesScope():
      # A string followed by closing brackets should have a space after it
      # depending on SPACE_INSIDE_BRACKETS. A string followed by opening
      # brackets, however, should not.
      return style.Get('SPACE_INSIDE_BRACKETS')
    if subtypes.SUBSCRIPT_BRACKET in right.subtypes:
      # It's legal to do this in Python: 'hello'[a]
      return False
  if left.is_binary_op and lval != '**' and _IsUnaryOperator(right):
    # Space between the binary operator and the unary operator.
    return True
  if left.is_keyword and _IsUnaryOperator(right):
    # Handle things like "not -3 < x".
    return True
  if _IsUnaryOperator(left) and _IsUnaryOperator(right):
    # No space between two unary operators.
    return False
  if left.is_binary_op or right.is_binary_op:
    if lval == '**' or rval == '**':
      # Space around the "power" operator.
      return style.Get('SPACES_AROUND_POWER_OPERATOR')
    # Enforce spaces around binary operators except the blocked ones.
    block_list = style.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS')
    if lval in block_list or rval in block_list:
      return False
    if style.Get('ARITHMETIC_PRECEDENCE_INDICATION'):
      if _PriorityIndicatingNoSpace(left) or _PriorityIndicatingNoSpace(right):
        return False
      return True
    return True
  if (_IsUnaryOperator(left) and lval != 'not' and
      (right.is_name or right.is_number or rval == '(')):
    # The previous token was a unary op. No space is desired between it and
    # the current token.
    return False
  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in left.subtypes and
      subtypes.TYPED_NAME not in right.subtypes):
    # A named argument or default parameter shouldn't have spaces around it.
    return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in right.subtypes and
      subtypes.TYPED_NAME not in left.subtypes):
    # A named argument or default parameter shouldn't have spaces around it.
    return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (subtypes.VARARGS_LIST in left.subtypes or
      subtypes.VARARGS_LIST in right.subtypes):
    return False
  if (subtypes.VARARGS_STAR in left.subtypes or
      subtypes.KWARGS_STAR_STAR in left.subtypes):
    # Don't add a space after a vararg's star or a keyword's star-star.
    return False
  if lval == '@' and subtypes.DECORATOR in left.subtypes:
    # Decorators shouldn't be separated from the 'at' sign.
    return False
  if left.is_keyword and rval == '.':
    # Add space between keywords and dots.
    return lval not in {'None', 'print'}
  if lval == '.' and right.is_keyword:
    # Add space between keywords and dots.
    return rval not in {'None', 'print'}
  if lval == '.' or rval == '.':
    # Don't place spaces between dots.
    return False
  if ((lval == '(' and rval == ')') or (lval == '[' and rval == ']') or
      (lval == '{' and rval == '}')):
    # Empty objects shouldn't be separated by spaces.
    return False
  if not is_line_disabled and (left.OpensScope() or right.ClosesScope()):
    if (style.GetOrDefault('SPACES_AROUND_DICT_DELIMITERS', False) and (
        (lval == '{' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == '}' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
    if (style.GetOrDefault('SPACES_AROUND_LIST_DELIMITERS', False) and (
        (lval == '[' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == ']' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
    if (style.GetOrDefault('SPACES_AROUND_TUPLE_DELIMITERS', False) and (
        (lval == '(' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == ')' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
  if left.OpensScope() and right.OpensScope():
    # Nested objects' opening brackets shouldn't be separated, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if left.ClosesScope() and right.ClosesScope():
    # Nested objects' closing brackets shouldn't be separated, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if left.ClosesScope() and rval in '([':
    # A call, set, dictionary, or subscript that has a call or subscript after
    # it shouldn't have a space between them.
    return False
  if left.OpensScope() and _IsIdNumberStringToken(right):
    # Don't separate the opening bracket from the first item, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if left.is_name and rval in '([':
    # Don't separate a call or array access from the name.
    return False
  if right.ClosesScope():
    # Don't separate the closing bracket from the last item, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    # FIXME(morbo): This might be too permissive.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if lval == 'print' and rval == '(':
    # Special support for the 'print' function.
    return False
  if left.OpensScope() and _IsUnaryOperator(right):
    # Don't separate a unary operator from the opening bracket, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if (left.OpensScope() and (subtypes.VARARGS_STAR in right.subtypes or
                             subtypes.KWARGS_STAR_STAR in right.subtypes)):
    # Don't separate a '*' or '**' from the opening bracket, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if rval == ';':
    # Avoid spaces before a semicolon. (Why is there a semicolon?!)
    return False
  if lval == '(' and rval == 'await':
    # Special support for the 'await' keyword. Don't separate the 'await'
    # keyword from an opening paren, unless enabled by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  return True


def _MustBreakBefore(prev_token, cur_token):
  """Return True if a line break is required before the current token."""
  if prev_token.is_comment or (prev_token.previous_token and
                               prev_token.is_pseudo and
                               prev_token.previous_token.is_comment):
    # Must break if the previous token was a comment.
    return True
  if (cur_token.is_string and prev_token.is_string and
      IsSurroundedByBrackets(cur_token)):
    # We want consecutive strings to be on separate lines. This is a
    # reasonable assumption, because otherwise they should have written them
    # all on the same line, or with a '+'.
    return True
  # Otherwise keep whatever was already recorded on the token.
  return cur_token.must_break_before


def _CanBreakBefore(prev_token, cur_token):
  """Return True if a line break may occur before the current token."""
  pval = prev_token.value
  cval = cur_token.value
  if pval == 'yield' and cval == 'from':
    # Don't break before a yield argument.
    return False
  if pval in {'async', 'await'} and cval in {'def', 'with', 'for'}:
    # Don't break after async keywords.
    return False
  if cur_token.split_penalty >= split_penalty.UNBREAKABLE:
    return False
  if pval == '@':
    # Don't break right after the beginning of a decorator.
    return False
  if cval == ':':
    # Don't break before the start of a block of code.
    return False
  if cval == ',':
    # Don't break before a comma.
    return False
  if prev_token.is_name and cval == '(':
    # Don't break in the middle of a function definition or call.
    return False
  if prev_token.is_name and cval == '[':
    # Don't break in the middle of an array dereference.
    return False
  if cur_token.is_comment and prev_token.lineno == cur_token.lineno:
    # Don't break a comment at the end of the line.
    return False
  if subtypes.UNARY_OPERATOR in prev_token.subtypes:
    # Don't break after a unary token.
    return False
  if not style.Get('ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS'):
    if (subtypes.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes or
        subtypes.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes):
      return False
  return True


def IsSurroundedByBrackets(tok):
  """Find the unmatched opening bracket that encloses the token, if any.

  Walks backwards through previous_token links while tracking the nesting of
  '()', '{}' and '[]' independently.

  Arguments:
    tok: the token to start searching from.

  Returns:
    The enclosing opening bracket token ('(', '{' or '['), or None if the
    start of the line is reached without finding one.
  """
  paren_count = 0
  brace_count = 0
  sq_bracket_count = 0
  previous_token = tok.previous_token
  while previous_token:
    # A closing bracket seen while walking backwards starts a nested group
    # that has to be skipped.
    if previous_token.value == ')':
      paren_count -= 1
    elif previous_token.value == '}':
      brace_count -= 1
    elif previous_token.value == ']':
      sq_bracket_count -= 1

    # An opening bracket with a zero count is not matched by any closing
    # bracket we passed, so it encloses tok.
    if previous_token.value == '(':
      if paren_count == 0:
        return previous_token
      paren_count += 1
    elif previous_token.value == '{':
      if brace_count == 0:
        return previous_token
      brace_count += 1
    elif previous_token.value == '[':
      if sq_bracket_count == 0:
        return previous_token
      sq_bracket_count += 1

    previous_token = previous_token.previous_token
  return None


def _IsDictListTupleDelimiterTok(tok, is_opening):
  """Whether a bracket token delimits a non-empty dict/set, list, or tuple.

  Arguments:
    tok: a bracket token.
    is_opening: True if tok is the opening bracket of the pair, False if it is
      the closing one.

  Returns:
    False if tok has no matching bracket or the brackets are empty. Otherwise
    whether the parent of the first token inside the brackets is a
    dictsetmaker, listmaker, or testlist_gexp node.
  """
  assert tok

  if tok.matching_bracket is None:
    return False

  if is_opening:
    open_tok = tok
    close_tok = tok.matching_bracket
  else:
    open_tok = tok.matching_bracket
    close_tok = tok

  # There must be something in between the tokens
  if open_tok.next_token == close_tok:
    return False

  assert open_tok.next_token.node
  assert open_tok.next_token.node.parent

  return open_tok.next_token.node.parent.type in [
      python_symbols.dictsetmaker,
      python_symbols.listmaker,
      python_symbols.testlist_gexp,
  ]


_LOGICAL_OPERATORS = frozenset({'and', 'or'})
_BITWISE_OPERATORS = frozenset({'&', '|', '^'})
_ARITHMETIC_OPERATORS = frozenset({'+', '-', '*', '/', '%', '//', '@'})


def _SplitPenalty(prev_token, cur_token):
  """Return the penalty for breaking the line before the current token.

  The rules are checked in order and the first one that matches decides.

  Arguments:
    prev_token: the token preceding the potential split point.
    cur_token: the token that would start the new line.

  Returns:
    The penalty for splitting between prev_token and cur_token.
  """
  pval = prev_token.value
  cval = cur_token.value
  if pval == 'not':
    return split_penalty.UNBREAKABLE

  # A positive penalty already recorded on the token takes priority over the
  # generic rules below.
  if cur_token.node_split_penalty > 0:
    return cur_token.node_split_penalty

  if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
    # Prefer to split before 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
    if cval in _LOGICAL_OPERATORS:
      return 0
  else:
    # Prefer to split after 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return 0
    if cval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')

  if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'):
    # Prefer to split before '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
    if cval in _BITWISE_OPERATORS:
      return 0
  else:
    # Prefer to split after '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return 0
    if cval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')

  if (subtypes.COMP_FOR in cur_token.subtypes or
      subtypes.COMP_IF in cur_token.subtypes):
    # We don't mind breaking before the 'for' or 'if' of a list comprehension.
    return 0
  if subtypes.UNARY_OPERATOR in prev_token.subtypes:
    # Try not to break after a unary operator.
    return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR')
  if pval == ',':
    # Breaking after a comma is fine, if need be.
    return 0
  if pval == '**' or cval == '**':
    return split_penalty.STRONGLY_CONNECTED
  if (subtypes.VARARGS_STAR in prev_token.subtypes or
      subtypes.KWARGS_STAR_STAR in prev_token.subtypes):
    # Don't split after a varargs * or kwargs **.
    return split_penalty.UNBREAKABLE
  if prev_token.OpensScope() and cval != '(':
    # Slightly prefer
    return style.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET')
  if cval == ':':
    # Don't split before a colon.
    return split_penalty.UNBREAKABLE
  if cval == '=':
    # Don't split before an assignment.
    return split_penalty.UNBREAKABLE
  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or
      subtypes.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes):
    # Don't break before or after a default or named assignment.
    return split_penalty.UNBREAKABLE
  if cval == '==':
    # We would rather not split before an equality operator.
    return split_penalty.STRONGLY_CONNECTED
  if cur_token.ClosesScope():
    # Give a slight penalty for splitting before the closing scope.
    return 100
  return 0