client-py/venv/lib/python3.12/site-packages/yapf/pyparser/pyparser.py
2026-05-02 13:34:53 +05:00

163 lines
5.1 KiB
Python

# Copyright 2022 Bill Wendling, All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple Python Parser
Parse Python code into a list of logical lines, represented by LogicalLine
objects. This uses Python's tokenizer to generate the tokens. As such, YAPF must
be run with the appropriate Python version---Python >=3.7 for Python 3.7 code,
Python >=3.8 for Python 3.8 code, etc.
This parser uses Python's native "tokenizer" module to generate a list of tokens
for the source code. It then uses Python's native "ast" module to assign
subtypes, calculate split penalties, etc.
A "logical line" produced by Python's "tokenizer" module ends with a
tokenize.NEWLINE, rather than a tokenize.NL, making it easy to separate them
out. Comments all end with a tokenize.NL, so we need to make sure we don't
errantly pick up non-comment tokens when parsing comment blocks.
ParseCode(): parse the code producing a list of logical lines.
"""
# TODO: Call from yapf_api.FormatCode.
import ast
import codecs
import os
import token
import tokenize
from io import StringIO
from tokenize import TokenInfo
from yapf.pyparser import split_penalty_visitor
from yapf.yapflib import format_token
from yapf.yapflib import logical_line
CONTINUATION = token.N_TOKENS
def ParseCode(unformatted_source, filename='&lt;unknown&gt;'):
  """Parse a string of Python code into logical lines.

  This provides an alternative entry point to YAPF.

  Arguments:
    unformatted_source: (unicode) The code to format.
    filename: (unicode) The name of the file being reformatted.

  Returns:
    A list of LogicalLines.

  Raises:
    An exception is raised if there's an error during AST parsing
    (e.g. SyntaxError) or tokenization.
  """
  # The tokenizer expects the source to end with a newline.
  if not unformatted_source.endswith(os.linesep):
    unformatted_source += os.linesep

  # NOTE: errors from ast.parse / tokenize propagate to the caller unchanged.
  # (The original wrapped these calls in a no-op `try/except Exception: raise`,
  # which neither handled nor annotated the error; it has been removed.)
  ast_tree = ast.parse(unformatted_source, filename)
  ast.fix_missing_locations(ast_tree)
  readline = StringIO(unformatted_source).readline
  tokens = tokenize.generate_tokens(readline)

  logical_lines = _CreateLogicalLines(tokens)

  # Process the logical lines: assign subtypes, compute split penalties.
  split_penalty_visitor.SplitPenalty(logical_lines).visit(ast_tree)

  return logical_lines
def _CreateLogicalLines(tokens):
"""Separate tokens into logical lines.
Arguments:
tokens: (list of tokenizer.TokenInfo) Tokens generated by tokenizer.
Returns:
A list of LogicalLines.
"""
formatted_tokens = []
# Convert tokens into "TokenInfo" and add tokens for continuation markers.
prev_tok = None
for tok in tokens:
tok = TokenInfo(*tok)
if (prev_tok and prev_tok.line.rstrip().endswith('\\') and
prev_tok.start[0] < tok.start[0]):
ctok = TokenInfo(
type=CONTINUATION,
string='\\',
start=(prev_tok.start[0], prev_tok.start[1] + 1),
end=(prev_tok.end[0], prev_tok.end[0] + 2),
line=prev_tok.line)
ctok.lineno = ctok.start[0]
ctok.column = ctok.start[1]
ctok.value = '\\'
formatted_tokens.append(format_token.FormatToken(ctok, 'CONTINUATION'))
tok.lineno = tok.start[0]
tok.column = tok.start[1]
tok.value = tok.string
formatted_tokens.append(
format_token.FormatToken(tok, token.tok_name[tok.type]))
prev_tok = tok
# Generate logical lines.
logical_lines, cur_logical_line = [], []
depth = 0
for tok in formatted_tokens:
if tok.type == tokenize.ENDMARKER:
break
if tok.type == tokenize.NEWLINE:
# End of a logical line.
logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
cur_logical_line = []
elif tok.type == tokenize.INDENT:
depth += 1
elif tok.type == tokenize.DEDENT:
depth -= 1
elif tok.type == tokenize.NL:
pass
else:
if (cur_logical_line and not tok.type == tokenize.COMMENT and
cur_logical_line[0].type == tokenize.COMMENT):
# We were parsing a comment block, but now we have real code to worry
# about. Store the comment and carry on.
logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
cur_logical_line = []
cur_logical_line.append(tok)
# Link the FormatTokens in each line together to form a doubly linked list.
for line in logical_lines:
previous = line.first
bracket_stack = [previous] if previous.OpensScope() else []
for tok in line.tokens[1:]:
tok.previous_token = previous
previous.next_token = tok
previous = tok
# Set up the "matching_bracket" attribute.
if tok.OpensScope():
bracket_stack.append(tok)
elif tok.ClosesScope():
bracket_stack[-1].matching_bracket = tok
tok.matching_bracket = bracket_stack.pop()
return logical_lines