# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
# Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt

"""Check source code is ascii only or has an encoding declaration (PEP 263)."""

from __future__ import annotations

import re
import tokenize
from typing import TYPE_CHECKING

from astroid import nodes

from pylint.checkers import BaseRawFileChecker, BaseTokenChecker
from pylint.typing import ManagedMessage

if TYPE_CHECKING:
    from pylint.lint import PyLinter


class ByIdManagedMessagesChecker(BaseRawFileChecker):
    """Checks for messages that are enabled or disabled by id instead of symbol."""

    name = "miscellaneous"
    msgs = {
        "I0023": (
            "%s",
            "use-symbolic-message-instead",
            "Used when a message is enabled or disabled by id.",
            {"default_enabled": False},
        )
    }
    options = ()

    def _clear_by_id_managed_msgs(self) -> None:
        """Empty the linter's list of messages managed by id."""
        self.linter._by_id_managed_msgs.clear()

    def _get_by_id_managed_msgs(self) -> list[ManagedMessage]:
        """Return the linter's list of messages managed by id."""
        return self.linter._by_id_managed_msgs

    def process_module(self, node: nodes.Module) -> None:
        """Inspect the source file to find messages activated or deactivated by id.

        Emits ``use-symbolic-message-instead`` for every recorded entry whose
        module name equals ``node.name``, then clears the recorded entries.
        """
        managed_msgs = self._get_by_id_managed_msgs()
        for mod_name, msgid, symbol, lineno, is_disabled in managed_msgs:
            if mod_name == node.name:
                verb = "disable" if is_disabled else "enable"
                txt = f"'{msgid}' is cryptic: use '# pylint: {verb}={symbol}' instead"
                self.add_message("use-symbolic-message-instead", line=lineno, args=txt)
        self._clear_by_id_managed_msgs()


class EncodingChecker(BaseTokenChecker, BaseRawFileChecker):
    """BaseChecker for encoding issues and fixme notes.

    Checks for:
    * warning notes in the code like FIXME, XXX
    * encoding issues.
    """

    # configuration section name
    name = "miscellaneous"
    msgs = {
        "W0511": (
            "%s",
            "fixme",
            "Used when a warning note as FIXME or XXX is detected.",
        )
    }

    # NOTE(review): every "metavar" below is an empty string; presumably a
    # placeholder text was intended for the help output -- confirm.
    options = (
        (
            "notes",
            {
                "type": "csv",
                "metavar": "",
                "default": ("FIXME", "XXX", "TODO"),
                "help": (
                    "List of note tags to take in consideration, "
                    "separated by a comma."
                ),
            },
        ),
        (
            "notes-rgx",
            {
                "type": "string",
                "metavar": "",
                "help": "Regular expression of note tags to take in consideration.",
                "default": "",
            },
        ),
        (
            "check-fixme-in-docstring",
            {
                "type": "yn",
                "metavar": "",
                "default": False,
                "help": "Whether or not to search for fixme's in docstrings.",
            },
        ),
    )

    def open(self) -> None:
        """Compile the fixme patterns from the ``notes`` and ``notes-rgx`` options.

        Each pattern captures the note tag and the text following it in a
        group named ``msg``, which ``process_tokens`` reads back through
        ``match.group("msg")``. All patterns are case-insensitive.
        """
        super().open()

        # Literal tags are escaped; the user-supplied regex is appended verbatim.
        notes = "|".join(re.escape(note) for note in self.linter.config.notes)
        if self.linter.config.notes_rgx:
            notes += f"|{self.linter.config.notes_rgx}"

        # The lookahead requires the tag to be followed by ':', whitespace or
        # the end of the string, so a tag that merely prefixes a longer word
        # is not reported.
        comment_regex = rf"#\s*(?P<msg>({notes})(?=(:|\s|\Z)).*?$)"
        self._comment_fixme_pattern = re.compile(comment_regex, re.I)

        # single line docstring like '''this''' or """this"""
        docstring_regex = rf"((\"\"\")|(\'\'\'))\s*(?P<msg>({notes})(?=(:|\s|\Z)).*?)((\"\"\")|(\'\'\'))"
        self._docstring_fixme_pattern = re.compile(docstring_regex, re.I)

        # multiline docstrings which will be split into newlines
        # so we do not need to look for quotes/double-quotes
        multiline_docstring_regex = rf"^\s*(?P<msg>({notes})(?=(:|\s|\Z)).*$)"
        self._multiline_docstring_fixme_pattern = re.compile(
            multiline_docstring_regex, re.I
        )

    def _check_encoding(
        self, lineno: int, line: bytes, file_encoding: str
    ) -> str | None:
        """Try to decode ``line`` with ``file_encoding``.

        :param lineno: Line number used when a message is emitted.
        :param line: Raw bytes of one source line.
        :param file_encoding: Name of the encoding to decode with.
        :returns: The decoded text, or ``None`` when decoding failed.
        """
        try:
            return line.decode(file_encoding)
        except UnicodeDecodeError:
            # Undecodable bytes are not reported here; the caller gets None.
            pass
        except LookupError:
            # Unknown codec name: only report it on the comment line that
            # mentions both "coding" and the encoding name itself.
            if (
                line.startswith(b"#")
                and "coding" in str(line)
                and file_encoding in str(line)
            ):
                msg = f"Cannot decode using encoding '{file_encoding}', bad encoding"
                self.add_message("syntax-error", line=lineno, args=msg)
        return None

    def process_module(self, node: nodes.Module) -> None:
        """Inspect the source file to find encoding problem.

        Every line of the module stream is decoded with the module's declared
        encoding, falling back to ``"ascii"`` when none is set.
        """
        encoding = node.file_encoding if node.file_encoding else "ascii"

        with node.stream() as stream:
            for lineno, line in enumerate(stream):
                # enumerate() is 0-based while reported line numbers are 1-based.
                self._check_encoding(lineno + 1, line, encoding)

    def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
        """Inspect the source to find fixme problems.

        Comment tokens are always checked. Other tokens are checked against
        the docstring patterns only when ``check-fixme-in-docstring`` is
        enabled.
        """
        # An empty ``notes`` option disables the check entirely, even when
        # ``notes-rgx`` is set.
        if not self.linter.config.notes:
            return
        for token_info in tokens:
            if token_info.type == tokenize.COMMENT:
                if match := self._comment_fixme_pattern.match(token_info.string):
                    self.add_message(
                        "fixme",
                        col_offset=token_info.start[1] + 1,
                        args=match.group("msg"),
                        line=token_info.start[0],
                    )
            elif self.linter.config.check_fixme_in_docstring:
                if self._is_multiline_docstring(token_info):
                    docstring_lines = token_info.string.split("\n")
                    for line_no, line in enumerate(docstring_lines):
                        if match := self._multiline_docstring_fixme_pattern.match(line):
                            self.add_message(
                                "fixme",
                                col_offset=token_info.start[1] + 1,
                                args=match.group("msg"),
                                # Offset from the token's first line to the
                                # docstring line that matched.
                                line=token_info.start[0] + line_no,
                            )
                elif match := self._docstring_fixme_pattern.match(token_info.string):
                    self.add_message(
                        "fixme",
                        col_offset=token_info.start[1] + 1,
                        args=match.group("msg"),
                        line=token_info.start[0],
                    )

    def _is_multiline_docstring(self, token_info: tokenize.TokenInfo) -> bool:
        """Return whether ``token_info`` is a triple-quoted string spanning lines.

        The token must be a STRING whose physical line text starts (after
        leading whitespace) with a triple quote and still contains a newline
        once trailing whitespace is stripped.
        """
        return (
            token_info.type == tokenize.STRING
            and (token_info.line.lstrip().startswith(('"""', "'''")))
            and "\n" in token_info.line.rstrip()
        )


def register(linter: PyLinter) -> None:
    """Register both checkers of this module on ``linter``."""
    linter.register_checker(EncodingChecker(linter))
    linter.register_checker(ByIdManagedMessagesChecker(linter))