# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from enum import auto, Enum
from typing import Any, Callable, Iterable, Optional, Sequence, Tuple, Union
from typing_extensions import final
from libcst._parser.parso.pgen2.generator import ReservedString
from libcst._parser.parso.python.token import PythonTokenTypes, TokenType
from libcst._parser.types.token import Token
from libcst._tabs import expand_tabs
# Human-readable fragments substituted into the messages built by
# `get_expected_str` for tokens that have no useful literal text of their own.
_EOF_STR: str = "end of file (EOF)"
_INDENT_STR: str = "an indent"
_DEDENT_STR: str = "a dedent"
# Character set passed to `str.rstrip` to trim trailing line terminators from the
# source line displayed by `ParserSyntaxError.context`.
_NEWLINE_CHARS: str = "\r\n"
class EOFSentinel(Enum):
    """
    Single-member enum used as a stand-in for "end of file".

    :func:`get_expected_str` accepts it in place of a token (for ``encountered``) or
    in place of an iterable of token types (for ``expected``) and renders it as the
    end-of-file description.
    """

    EOF = auto()
def get_expected_str(
    encountered: Union[Token, EOFSentinel],
    expected: Union[Iterable[Union[TokenType, ReservedString]], EOFSentinel],
) -> str:
    """
    Build a one-sentence description of an unexpected token.

    ``encountered`` is the offending token, or :class:`EOFSentinel` when the input
    ended. End-of-file (the sentinel or an ``ENDMARKER`` token), ``INDENT`` and
    ``DEDENT`` tokens are rendered with fixed descriptive phrases; any other token is
    rendered as the ``repr`` of its source string.

    ``expected`` is either :class:`EOFSentinel` or an iterable of the token types
    and reserved strings that would have been accepted. Their names are sorted so
    the message is deterministic.

    Returns ``"Encountered X, but expected A, B, or C."`` when there are between one
    and ten expected names, and the abbreviated ``"Unexpectedly encountered X."``
    when there are more than ten (too noisy to list) or none at all.
    """
    if (
        isinstance(encountered, EOFSentinel)
        or encountered.type is PythonTokenTypes.ENDMARKER
    ):
        encountered_str = _EOF_STR
    elif encountered.type is PythonTokenTypes.INDENT:
        encountered_str = _INDENT_STR
    elif encountered.type is PythonTokenTypes.DEDENT:
        encountered_str = _DEDENT_STR
    else:
        encountered_str = repr(encountered.string)

    if isinstance(expected, EOFSentinel):
        expected_names = [_EOF_STR]
    else:
        expected_names = sorted(
            repr(el.name) if isinstance(el, TokenType) else repr(el.value)
            for el in expected
        )

    # An empty list would otherwise hit `expected_names[-1]` below and raise
    # IndexError; a very long list is not useful to the reader. Both cases get the
    # abbreviated message.
    if not expected_names or len(expected_names) > 10:
        return f"Unexpectedly encountered {encountered_str}."

    if len(expected_names) == 1:
        expected_str = expected_names[0]
    else:
        # The comma before "or" is emitted even for exactly two names
        # (e.g. "'except', or 'finally'").
        expected_str = f"{', '.join(expected_names[:-1])}, or {expected_names[-1]}"
    return f"Encountered {encountered_str}, but expected {expected_str}."
# pyre-fixme[2]: 'Any' type isn't pyre-strict.
def _parser_syntax_error_unpickle(kwargs: Any) -> "ParserSyntaxError":
    """
    Reconstruct a :class:`ParserSyntaxError` from a mapping of constructor keyword
    arguments.

    This is the callable returned by ``ParserSyntaxError.__reduce__``; it lives at
    module level so that pickle can locate it by name.
    """
    rebuilt = ParserSyntaxError(**kwargs)
    return rebuilt
@final
class PartialParserSyntaxError(Exception):
    """
    An internal exception that represents a partially-constructed
    :class:`ParserSyntaxError`. It's raised by our internal parser conversion functions,
    which don't always know the current line and column information.

    This partial object only contains a message, with the expectation that the line and
    column information will be filled in by :class:`libcst._base_parser.BaseParser`.

    This should never be visible to the end-user.
    """

    #: The human-readable description of the problem, without location information.
    message: str

    def __init__(self, message: str) -> None:
        # Initialise the base class explicitly, matching ParserSyntaxError, rather
        # than relying on exception construction to populate ``args`` implicitly.
        super().__init__(message)
        self.message = message
@final
class ParserSyntaxError(Exception):
    """
    Contains an error encountered while trying to parse a piece of source code. This
    exception shouldn't be constructed directly by the user, but instead may be raised
    by calls to :func:`parse_module`, :func:`parse_expression`, or
    :func:`parse_statement`.

    This does not inherit from :class:`SyntaxError` because Python may raise a
    :class:`SyntaxError` for any number of reasons, potentially leading to unintended
    behavior.
    """

    #: A human-readable explanation of the syntax error without information about where
    #: the error occurred.
    #:
    #: For a human-readable explanation of the error alongside information about where
    #: it occurred, use :meth:`__str__` (via ``str(ex)``) instead.
    message: str

    # An internal value used to compute `editor_column` and to pretty-print where the
    # syntax error occurred in the code.
    _lines: Sequence[str]

    #: The one-indexed line where the error occurred.
    raw_line: int

    #: The zero-indexed column as a number of characters from the start of the line
    #: where the error occurred.
    raw_column: int

    def __init__(
        self, message: str, *, lines: Sequence[str], raw_line: int, raw_column: int
    ) -> None:
        """
        Store the message and the location of the error.

        ``lines`` is the source split into lines; ``raw_line`` (one-indexed) and
        ``raw_column`` (zero-indexed) locate the error within it.
        """
        super().__init__(message)
        self.message = message
        self._lines = lines
        self.raw_line = raw_line
        self.raw_column = raw_column

    def __reduce__(
        self,
    ) -> Tuple[Callable[..., "ParserSyntaxError"], Tuple[object, ...]]:
        """
        Pickle support. ``__init__`` takes keyword-only arguments, so the instance
        is rebuilt through a module-level helper that receives them as one dict.
        """
        return (
            _parser_syntax_error_unpickle,
            (
                {
                    "message": self.message,
                    "lines": self._lines,
                    "raw_line": self.raw_line,
                    "raw_column": self.raw_column,
                },
            ),
        )

    def __str__(self) -> str:
        """
        A multi-line human-readable error message of where the syntax error is in their
        code. For example::

            Syntax Error @ 2:1.
            Incomplete input. Encountered end of file (EOF), but expected 'except', or 'finally'.

            try: pass
                     ^
        """
        context = self.context
        return (
            f"Syntax Error @ {self.editor_line}:{self.editor_column}.\n"
            + f"{self.message}"
            + (f"\n\n{context}" if context is not None else "")
        )

    def __repr__(self) -> str:
        """Return a compact representation that elides the source lines."""
        return (
            "ParserSyntaxError("
            + f"{self.message!r}, lines=[...], raw_line={self.raw_line!r}, "
            + f"raw_column={self.raw_column!r})"
        )

    @property
    def context(self) -> Optional[str]:
        """
        A formatted string containing the line of code with the syntax error (or a
        non-empty line above it) along with a caret indicating the exact column where
        the error occurred.

        Return ``None`` if there's no relevant non-empty line to show. (e.g. the file
        consists of only blank lines)
        """
        lines = self._lines
        error_line = self.editor_line

        # We want to avoid displaying a blank line for context. If we're on a blank
        # line, find the nearest line above us that isn't blank.
        displayed_line = error_line
        while displayed_line >= 1 and not lines[displayed_line - 1].strip():
            displayed_line -= 1

        # Ran off the top of the file without finding any content. Stop here: index
        # ``displayed_line - 1`` would be -1 and silently wrap around to the *last*
        # line of the file, which lies below the error.
        if displayed_line < 1:
            return None

        if displayed_line == error_line:
            displayed_column = self.editor_column
        else:
            # We moved up to an earlier line, so point just past its end (the raw
            # length, including any line terminator).
            displayed_column = len(lines[displayed_line - 1])

        formatted_source_line = expand_tabs(lines[displayed_line - 1]).rstrip(
            _NEWLINE_CHARS
        )
        # fmt: off
        return (
            f"{formatted_source_line}\n"
            + f"{' ' * (displayed_column - 1)}^"
        )
        # fmt: on

    @property
    def editor_line(self) -> int:
        """
        The expected one-indexed line in the user's editor. This is the same as
        :attr:`raw_line`.
        """
        return self.raw_line  # raw_line is already one-indexed.

    @property
    def editor_column(self) -> int:
        """
        The expected one-indexed column that's likely to match the behavior of the
        user's editor, assuming tabs expand to 1-8 spaces. This is the column number
        shown when the syntax error is printed out with `str`.

        This assumes single-width characters. However, because python doesn't ship with
        a wcwidth function, it's hard to handle this properly without a third-party
        dependency.

        For a raw zero-indexed character offset without tab expansion, see
        :attr:`raw_column`.
        """
        prefix_str = self._lines[self.raw_line - 1][: self.raw_column]
        tab_adjusted_column = len(expand_tabs(prefix_str))
        # Text editors use a one-indexed column, so we need to add one to our
        # zero-indexed column to get a human-readable result.
        return tab_adjusted_column + 1
class MetadataException(Exception):
    """
    Plain :class:`Exception` subclass that adds no behavior of its own.

    NOTE(review): presumably raised by metadata-related code elsewhere; no raising
    site is visible in this module, so confirm against callers.
    """