Source code

Revision control

Copy as Markdown

Other Tools

import functools
import re
from typing import Any, Callable, List, Set, TypeVar, Union, cast

from . import ast
from .errors import ParseError
from .stream import EOL, FluentParserStream
# Type variable for the AST node type returned by a decorated parser method.
R = TypeVar("R", bound=ast.SyntaxNode)
def with_span(fn: Callable[..., R]) -> Callable[..., R]:
    """Decorate a parser method so that the node it returns carries a span.

    The wrapper records ``ps.index`` before and after calling ``fn`` and
    passes both positions to ``node.add_span``. When the parser's
    ``with_spans`` attribute is false, ``fn`` is called and its result is
    returned untouched.

    ``functools.wraps`` keeps the wrapped method's ``__name__``, ``__doc__``
    and ``__wrapped__`` so that introspection and tracebacks refer to the
    real parser method rather than to ``decorated``.
    """
    @functools.wraps(fn)
    def decorated(self: 'FluentParser', ps: FluentParserStream, *args: Any, **kwargs: Any) -> Any:
        if not self.with_spans:
            return fn(self, ps, *args, **kwargs)

        start = ps.index
        node = fn(self, ps, *args, **kwargs)

        # Don't re-add the span if the node already has it. This may happen
        # when one decorated function calls another decorated function.
        if node.span is not None:
            return node

        node.add_span(start, ps.index)
        return node

    return decorated
class FluentParser:
"""This class is used to parse Fluent source content.
``with_spans`` enables source information in the form of
:class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`.
"""
def __init__(self, with_spans: bool = True):
    """Store the ``with_spans`` flag read by the parsing methods.

    :param with_spans: when true, span information is added to the nodes
        produced by this parser.
    """
    self.with_spans = with_spans
def parse(self, source: str) -> ast.Resource:
    """Create a :class:`.ast.Resource` from a Fluent source.

    Entries are read one after another with :meth:`get_entry_or_junk`
    until the stream is exhausted.
    """
    stream = FluentParserStream(source)
    stream.skip_blank_block()

    collected: List[ast.EntryType] = []
    pending_comment = None

    while stream.current_char:
        entry = self.get_entry_or_junk(stream)
        separator = stream.skip_blank_block()

        # Regular Comments need special care. A Comment is attached to a
        # Message or a Term only when that entry follows it immediately,
        # and it must stay standalone when Junk follows. So the Comment is
        # held back here and the decision is taken once the next entry
        # has been parsed.
        if (
            isinstance(entry, ast.Comment)
            and len(separator) == 0
            and stream.current_char
        ):
            pending_comment = entry
            continue

        if pending_comment is not None:
            if not isinstance(entry, (ast.Message, ast.Term)):
                # Not attachable: the comment becomes an entry of its own.
                collected.append(pending_comment)
            else:
                entry.comment = pending_comment
                if self.with_spans:
                    # Stretch the entry's span back to the comment's start.
                    entry_span = cast(ast.Span, entry.span)
                    comment_span = cast(ast.Span, entry.comment.span)
                    entry_span.start = comment_span.start
            # Either way the held-back comment has now been handled.
            pending_comment = None

        collected.append(entry)

    resource = ast.Resource(collected)
    if self.with_spans:
        resource.add_span(0, stream.index)
    return resource
def parse_entry(self, source: str) -> ast.EntryType:
    """Parse the first :class:`.ast.Entry` in source.

    Leading comments are parsed and discarded so that parsing starts at the
    first :class:`.ast.Message` or :class:`.ast.Term`. If that entry cannot
    be parsed, :class:`.ast.Junk` is returned instead.

    A leading comment that itself fails to parse is not discarded: the
    :class:`.ast.Junk` produced for it is returned right away.
    """
    stream = FluentParserStream(source)
    stream.skip_blank_block()

    while stream.current_char == '#':
        leading = self.get_entry_or_junk(stream)
        if isinstance(leading, ast.Junk):
            # A broken comment is reported rather than skipped.
            return leading
        stream.skip_blank_block()

    return self.get_entry_or_junk(stream)
def get_entry_or_junk(self, ps: FluentParserStream) -> ast.EntryType:
    """Parse one entry, falling back to :class:`.ast.Junk` on a ParseError.

    On failure the stream is advanced with ``ps.skip_to_next_entry_start``
    and the skipped source text becomes the Junk's content. The error is
    recorded on the Junk as an :class:`.ast.Annotation`.
    """
    entry_start = ps.index

    try:
        entry = self.get_entry(ps)
        ps.expect_line_end()
    except ParseError as err:
        failure_pos = ps.index
        ps.skip_to_next_entry_start(entry_start)
        junk_end = ps.index

        # The position of the error must be inside of the Junk's span.
        annotation_pos = min(failure_pos, junk_end)

        junk = ast.Junk(ps.string[entry_start:junk_end])
        if self.with_spans:
            junk.add_span(entry_start, junk_end)

        annotation = ast.Annotation(err.code, list(err.args) if err.args else None, err.message)
        annotation.add_span(annotation_pos, annotation_pos)
        junk.add_annotation(annotation)
        return junk
    else:
        return entry
def get_entry(self, ps: FluentParserStream) -> ast.EntryType:
    """Dispatch to the comment, term or message parser.

    The choice is made from the current character: ``#`` starts a comment,
    ``-`` starts a term, and an identifier start begins a message.

    :raises ParseError: ``E0002`` when none of the three applies.
    """
    lead = ps.current_char
    if lead == '#':
        return self.get_comment(ps)
    if lead == '-':
        return self.get_term(ps)
    if not ps.is_identifier_start():
        raise ParseError('E0002')
    return self.get_message(ps)
@with_span
def get_comment(self, ps: FluentParserStream) -> Union[ast.Comment, ast.GroupComment, ast.ResourceComment]:
    """Parse a comment made of one or more consecutive ``#`` lines.

    The number of ``#`` characters on the first line fixes the level:
    0 is a comment, 1 a group comment, 2 a resource comment. Following
    lines are merged into the same node for as long as
    ``ps.is_next_line_comment(level=level)`` holds.
    """
    def not_eol(x: Any) -> bool:
        return x != EOL

    level = -1
    content = ''

    while True:
        # On the first line up to three '#' are consumed; on later lines
        # only as many as the level established by the first line.
        limit = 2 if level == -1 else level
        seen = -1
        while ps.current_char == '#' and seen < limit:
            ps.next()
            seen += 1

        if level == -1:
            level = seen

        if ps.current_char != EOL:
            ps.expect_char(' ')
            while True:
                ch = ps.take_char(not_eol)
                if not ch:
                    break
                content += ch

        if not ps.is_next_line_comment(level=level):
            break

        # Keep the line break between merged comment lines.
        content += cast(str, ps.current_char)
        ps.next()

    node_class = {
        0: ast.Comment,
        1: ast.GroupComment,
        2: ast.ResourceComment,
    }.get(level)
    if node_class is None:
        # never happens if ps.current_char == '#' when called
        return cast(ast.Comment, None)
    return node_class(content)
@with_span
def get_message(self, ps: FluentParserStream) -> ast.Message:
    """Parse ``identifier = pattern`` followed by optional attributes.

    :raises ParseError: ``E0005`` (with the identifier's name) when the
        message has neither a value nor any attribute.
    """
    identifier = self.get_identifier(ps)

    ps.skip_blank_inline()
    ps.expect_char('=')

    pattern = self.maybe_get_pattern(ps)
    attributes = self.get_attributes(ps)

    if pattern is None and not attributes:
        raise ParseError('E0005', identifier.name)

    return ast.Message(identifier, pattern, attributes)
@with_span
def get_term(self, ps: FluentParserStream) -> ast.Term:
    """Parse ``-identifier = pattern`` followed by optional attributes.

    :raises ParseError: ``E0006`` (with the identifier's name) when the
        term has no value. The check happens before attributes are read.
    """
    ps.expect_char('-')
    identifier = self.get_identifier(ps)

    ps.skip_blank_inline()
    ps.expect_char('=')

    pattern = self.maybe_get_pattern(ps)
    if pattern is None:
        raise ParseError('E0006', identifier.name)

    return ast.Term(identifier, pattern, self.get_attributes(ps))
@with_span
def get_attribute(self, ps: FluentParserStream) -> ast.Attribute:
    """Parse a single ``.identifier = pattern`` attribute.

    :raises ParseError: ``E0012`` when no pattern follows the ``=``.
    """
    ps.expect_char('.')
    name = self.get_identifier(ps)

    ps.skip_blank_inline()
    ps.expect_char('=')

    pattern = self.maybe_get_pattern(ps)
    if pattern is None:
        raise ParseError('E0012')

    return ast.Attribute(name, pattern)
def get_attributes(self, ps: FluentParserStream) -> List[ast.Attribute]:
    """Collect attributes for as long as ``ps.is_attribute_start()`` holds.

    Blank content before each candidate is only peeked; it is consumed
    (``ps.skip_to_peek``) once an attribute start has been confirmed.
    """
    found: List[ast.Attribute] = []

    while True:
        ps.peek_blank()
        if not ps.is_attribute_start():
            return found
        ps.skip_to_peek()
        found.append(self.get_attribute(ps))
@with_span
def get_identifier(self, ps: FluentParserStream) -> ast.Identifier:
    """Read an identifier: one start character, then identifier characters.

    :raises ParseError: ``E0004`` when ``ps.take_id_start()`` yields None.
    """
    first = ps.take_id_start()
    if first is None:
        raise ParseError('E0004', 'a-zA-Z')

    chars = [first]
    while True:
        ch = ps.take_id_char()
        if not ch:
            break
        chars.append(ch)

    return ast.Identifier(''.join(chars))
def get_variant_key(self, ps: FluentParserStream) -> Union[ast.Identifier, ast.NumberLiteral]:
    """Parse a variant key: a number literal or an identifier.

    A key starting with a digit or ``-`` is parsed as a number; anything
    else is parsed as an identifier.

    :raises ParseError: ``E0013`` when the stream has no current character.
    """
    ch = ps.current_char
    if ch is None:
        raise ParseError('E0013')

    code = ord(ch)
    starts_number = code == 45 or 48 <= code <= 57  # '-' or 0-9
    if starts_number:
        return self.get_number(ps)
    return self.get_identifier(ps)
@with_span
def get_variant(self, ps: FluentParserStream, has_default: bool) -> ast.Variant:
    """Parse one ``[key] pattern`` variant, optionally prefixed with ``*``.

    :param has_default: whether an earlier variant was already marked as
        the default.
    :raises ParseError: ``E0015`` when ``*`` is found although
        ``has_default`` is true; ``E0012`` when the variant has no pattern.
    """
    is_default = ps.current_char == '*'
    if is_default:
        if has_default:
            raise ParseError('E0015')
        ps.next()

    ps.expect_char('[')
    ps.skip_blank()
    key = self.get_variant_key(ps)
    ps.skip_blank()
    ps.expect_char(']')

    pattern = self.maybe_get_pattern(ps)
    if pattern is None:
        raise ParseError('E0012')

    return ast.Variant(key, pattern, is_default)
def get_variants(self, ps: FluentParserStream) -> List[ast.Variant]:
    """Parse the list of variants of a select expression.

    :raises ParseError: ``E0011`` when no variant was found; ``E0010``
        when none of the variants is the default.
    """
    collected: List[ast.Variant] = []
    has_default = False

    ps.skip_blank()
    while ps.is_variant_start():
        current = self.get_variant(ps, has_default)
        if current.default:
            has_default = True
        collected.append(current)

        ps.expect_line_end()
        ps.skip_blank()

    if not collected:
        raise ParseError('E0011')
    if not has_default:
        raise ParseError('E0010')
    return collected
def get_digits(self, ps: FluentParserStream) -> str:
    """Consume a run of digits and return it as a string.

    :raises ParseError: ``E0004`` when not even one digit could be taken.
    """
    digits = []
    while True:
        ch = ps.take_digit()
        if not ch:
            break
        digits.append(ch)

    if not digits:
        raise ParseError('E0004', '0-9')
    return ''.join(digits)
@with_span
def get_number(self, ps: FluentParserStream) -> ast.NumberLiteral:
    """Parse a number: optional ``-``, digits, optional ``.`` and digits.

    The literal keeps the text exactly as it was read from the stream.
    """
    parts = []

    if ps.current_char == '-':
        ps.next()
        parts.append('-')

    parts.append(self.get_digits(ps))

    if ps.current_char == '.':
        ps.next()
        parts.append('.')
        parts.append(self.get_digits(ps))

    return ast.NumberLiteral(''.join(parts))
def maybe_get_pattern(self, ps: FluentParserStream) -> Union[ast.Pattern, None]:
    """Parse an inline or a block Pattern, or return None.

    Two kinds of patterns are told apart: those starting on the same line
    as the identifier (inline patterns, single- or multiline) and those
    starting on a new line (block patterns). The difference matters for
    dedentation: the indent of a block pattern's first line takes part in
    computing the maximum common indent.
    """
    ps.peek_blank_inline()
    if ps.is_value_start():
        is_block = False
    else:
        ps.peek_blank_block()
        if not ps.is_value_continuation():
            return None
        is_block = True

    ps.skip_to_peek()
    return self.get_pattern(ps, is_block=is_block)
@with_span
def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
    """Parse a Pattern made of text elements and placeables.

    Indentation found at the start of each continuation line is recorded
    as an ``Indent`` element, and the smallest indent seen is handed to
    :meth:`dedent` together with the collected elements.

    :param is_block: true when the pattern starts on a new line.
    :raises ParseError: ``E0027`` on a ``}`` outside of a placeable.
    """
    parts: List[Any] = []

    if not is_block:
        # Should get fixed by the subsequent min() operation
        common_indent_length = cast(int, float('infinity'))
    else:
        # A block pattern is a pattern which starts on a new line. Measure
        # the indent of this first line for the dedentation logic.
        indent_start = ps.index
        first_indent = ps.skip_blank_inline()
        parts.append(self.Indent(first_indent, indent_start, ps.index))
        common_indent_length = len(first_indent)

    while ps.current_char:
        if ps.current_char == EOL:
            indent_start = ps.index
            blank_lines = ps.peek_blank_block()

            if not ps.is_value_continuation():
                # The end condition for get_pattern's while loop is a
                # newline which is not followed by a valid pattern
                # continuation.
                ps.reset_peek()
                break

            ps.skip_to_peek()
            indent = ps.skip_blank_inline()
            common_indent_length = min(common_indent_length, len(indent))
            parts.append(self.Indent(blank_lines + indent, indent_start, ps.index))
            continue

        if ps.current_char == '}':
            raise ParseError('E0027')

        piece: Union[ast.TextElement, ast.Placeable]
        if ps.current_char != '{':
            piece = self.get_text_element(ps)
        else:
            piece = self.get_placeable(ps)
        parts.append(piece)

    return ast.Pattern(self.dedent(parts, common_indent_length))
class Indent(ast.SyntaxNode):
    """Temporary node holding whitespace gathered by ``get_pattern``.

    The span passed to the constructor is always attached.
    """

    def __init__(self, value: str, start: int, end: int):
        super().__init__()
        self.value = value
        self.add_span(start, end)
def dedent(self,
           elements: List[Union[ast.TextElement, ast.Placeable, Indent]],
           common_indent: int
           ) -> List[Union[ast.TextElement, ast.Placeable]]:
    """Remove the common indent from the text lines of a pattern.

    ``common_indent`` is computed in ``get_pattern``. That many characters
    are cut from the end of every ``Indent`` element; what remains is
    merged into the neighbouring text. Adjacent text is joined into single
    TextElements, and trailing spaces and line breaks are stripped from a
    final TextElement.
    """
    result: List[Union[ast.TextElement, ast.Placeable]] = []

    for item in elements:
        if isinstance(item, ast.Placeable):
            result.append(item)
            continue

        if isinstance(item, self.Indent):
            # Strip the common indent.
            keep = len(item.value) - common_indent
            item.value = item.value[:keep]
            if not item.value:
                continue

        previous = result[-1] if result else None
        if isinstance(previous, ast.TextElement):
            # Join adjacent TextElements by replacing them with their sum.
            merged = ast.TextElement(previous.value + item.value)
            if self.with_spans:
                merged.add_span(cast(ast.Span, previous.span).start, cast(ast.Span, item.span).end)
            result[-1] = merged
            continue

        if isinstance(item, self.Indent):
            # If the indent hasn't been merged into a preceding
            # TextElement, convert it into a new TextElement.
            as_text = ast.TextElement(item.value)
            if self.with_spans:
                indent_span = cast(ast.Span, item.span)
                as_text.add_span(indent_span.start, indent_span.end)
            item = as_text

        result.append(item)

    # Trim trailing whitespace from the Pattern.
    tail = result[-1] if result else None
    if isinstance(tail, ast.TextElement):
        tail.value = tail.value.rstrip(' \n\r')
        if not tail.value:
            result.pop()

    return result
@with_span
def get_text_element(self, ps: FluentParserStream) -> ast.TextElement:
    """Read text up to the next ``{``, ``}``, line end or end of input.

    The stopping character is left in the stream.
    """
    chars = []
    while True:
        ch = ps.current_char
        if not ch or ch in ('{', '}', EOL):
            break
        chars.append(ch)
        ps.next()

    return ast.TextElement(''.join(chars))
def get_escape_sequence(self, ps: FluentParserStream) -> str:
    """Read the part of an escape sequence that follows the backslash.

    Returns the sequence with its leading backslash. ``\\`` and ``"`` are
    handled here; ``u`` and ``U`` are delegated to
    :meth:`get_unicode_escape_sequence` with 4 and 6 digits respectively.

    :raises ParseError: ``E0025`` for any other character.
    """
    escaped = ps.current_char

    if escaped in ('\\', '"'):
        ps.next()
        return f'\\{escaped}'

    if escaped == 'u':
        return self.get_unicode_escape_sequence(ps, escaped, 4)
    if escaped == 'U':
        return self.get_unicode_escape_sequence(ps, escaped, 6)

    raise ParseError('E0025', escaped)
def get_unicode_escape_sequence(self, ps: FluentParserStream, u: str, digits: int) -> str:
    """Read ``u`` followed by exactly ``digits`` hexadecimal digits.

    Returns the sequence with a leading backslash.

    :raises ParseError: ``E0026`` when a hex digit is missing; the error
        argument holds what was read so far plus the offending character.
    """
    ps.expect_char(u)

    hex_chars: List[str] = []
    for _ in range(digits):
        ch = ps.take_hex_digit()
        if not ch:
            sequence = ''.join(hex_chars)
            raise ParseError('E0026', f'\\{u}{sequence}{ps.current_char}')
        hex_chars.append(ch)

    sequence = ''.join(hex_chars)
    return f'\\{u}{sequence}'
@with_span
def get_placeable(self, ps: FluentParserStream) -> ast.Placeable:
    """Parse ``{ expression }`` and wrap the expression in a Placeable."""
    ps.expect_char('{')
    ps.skip_blank()
    inner = self.get_expression(ps)
    ps.expect_char('}')
    return ast.Placeable(inner)
@with_span
def get_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression,
                                                          ast.Placeable,
                                                          ast.SelectExpression]:
    """Parse an inline expression or, when ``->`` follows, a select expression.

    :raises ParseError:
        ``E0016``/``E0018`` when a message reference (without/with an
        attribute) is used as a selector; ``E0017`` when a term reference
        without an attribute is used as a selector; ``E0029`` when the
        selector is of any other unsupported type; ``E0019`` when a term
        attribute is used outside of a selector.
    """
    selector = self.get_inline_expression(ps)
    ps.skip_blank()

    if ps.current_char != '-':
        # Not a select expression at all.
        if isinstance(selector, ast.TermReference) and selector.attribute is not None:
            raise ParseError('E0019')
        return selector

    if ps.peek() != '>':
        ps.reset_peek()
        return selector

    # From here on an arrow has been seen: validate the selector's type.
    if isinstance(selector, ast.MessageReference):
        raise ParseError('E0016' if selector.attribute is None else 'E0018')

    if isinstance(selector, ast.TermReference):
        if selector.attribute is None:
            raise ParseError('E0017')
    else:
        allowed_selectors = (
            ast.StringLiteral,
            ast.NumberLiteral,
            ast.VariableReference,
            ast.FunctionReference,
        )
        if not isinstance(selector, allowed_selectors):
            raise ParseError('E0029')

    # Consume the two characters of the arrow.
    ps.next()
    ps.next()

    ps.skip_blank_inline()
    ps.expect_line_end()

    return ast.SelectExpression(selector, self.get_variants(ps))
@with_span
def get_inline_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression, ast.Placeable]:
    """Parse an inline expression, chosen from the current character.

    Handled in this order: nested placeable (``{``), number, string
    (``"``), variable reference (``$``), term reference (``-``), and
    finally function or message reference (identifier start).

    :raises ParseError: ``E0008`` when a name followed by ``(`` does not
        match the callee pattern; ``E0028`` when nothing applies.
    """
    def read_optional_attribute() -> Any:
        # Reads ".identifier" when present; otherwise consumes nothing.
        if ps.current_char != '.':
            return None
        ps.next()
        return self.get_identifier(ps)

    if ps.current_char == '{':
        return self.get_placeable(ps)

    if ps.is_number_start():
        return self.get_number(ps)

    if ps.current_char == '"':
        return self.get_string(ps)

    if ps.current_char == '$':
        ps.next()
        return ast.VariableReference(self.get_identifier(ps))

    if ps.current_char == '-':
        ps.next()
        term_id = self.get_identifier(ps)
        term_attribute = read_optional_attribute()

        call_arguments = None
        ps.peek_blank()
        if ps.current_peek == '(':
            ps.skip_to_peek()
            call_arguments = self.get_call_arguments(ps)

        return ast.TermReference(term_id, term_attribute, call_arguments)

    if not ps.is_identifier_start():
        raise ParseError('E0028')

    name = self.get_identifier(ps)
    ps.peek_blank()

    if ps.current_peek == '(':
        # It's a Function. Ensure it's all upper-case.
        if not re.match('^[A-Z][A-Z0-9_-]*$', name.name):
            raise ParseError('E0008')
        ps.skip_to_peek()
        return ast.FunctionReference(name, self.get_call_arguments(ps))

    return ast.MessageReference(name, read_optional_attribute())
@with_span
def get_call_argument(self,
                      ps: FluentParserStream
                      ) -> Union[ast.InlineExpression, ast.NamedArgument, ast.Placeable]:
    """Parse one call argument, positional or named.

    An expression followed by ``:`` is a named argument. Its name must
    have been parsed as a message reference without an attribute, and its
    value is read with :meth:`get_literal`.

    :raises ParseError: ``E0009`` when ``:`` follows anything else.
    """
    expression = self.get_inline_expression(ps)
    ps.skip_blank()

    if ps.current_char != ':':
        return expression

    is_plain_name = (
        isinstance(expression, ast.MessageReference)
        and expression.attribute is None
    )
    if not is_plain_name:
        raise ParseError('E0009')

    ps.next()
    ps.skip_blank()
    return ast.NamedArgument(expression.id, self.get_literal(ps))
@with_span
def get_call_arguments(self, ps: FluentParserStream) -> ast.CallArguments:
    """Parse a parenthesised, comma-separated argument list.

    :raises ParseError: ``E0022`` when a named argument's name repeats;
        ``E0021`` when a positional argument follows a named one.
    """
    positional: List[Union[ast.InlineExpression, ast.Placeable]] = []
    named: List[ast.NamedArgument] = []
    seen_names: Set[str] = set()

    ps.expect_char('(')
    ps.skip_blank()

    while ps.current_char != ')':
        argument = self.get_call_argument(ps)

        if isinstance(argument, ast.NamedArgument):
            if argument.name.name in seen_names:
                raise ParseError('E0022')
            named.append(argument)
            seen_names.add(argument.name.name)
        elif seen_names:
            raise ParseError('E0021')
        else:
            positional.append(argument)

        ps.skip_blank()

        if ps.current_char != ',':
            break
        ps.next()
        ps.skip_blank()

    ps.expect_char(')')
    return ast.CallArguments(positional, named)
@with_span
def get_string(self, ps: FluentParserStream) -> ast.StringLiteral:
    """Parse a double-quoted string literal.

    Escape sequences are kept in the value in their escaped form, as
    returned by :meth:`get_escape_sequence`.

    :raises ParseError: ``E0020`` when a line end is reached before the
        closing quote.
    """
    def is_string_char(x: Any) -> bool:
        return x != '"' and x != EOL

    ps.expect_char('"')

    pieces = []
    while True:
        ch = ps.take_char(is_string_char)
        if not ch:
            break
        if ch != '\\':
            pieces.append(ch)
        else:
            pieces.append(self.get_escape_sequence(ps))

    if ps.current_char == EOL:
        raise ParseError('E0020')

    ps.expect_char('"')
    return ast.StringLiteral(''.join(pieces))
@with_span
def get_literal(self, ps: FluentParserStream) -> Union[ast.NumberLiteral, ast.StringLiteral]:
    """Parse a number or a string literal.

    :raises ParseError: ``E0014`` when neither starts at the current
        position.
    """
    if ps.is_number_start():
        return self.get_number(ps)
    if ps.current_char != '"':
        raise ParseError('E0014')
    return self.get_string(ps)