Source code

Revision control

Copy as Markdown

Other Tools

from typing import Callable, Union
from typing_extensions import Literal
from .errors import ParseError
class ParserStream:
    """Cursor over a string with a separate, resettable look-ahead offset.

    ``index`` is the committed cursor position and ``peek_offset`` is a
    look-ahead distance relative to ``index``. The cursor-moving methods
    step over a CRLF pair as if it were a single character.
    """

    def __init__(self, string: str):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset: int) -> Union[str, None]:
        """Return the raw character at ``offset``, or ``None`` if out of range.

        No CRLF normalization is applied here; see ``char_at`` for that.
        """
        # Python would silently wrap a negative index around to the end of
        # the string; for a stream position that is simply out of range.
        if offset < 0:
            return None
        try:
            return self.string[offset]
        except IndexError:
            return None

    def char_at(self, offset: int) -> Union[str, None]:
        """Return the character at ``offset``, reporting CRLF as ``'\\n'``."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' \
                and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self) -> Union[str, None]:
        """Character at the committed cursor position (CRLF-normalized)."""
        return self.char_at(self.index)

    @property
    def current_peek(self) -> Union[str, None]:
        """Character at the look-ahead position (CRLF-normalized)."""
        return self.char_at(self.index + self.peek_offset)

    def next(self) -> Union[str, None]:
        """Advance the cursor by one character and drop any look-ahead.

        Returns the raw character at the new position (``None`` past the end
        of the string); the return value is not CRLF-normalized.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' \
                and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.get(self.index)

    def peek(self) -> Union[str, None]:
        """Advance the look-ahead by one character without moving the cursor.

        Returns the raw character at the new look-ahead position (``None``
        past the end of the string); the return value is not CRLF-normalized.
        """
        # Skip over CRLF as if it was a single character.
        if self.get(self.index + self.peek_offset) == '\r' \
                and self.get(self.index + self.peek_offset + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.get(self.index + self.peek_offset)

    def reset_peek(self, offset: int = 0) -> None:
        """Set the look-ahead offset to ``offset`` (default: back to the cursor)."""
        self.peek_offset = offset

    def skip_to_peek(self) -> None:
        """Commit the look-ahead: move the cursor to the peeked position."""
        self.index += self.peek_offset
        self.peek_offset = 0
# Line-end character as seen through the stream's CRLF-normalizing accessors.
EOL = '\n'

# End of input: the stream accessors yield None past the end of the string.
EOF = None

# Characters that, as the first non-blank character of a line, mean the line
# is not treated as a continuation of a pattern.
SPECIAL_LINE_START_CHARS = (
    '}',
    '.',
    '[',
    '*',
)
class FluentParserStream(ParserStream):
    """Parser stream extended with Fluent-specific peek/skip/take helpers.

    ``peek_*`` and ``is_*`` methods only move the look-ahead offset;
    ``skip_*``, ``expect_*`` and ``take_*`` methods move the cursor itself.
    """

    def peek_blank_inline(self) -> str:
        """Peek past a run of spaces and return the spaces that were passed."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self) -> str:
        """Consume a run of spaces at the cursor and return it."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self) -> str:
        """Peek past consecutive blank lines.

        Returns one ``EOL`` per blank line passed. When a non-blank line is
        found, the look-ahead is rewound to the start of that line.
        """
        blank = ""
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank += EOL
                self.peek()
                continue

            if self.current_peek is EOF:
                # Treat the blank line at EOF as a blank block.
                return blank

            # Any other char; reset to column 1 on this line.
            self.reset_peek(line_start)
            return blank

    def skip_blank_block(self) -> str:
        """Consume consecutive blank lines and return one ``EOL`` per line."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self) -> None:
        """Peek past any mix of spaces and line ends."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self) -> None:
        """Consume any mix of spaces and line ends at the cursor."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch: str) -> Literal[True]:
        """Consume ``ch`` at the cursor or raise ``ParseError('E0003', ch)``."""
        if self.current_char == ch:
            self.next()
            return True

        raise ParseError('E0003', ch)

    def expect_line_end(self) -> Literal[True]:
        """Consume a line end at the cursor; EOF is accepted without moving.

        Raises ``ParseError('E0003', '\\u2424')`` for any other character.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f: Callable[[str], bool]) -> Union[str, Literal[False], None]:
        """Consume and return the current character if ``f`` accepts it.

        Returns ``EOF`` (``None``) at the end of input and ``False`` when
        ``f`` rejects the character; the cursor moves only on acceptance.
        """
        ch = self.current_char
        if ch is None:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch: Union[str, None]) -> bool:
        """Return True if ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is None:
            return False

        cc = ord(ch)
        return (cc >= 97 and cc <= 122) or \
               (cc >= 65 and cc <= 90)

    def is_identifier_start(self) -> bool:
        """Return True if the peeked character is an ASCII letter."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self) -> bool:
        """Return True if the cursor is at a digit, or at '-' followed by a digit.

        The look-ahead offset is reset to the cursor before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        if ch is None:
            self.reset_peek()
            return False

        cc = ord(ch)
        is_digit = cc >= 48 and cc <= 57
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch: Union[str, None]) -> bool:
        """Return True unless ``ch`` is EOF or one of SPECIAL_LINE_START_CHARS."""
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self) -> bool:
        """Return True if the peeked character is neither EOF nor a line end."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self) -> bool:
        """Check whether the peeked line continues a value.

        A line continues the value if, after optional leading spaces, it
        starts with '{', or if it is indented by at least one space and its
        first non-blank character passes ``is_char_pattern_continuation``.
        The look-ahead is restored to its entry value on the True paths only.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        # No indentation at all: cannot be a continuation.
        if self.peek_offset - column1 == 0:
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level: int = -1) -> bool:
        """Check whether the line after the peeked line end is a comment line.

        ``level`` selects how many '#' characters are expected: ``level + 1``
        for levels 0-2, or one to three for ``-1`` (any level). The '#' run
        must be followed by a space or a line end. Returns False without
        touching the look-ahead if the peeked character is not a line end;
        otherwise the look-ahead offset is reset to the cursor.
        """
        if self.current_peek != EOL:
            return False

        any_level = level == -1
        wanted = 3 if any_level else level + 1

        # Step over the line end, then over up to ``wanted`` '#' characters.
        seen = 0
        self.peek()
        while seen < wanted and self.current_peek == '#':
            seen += 1
            self.peek()

        # The first char after #, ## or ###. Read it exactly once, through
        # the CRLF-normalizing accessor, so that an empty comment line ending
        # in CRLF is recognized just like one ending in LF.
        after = self.current_peek
        self.reset_peek()

        if any_level:
            # "Any" still requires at least one '#'.
            if seen == 0:
                return False
        elif seen < wanted:
            # Fewer '#' than the requested level needs.
            return False

        return after in (' ', EOL)

    def is_variant_start(self) -> bool:
        """Return True if the look-ahead is at '[' or '*[' not followed by '['.

        The look-ahead offset is restored to its entry value before returning.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        if self.current_peek == '[' and self.peek() != '[':
            self.reset_peek(current_peek_offset)
            return True

        self.reset_peek(current_peek_offset)
        return False

    def is_attribute_start(self) -> bool:
        """Return True if the peeked character is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start: int) -> None:
        """Move the cursor to the start of the next line that looks like an entry.

        A line looks like an entry start when its first character is an ASCII
        letter, '-' or '#', or when it begins with '//' or '[['. Stops at the
        end of input if no such line is found.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first == '-' or first == '#':
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) == ('/', '/') or (first, peek) == ('[', '['):
                break

    def take_id_start(self) -> Union[str, None]:
        """Consume and return an ASCII letter at the cursor.

        Raises ``ParseError('E0004', 'a-zA-Z')`` if the current character is
        not an ASCII letter.
        """
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self) -> Union[str, Literal[False], None]:
        """Consume one of a-z, A-Z, 0-9, '_' or '-'; see ``take_char`` for returns."""
        def closure(ch: str) -> bool:
            cc = ord(ch)
            return ((cc >= 97 and cc <= 122) or
                    (cc >= 65 and cc <= 90) or
                    (cc >= 48 and cc <= 57) or
                    cc == 95 or cc == 45)
        return self.take_char(closure)

    def take_digit(self) -> Union[str, Literal[False], None]:
        """Consume one ASCII digit 0-9; see ``take_char`` for returns."""
        def closure(ch: str) -> bool:
            cc = ord(ch)
            return (cc >= 48 and cc <= 57)
        return self.take_char(closure)

    def take_hex_digit(self) -> Union[str, Literal[False], None]:
        """Consume one hex digit (0-9, A-F, a-f); see ``take_char`` for returns."""
        def closure(ch: str) -> bool:
            cc = ord(ch)
            return (
                (cc >= 48 and cc <= 57)  # 0-9
                or (cc >= 65 and cc <= 70)  # A-F
                or (cc >= 97 and cc <= 102))  # a-f
        return self.take_char(closure)