author | Adam Stankiewicz <sheerun@sher.pl> | 2021-06-09 09:50:19 +0200
committer | Adam Stankiewicz <sheerun@sher.pl> | 2021-06-09 09:50:19 +0200
commit | ff8c1d76741f148d5f6efb9a57119dcf11afaec6 (patch)
tree | 2717f611f45d2a82d83ae1e320af4f3aa33b4264 /autoload/smt2/parser.vim
parent | 27756b129b7a1f9763a0062aafa8fd35b0181b19 (diff)
Update
Diffstat (limited to 'autoload/smt2/parser.vim')
-rw-r--r-- | autoload/smt2/parser.vim | 227
1 file changed, 227 insertions(+), 0 deletions(-)
diff --git a/autoload/smt2/parser.vim b/autoload/smt2/parser.vim
new file mode 100644
index 00000000..ef4068e5
--- /dev/null
+++ b/autoload/smt2/parser.vim
@@ -0,0 +1,227 @@
+if polyglot#init#is_disabled(expand('<sfile>:p'), 'smt2', 'autoload/smt2/parser.vim')
+  finish
+endif
+
+vim9script
+const debug = false
+set maxfuncdepth=100000000 # SMT files tend to be highly nested
+
+# TODO: Retry iterative parsing now that we have a scanner and simpler grammar
+# TODO: Refer to token kind by name, e.g. token_comment instead of 8
+# TODO: Change Ast.kind type from string to enum/number?
+
+# ------------------------------------------------------------------------------
+# AST nodes -- essentially named token wrappers
+#
+# Note: pos_from, pos_to and contains_comment were only introduced to allow for
+#       a fast FitsOneLine(ast) function in the formatter.
+#       Here, pos_from and pos_to refer to indices of characters -- not tokens
+# ------------------------------------------------------------------------------
+def Ast(kind: string, value: any, pos_from: number, pos_to: number, contains_comment: bool): dict<any>
+    return {kind: kind, value: value, pos_from: pos_from, pos_to: pos_to, contains_comment: contains_comment}
+enddef
+
+def ParagraphAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
+    var contains_comment = false
+    for expr in exprs
+        if expr.contains_comment
+            contains_comment = true
+            break
+        endif
+    endfor
+    return Ast('Paragraph', exprs, pos_from, pos_to, contains_comment)
+enddef
+
+def SExprAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
+    var contains_comment = false
+    for expr in exprs
+        if expr.contains_comment
+            contains_comment = true
+            break
+        endif
+    endfor
+    return Ast('SExpr', exprs, pos_from, pos_to, contains_comment)
+enddef
+
+def AtomAst(token: dict<any>): dict<any>
+    return Ast('Atom', token, token.pos, token.pos + len(token.lexeme), token.kind == 8)
+enddef
+
+def PrintAst(ast: dict<any>, indent = 0)
+    echo repeat(' ', indent * 2) .. '[' .. ast.kind .. '] '
+
+    if ast.kind ==# 'Atom'
+        echon ast.value.lexeme
+    elseif ast.kind ==# 'SExpr'
+        for v in ast.value
+            call PrintAst(v, indent + 1)
+        endfor
+    elseif ast.kind ==# 'Paragraph'
+        for v in ast.value
+            call PrintAst(v, indent + 1)
+        endfor
+    endif
+enddef
+
+# ------------------------------------------------------------------------------
+# Grammar
+# ------------------------------------------------------------------------------
+# Paragraph ::= Expr+
+# Expr      ::= SExpr | Atom
+# SExpr     ::= '(' Expr* ')'
+
+# ------------------------------------------------------------------------------
+# LParen
+# ------------------------------------------------------------------------------
+def AtStartOfLParen(scanner: dict<any>): bool
+    return scanner.cur_token.kind == 0 # token_lparen
+enddef
+
+def ParseLParen(scanner: dict<any>) # consumes token; no return
+    if debug
+        scanner->smt2#scanner#Enforce(scanner->AtStartOfLParen(),
+            "ParseLParen called but not at start of LParen",
+            scanner.cur_token.pos)
+    endif
+
+    scanner->smt2#scanner#NextToken()
+enddef
+
+# ------------------------------------------------------------------------------
+# RParen
+# ------------------------------------------------------------------------------
+def AtStartOfRParen(scanner: dict<any>): bool
+    return scanner.cur_token.kind == 1 # token_rparen
+enddef
+
+def ParseRParen(scanner: dict<any>) # consumes token; no return
+    if debug
+        scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
+            "ParseRParen called but not at start of RParen",
+            scanner.cur_token.pos)
+    endif
+
+    scanner->smt2#scanner#NextToken()
+enddef
+
+# ------------------------------------------------------------------------------
+# Atom
+# ------------------------------------------------------------------------------
+def AtStartOfAtom(scanner: dict<any>): bool
+    return 2 <= scanner.cur_token.kind && scanner.cur_token.kind <= 8
+enddef
+
+def ParseAtom(scanner: dict<any>): dict<any>
+    if debug
+        scanner->smt2#scanner#Enforce(scanner->AtStartOfAtom(),
+            "ParseAtom called but not at start of Atom",
+            scanner.cur_token.pos)
+    endif
+
+    const ast = AtomAst(scanner.cur_token)
+    scanner->smt2#scanner#NextToken()
+    return ast
+enddef
+
+# ------------------------------------------------------------------------------
+# Expr
+# ------------------------------------------------------------------------------
+def AtStartOfExpr(scanner: dict<any>): bool
+    return scanner->AtStartOfSExpr() || scanner->AtStartOfAtom()
+enddef
+def ParseExpr(scanner: dict<any>): dict<any>
+    if debug
+        scanner->smt2#scanner#Enforce(scanner->AtStartOfExpr(),
+            "ParseExpr called but not at start of Expr",
+            scanner.cur_token.pos)
+    endif
+
+    if scanner->AtStartOfSExpr()
+        return scanner->ParseSExpr()
+    endif
+    return scanner->ParseAtom()
+enddef
+
+# ------------------------------------------------------------------------------
+# SExpr
+# ------------------------------------------------------------------------------
+const AtStartOfSExpr = funcref(AtStartOfLParen)
+def ParseSExpr(scanner: dict<any>): dict<any>
+    const pos_from = scanner.cur_token.pos
+
+    if debug
+        scanner->smt2#scanner#Enforce(scanner->AtStartOfSExpr(),
+            "ParseSExpr called but not at start of SExpr",
+            pos_from)
+    endif
+    scanner->ParseLParen()
+
+    # Expr*
+    var exprs: list<any>
+    while scanner->AtStartOfExpr()
+        exprs->add(scanner->ParseExpr())
+    endwhile
+
+    scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
+        printf("Expected RParen but got %s", scanner.cur_token.kind->smt2#scanner#TokenKind2Str()),
+        scanner.cur_token.pos)
+    scanner->ParseRParen()
+
+    const pos_to = scanner.cur_token.pos
+    return SExprAst(exprs, pos_from, pos_to)
+enddef
+
+# ------------------------------------------------------------------------------
+# Paragraph
+# ------------------------------------------------------------------------------
+def ParseParagraph(scanner: dict<any>): dict<any>
+    const pos_from = scanner.cur_token.pos
+
+    # Expr+
+    scanner->smt2#scanner#Enforce(scanner->AtStartOfExpr(),
+        printf("Expected Expr but got %s", scanner.cur_token.kind->smt2#scanner#TokenKind2Str()),
+        pos_from)
+
+    var exprs = [scanner->ParseExpr()]
+    while scanner->AtStartOfExpr() && !scanner.at_new_paragraph
+        exprs->add(scanner->ParseExpr())
+    endwhile
+
+    const pos_to = scanner.cur_token.pos
+    return ParagraphAst(exprs, pos_from, pos_to)
+enddef
+
+# ------------------------------------------------------------------------------
+# Public functions
+# ------------------------------------------------------------------------------
+def smt2#parser#ParseCurrentParagraph(): dict<any>
+    # source = [start of current paragraph, EOF]
+    # Note: This is needed since `silent! normal! {y}` may not yank full paragraphs
+    #       in the context of multiline expressions
+    const cursor = getpos('.')
+    silent! normal! {
+    const line_offset = line('.')
+    const source = join(getline('.', '$'), "\n")
+    call setpos('.', cursor)
+
+    var scanner = smt2#scanner#Scanner(source, line_offset)
+    const ast = scanner->ParseParagraph()
+
+    if debug | ast->PrintAst() | endif
+    return ast
+enddef
+
+def smt2#parser#ParseAllParagraphs(): list<dict<any>>
+    # source = current buffer
+    const source = join(getline(1, '$'), "\n")
+
+    var scanner = smt2#scanner#Scanner(source)
+    var asts = []
+    while scanner.cur_token.kind != 9 # token_eof
+        const ast = scanner->ParseParagraph()
+        asts->add(ast)
+
+        if debug | ast->PrintAst() | endif
+    endwhile
+    return asts
+enddef
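
Note (not part of the commit): the ASTs returned by the two public functions are plain dictionaries with the fields kind, value, pos_from, pos_to and contains_comment, so downstream scripts such as the formatter mentioned in the header comment can walk them with ordinary recursion. The sketch below only illustrates that; it assumes the smt2 scanner and parser from this commit are on the runtimepath and that the current buffer holds SMT-LIB text, and the CountAtoms/EchoAtomCounts helpers are hypothetical names used purely for illustration.

    vim9script
    # Usage sketch: count the atoms in every paragraph of the current buffer.
    # Not shipped with vim-polyglot; relies only on smt2#parser#ParseAllParagraphs().

    def CountAtoms(ast: dict<any>): number
        if ast.kind ==# 'Atom'
            return 1
        endif
        # 'SExpr' and 'Paragraph' nodes keep their child ASTs in ast.value
        var total = 0
        for child in ast.value
            total += CountAtoms(child)
        endfor
        return total
    enddef

    def EchoAtomCounts()
        for paragraph in smt2#parser#ParseAllParagraphs()
            echo printf('paragraph at character %d: %d atoms', paragraph.pos_from, CountAtoms(paragraph))
        endfor
    enddef

    EchoAtomCounts()

Because every node carries pos_from and pos_to, a consumer can also check cheaply whether a subtree fits on a single line, which is what the FitsOneLine(ast) function referenced in the header comment relies on.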