/* * Copyright ©️ 2022 Mario Forzanini * * This file is part of dwrt. * * Dwrt is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 3 of the License, or (at your * option) any later version. * * Dwrt is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License * along with dwrt. If not, see . * */ #include #include #include #include #include #include #include "dat.h" #include "fns.h" #include "stack.h" #define KNOWN_FUNCS 8 #define LEXEME_MINSZ 10 static char l_getc(Lexer *); static char l_peek(Lexer *); static Lexeme *lex_number(Lexer *, Lexeme *, size_t); static Lexeme *lex_symbol(Lexer *, Lexeme *, size_t); static char l_getc(Lexer *l) { return *(l->pos++); } void l_free(Lexer *lex) { free(lex->data); free(lex->err); free(lex); } Lexer * l_alloc(char *filename) { FILE *f; Lexer *l; l = emalloc(sizeof(Lexer)); if (filename == NULL) { l->filename = "stdin"; f = stdin; } else { l->filename = filename; if ((f = fopen(filename, "r")) == NULL) { free(l); return NULL; } } l->err = NULL; l->state = LS_WS; l->pos = l->data = readall(f); if (filename == NULL) fclose(f); return l; } Lexer * l_alloc_str(char *str) { Lexer *l; l = emalloc(sizeof(Lexer)); l->filename = "input"; l->err = NULL; l->state = LS_WS; l->data = ecalloc(strlen(str) + 1, sizeof(char)); l->pos = l->data = strcpy(l->data, str); return l; } static char l_peek(Lexer *l) { return *(l->pos); } Lexeme * lex(Lexer *l) { char c; size_t offset; Lexeme *result; result = emalloc(sizeof(Lexeme)); result->len = LEXEME_MINSZ; result->lexeme = ecalloc(result->len, sizeof(char)); result->type = LE_EOF; offset = 0; while ((c = l_getc(l)) != EOF) { if (isspace(c)) { switch (l->state) { case LS_SYMBOL: l->state = LS_WS; return result; default: continue; } } else if (isalpha(c)) { result->len = strappend(result->lexeme, c, offset++, result->len); return lex_symbol(l, result, offset); } else if (isdigit(c) && c != '0') { result->len = strappend(result->lexeme, c, offset++, result->len); return lex_number(l, result, offset); } else { switch (c) { case '0': result->type = LE_NUMBER; result->len = strappend(result->lexeme, c, offset++, result->len); if (l_peek(l) == '.') { return lex_number(l, result, offset); } else if (isdigit(l_peek(l))) { l->err = ecalloc(strlen(l->filename) + 30 + 1, sizeof(char)); sprintf(l->err, "%s: numbers can't start with 0\n", l->filename); result->type = LE_ERROR; } return result; case '+': case '-': result->len = strappend(result->lexeme, c, offset++, result->len); if (isdigit(l_peek(l))) return lex_number(l, result, offset); /* FALLTHROUGH */ case '*': case '^': case '/': result->type = LE_OPERATOR; result->lexeme[0] = c; return result; case '.': l->state = LS_ERROR; l->err = emalloc(strlen(l->filename) + 43 + 1); sprintf(l->err, "%s: unexpected '.', not in a number literal\n", l->filename); result->type = LE_ERROR; return result; case '(': result->type = LE_LPAREN; result->len = strappend(result->lexeme, c, offset++, result->len); return result; case ')': result->type = LE_RPAREN; result->len = strappend(result->lexeme, c, offset++, result->len); return result; case '\0': free(result->lexeme); result->type = LE_EOF; return result; default: l->state = LS_ERROR; l->err = emalloc(strlen(l->filename) + 16 + 1); sprintf(l->err, "%s: %c is garbage\n", l->filename, c); result->type = LE_ERROR; return result; } } } return result; } static Lexeme * lex_number(Lexer *l, Lexeme *le, size_t offset) { char c; le->type = LE_NUMBER; while (isdigit((c = l_getc(l))) || c == '.') { le->len = strappend(le->lexeme, c, offset++, le->len); } l->pos--; l->state = LS_WS; return le; } static Lexeme * lex_symbol(Lexer *l, Lexeme *le, size_t offset) { char c; le->type = LE_SYMBOL; while (isalpha(c = l_getc(l))) { le->len = strappend(le->lexeme, c, offset++, le->len); } l->pos--; l->state = LS_WS; return le; } void p_free(Parser *p) { l_free(p->l); ast_free(p->ast); free(p->err); free(p); } /* * Allocate a parser for file filename, if filename is NULL read stdin instead */ Parser * p_alloc(char *filename) { Parser *p; p = emalloc(sizeof(Parser)); p->ast = NULL; p->err = NULL; p->l = l_alloc(filename); return p; } Parser * p_alloc_str(char *str) { Parser *p; p = emalloc(sizeof(Parser)); p->ast = NULL; p->err = NULL; p->l = l_alloc_str(str); return p; } /* * Shunting yard algorithm */ int parse(Parser *p) { size_t i; int found; Lexeme *le; Node *node1, *node2; Stack *op_stack, *node_stack, *s; Symbol *sym, *head, *tmp_sym; op_stack = node_stack = NULL; for (le = lex(p->l); le->type != LE_EOF; free(le->lexeme), free(le), le = lex(p->l)) { switch (le->type) { case LE_ERROR: p->err = ecalloc(strlen(p->l->err) + 1, sizeof(char)); p->err = strcpy(p->err, p->l->err); free(le->lexeme); free(le); STACK_ITER(op_stack, s, tmp_sym) { symbol_free(tmp_sym); } STACK_ITER(node_stack, s, node2) { ast_free(node2); } return -1; case LE_NUMBER: sym = num_alloc(atof(le->lexeme)); STACK_PUSH(node_stack, s, ast_alloc(sym)); break; case LE_OPERATOR: sym = operator_alloc(le->lexeme[0]); head = STACK_PEEK(op_stack); while (head != NULL && !is_lparen(head) && precedence(head) >= precedence(sym)) { if (STACK_PEEK(op_stack) == NULL) goto err; STACK_POP(op_stack, s, tmp_sym); node1 = ast_alloc(tmp_sym); if (STACK_PEEK(node_stack) == NULL) goto err; STACK_POP(node_stack, s, node2); ast_insert(node1, node2); if (STACK_PEEK(node_stack) == NULL) goto err; STACK_POP(node_stack, s, node2); ast_insert(node1, node2); STACK_PUSH(node_stack, s, node1); head = STACK_PEEK(op_stack); } STACK_PUSH(op_stack, s, sym); break; case LE_LPAREN: sym = lparen_alloc(); STACK_PUSH(op_stack, s, sym); break; case LE_RPAREN: while (!is_lparen(STACK_PEEK(op_stack))) { if (op_stack == NULL) { p->err = ecalloc(strlen(p->l->filename) + 26 + 1, sizeof(char)); sprintf(p->err, "%s: unbalanced parenthesis\n", p->l->filename); return -1; } if (STACK_PEEK(op_stack) == NULL) goto err; STACK_POP(op_stack, s, tmp_sym); node1 = ast_alloc(tmp_sym); if (STACK_PEEK(node_stack) == NULL) goto err; STACK_POP(node_stack, s, node2); ast_insert(node1, node2); if (STACK_PEEK(node_stack) == NULL) goto err; STACK_POP(node_stack, s, node2); ast_insert(node1, node2); STACK_PUSH(node_stack, s, node1); } STACK_POP(op_stack, s, tmp_sym); symbol_free(tmp_sym); /* Left paren, discarded */ if (STACK_PEEK(op_stack) != NULL && is_function(STACK_PEEK(op_stack))) { STACK_POP(op_stack, s, tmp_sym); node1 = ast_alloc(tmp_sym); STACK_POP(node_stack, s, node2); ast_insert(node1, node2); STACK_PUSH(node_stack, s, node1); } break; case LE_SYMBOL: if (strlen(le->lexeme) == 1) { sym = var_alloc(le->lexeme[0]); STACK_PUSH(node_stack, s, ast_alloc(sym)); } else { /* Throw error on unknown functions */ found = 0; for (i = 0; i < KNOWN_FUNCS; i++) { if (strcmp(le->lexeme, known_funcs[i].func) == 0) { sym = func_alloc(le->lexeme); STACK_PUSH(op_stack, s, sym); found = 1; break; } } if (!found) { STACK_ITER(op_stack, s, tmp_sym) { symbol_free(tmp_sym); } STACK_ITER(node_stack, s, node2) { ast_free(node2); } p->err = ecalloc(strlen(p->l->filename) + strlen(le->lexeme) + 21 + 1, sizeof(char)); sprintf(p->err, "%s: unknown function %s\n", p->l->filename, le->lexeme); free(le->lexeme); free(le); return -1; } found = 0; } break; default: STACK_ITER(op_stack, s, tmp_sym) { symbol_free(tmp_sym); } STACK_ITER(node_stack, s, node2) { ast_free(node2); } p->err = ecalloc(strlen(p->l->filename) + strlen(le->lexeme) + 18 + 1, sizeof(char)); sprintf(p->err, "%s: unknown symbol %s\n", p->l->filename, le->lexeme); free(le->lexeme); free(le); return -1; } } free(le); while (op_stack != NULL) { if (is_lparen(STACK_PEEK(op_stack))) { p->err = ecalloc(strlen(p->l->filename) + 26 + 1, sizeof(char)); sprintf(p->err, "%s: unbalanced parenthesis\n", p->l->filename); STACK_ITER(op_stack, s, tmp_sym) { symbol_free(tmp_sym); } STACK_ITER(node_stack, s, node2) { ast_free(node2); } return -1; } STACK_POP(op_stack, s, tmp_sym); node1 = ast_alloc(tmp_sym); STACK_POP(node_stack, s, node2); ast_insert(node1, node2); STACK_POP(node_stack, s, node2); ast_insert(node1, node2); STACK_PUSH(node_stack, s, node1); } STACK_POP(node_stack, s, p->ast); if (STACK_LEN(op_stack) > 0) goto err; else if (STACK_LEN(node_stack) > 0) goto err; return 0; err: p->err = ecalloc(strlen(p->l->filename) + 24 + 1, sizeof(char)); sprintf(p->err, "%s: malformed expression\n", p->l->filename); STACK_ITER(op_stack, s, tmp_sym) { symbol_free(tmp_sym); } STACK_ITER(node_stack, s, node2) { ast_free(node2); } return -1; } int precedence(Symbol *s) { if (s == NULL) return -1; switch (s->type) { case S_OP: switch (s->content.func) { case '-': case '+': return 0; case '*': case '/': return 1; case '^': return 2; } break; case S_FUNC: return 2; default: return -1; } return -1; }