feat: math expression language implementation
This commit is contained in:
277
src/plotter/parser.py
Normal file
277
src/plotter/parser.py
Normal file
@@ -0,0 +1,277 @@
|
||||
# Parsing is the process of turning a sequence
# of tokens into a tree representation:
#
#                            Add
#                  Parser    / \
#   "1 + 2 * 3"  ------->  1  Mul
#                              / \
#                             2   3
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import deque
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from functools import partial
|
||||
import math
|
||||
import operator
|
||||
|
||||
# The first step is to generate a list of tokens.
# Tokens are the supported symbol classes; each one carries a type tag.
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Type tag carried by every token the lexer emits."""

    # Lexing failed; the token's value holds the error message.
    ERROR = -1
    # One of the arithmetic operator symbols.
    OPERATOR = 0
    # The free variable of the expression.
    VARIABLE = 1
    # An opening or closing parenthesis.
    SEPARATOR = 2
    # A numeric literal.
    FLOAT = 3
    # The name of a supported function.
    FUNCTION = 4
|
||||
|
||||
|
||||
# The token can optionally capture a string value.
# This will be used later by the parser to build the syntactic tree.
|
||||
|
||||
|
||||
@dataclass
class Token:
    """A lexical unit: a type tag plus an optional captured string."""

    # Category of the token.
    type: TokenType
    # Captured text (or the error message for ERROR tokens); None if absent.
    value: str | None = None
|
||||
|
||||
|
||||
# Vocabulary recognised by the lexer, grouped by token category.
operators: set[str] = set("+-*/^")
functions: set[str] = {"abs", "atan", "cos", "exp", "ln", "log", "sin", "tan"}
separators: set[str] = set("()")
variables: set[str] = {"x"}
|
||||
|
||||
|
||||
# The lexer is a generator function that yields tokens as it scans the input string.
|
||||
def lex(input: str) -> Iterator[Token]:
    """Scan ``input`` from left to right and lazily yield its tokens.

    Whitespace is skipped. Separators, operators and variables are
    single-character tokens, a function name is a maximal run of letters,
    and a number is a run of decimal digits containing at most one ``.``.

    A sign is never folded into a number: "+" and "-" are always emitted as
    OPERATOR tokens, so unary signs are left to the parser.

    On the first lexical error a single ERROR token, whose value is the
    error message, is yielded and scanning stops.
    """
    length = len(input)
    i = 0

    while i < length:
        char = input[i]

        # whitespace
        if char.isspace():
            i += 1
            continue

        # separators
        if char in separators:
            yield Token(type=TokenType.SEPARATOR, value=char)
            i += 1
            continue

        # operators
        if char in operators:
            yield Token(type=TokenType.OPERATOR, value=char)
            i += 1
            continue

        # variables
        if char in variables:
            yield Token(type=TokenType.VARIABLE, value=char)
            i += 1
            continue

        # functions: take the longest run of letters and check it is known
        if char.isalpha():
            j = i + 1
            while j < length and input[j].isalpha():
                j += 1

            name = input[i:j]
            if name not in functions:
                yield Token(
                    type=TokenType.ERROR, value=f"unknown function name '{name}'"
                )
                return

            yield Token(type=TokenType.FUNCTION, value=name)
            i = j
            continue

        # float numbers
        # isdecimal() rather than isdigit(): the latter also accepts
        # characters such as "²" that float() cannot convert.
        if char.isdecimal() or char == ".":
            j = i
            dots = 0
            while j < length and (input[j].isdecimal() or input[j] == "."):
                if input[j] == ".":
                    dots += 1
                j += 1

            if input[j - 1] == ".":
                yield Token(
                    type=TokenType.ERROR, value="number after dot was expected"
                )
                return

            # Reject "1.2.3" here instead of letting float() fail at eval time.
            if dots > 1:
                yield Token(
                    type=TokenType.ERROR,
                    value="number has more than one decimal point",
                )
                return

            yield Token(type=TokenType.FLOAT, value=input[i:j])
            i = j
            continue

        yield Token(type=TokenType.ERROR, value="not an accepted character")
        return
|
||||
|
||||
|
||||
class Expression(ABC):
    """Abstract node of the expression tree."""

    @abstractmethod
    def eval(self, x: float) -> float:
        """Return the value of this node with the variable bound to ``x``."""
|
||||
|
||||
|
||||
@dataclass
class Atom(Expression):
    """Leaf node of the expression tree wrapping a single token."""

    token: Token

    def eval(self, x: float) -> float:
        """Return the leaf's value for the given ``x``.

        A VARIABLE token evaluates to ``x``, a FLOAT token to its numeric
        value (0 when the token has no value), and any other token to 0.0.
        """
        kind = self.token.type
        if kind == TokenType.VARIABLE:
            return x
        if kind != TokenType.FLOAT:
            return 0.0
        return float(self.token.value or 0)
|
||||
|
||||
|
||||
@dataclass
class FunctionExpression(Expression):
    """Application of a named one-argument function to a sub-expression."""

    # Name of the function; must be a key of ``_funcs``.
    function: str
    # Sub-expression whose value is passed to the function.
    argument: Expression

    # Deliberately un-annotated so it stays a shared class-level lookup table
    # rather than becoming a dataclass field.
    _funcs = {
        "abs": abs,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "atan": math.atan,
        "exp": math.exp,
        # math.log takes no keyword arguments; its one-argument form is
        # already the natural logarithm.
        "ln": math.log,
        "log": math.log10,
    }

    def eval(self, x: float) -> float:
        """Evaluate the argument at ``x`` and apply the function to the result.

        Raises:
            ValueError: if ``function`` is not a supported function name.
        """
        func = self._funcs.get(self.function)
        if func is None:
            raise ValueError(f"Unknown function {self.function}")
        # Apply the function to the argument's value, not to the raw x.
        return func(self.argument.eval(x))
|
||||
|
||||
|
||||
@dataclass
class InfixExpression(Expression):
    """Binary operation applied to a left and a right sub-expression."""

    operator: str
    lvalue: Expression
    rvalue: Expression

    # Operator symbol -> callable implementing it.
    _ops = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow,
    }

    # Operator symbol -> (left, right) binding powers used by the parser.
    # The right power is always the larger one, so every operator here
    # (including "^") groups left to right.
    _binding_power = {
        "+": (1, 2),
        "-": (1, 2),
        "*": (3, 4),
        "/": (3, 4),
        "^": (4, 5),
    }

    # Unary minus binds less tightly than "^", hence -2^3 == -(2^3).
    _prefix_binding_power = {"-": 3}

    def eval(self, x: float) -> float:
        """Evaluate both operands at ``x`` and combine them with the operator.

        Raises:
            ValueError: if ``operator`` is not a supported symbol.
        """
        if self.operator not in self._ops:
            raise ValueError(f"Unknown operator {self.operator}")
        apply = self._ops[self.operator]
        left = self.lvalue.eval(x)
        right = self.rvalue.eval(x)
        return apply(left, right)
|
||||
|
||||
|
||||
class Parser:
    """Binding-power (Pratt) parser that turns tokens into an Expression tree.

    Malformed input (lexer errors, missing operands or parentheses,
    unexpected or trailing tokens) is reported with ``ValueError`` instead
    of being silently parsed into a wrong tree.
    """

    def __init__(self, tokens: Iterator[Token]):
        # Materialise the stream so the next token can be inspected
        # without being consumed.
        self.tokens = deque(tokens)

    def peek(self) -> Token | None:
        """Return the next token without consuming it, or None at the end."""
        return self.tokens[0] if self.tokens else None

    def consume(self) -> Token:
        """Remove and return the next token."""
        return self.tokens.popleft()

    def _advance(self) -> Token:
        """Consume the next token, rejecting end of input and lexer errors."""
        if not self.tokens:
            raise ValueError("unexpected end of expression")
        token = self.consume()
        if token.type == TokenType.ERROR:
            raise ValueError(token.value or "invalid token")
        return token

    def _expect_separator(self, symbol: str) -> None:
        """Consume the next token and require it to be the separator ``symbol``."""
        token = self._advance()
        if token.type != TokenType.SEPARATOR or token.value != symbol:
            raise ValueError(f"expected '{symbol}' but found '{token.value}'")

    def _parse_expression_bp(self, min_bp: int) -> Expression:
        """Parse an expression whose operators all bind at least ``min_bp``.

        Raises:
            ValueError: if the tokens do not form a valid expression.
        """
        token = self._advance()

        if token.type == TokenType.SEPARATOR and token.value == "(":
            lhs = self._parse_expression_bp(0)
            self._expect_separator(")")
        elif token.type == TokenType.FUNCTION and token.value is not None:
            self._expect_separator("(")
            argument = self._parse_expression_bp(0)
            self._expect_separator(")")
            lhs = FunctionExpression(function=token.value, argument=argument)
        elif token.type == TokenType.OPERATOR and token.value in ("+", "-"):
            # Unary sign. The operand must bind at least as tightly as the
            # surrounding context requires; otherwise "2 / -3 * 4" would be
            # read as "2 / -(3 * 4)".
            prefix_bp = InfixExpression._prefix_binding_power["-"]
            operand = self._parse_expression_bp(max(prefix_bp, min_bp))
            if token.value == "-":
                # Unary minus: desugar to 0 - operand
                lhs = InfixExpression(
                    operator="-",
                    lvalue=Atom(Token(type=TokenType.FLOAT, value="0")),
                    rvalue=operand,
                )
            else:
                # Unary plus leaves the operand unchanged.
                lhs = operand
        elif token.type in (TokenType.FLOAT, TokenType.VARIABLE):
            lhs = Atom(token)
        else:
            # A stray ")" or a binary-only operator where an operand belongs.
            raise ValueError(f"unexpected token '{token.value}'")

        while True:
            op = self.peek()
            if op is None or op.value is None or op.type != TokenType.OPERATOR:
                break

            l_bp, r_bp = InfixExpression._binding_power[op.value]
            if l_bp < min_bp:
                break

            self.consume()
            rhs = self._parse_expression_bp(r_bp)
            lhs = InfixExpression(operator=op.value, lvalue=lhs, rvalue=rhs)

        return lhs

    def parse_expression(self) -> Expression:
        """Parse the whole token stream into a single expression.

        Raises:
            ValueError: if the input is empty or malformed, or if tokens
                remain after a complete expression.
        """
        expression = self._parse_expression_bp(min_bp=0)

        leftover = self.peek()
        if leftover is not None:
            if leftover.type == TokenType.ERROR:
                raise ValueError(leftover.value or "invalid token")
            raise ValueError(f"unexpected token '{leftover.value}'")

        return expression
|
||||
|
||||
|
||||
def parse(expression: str) -> Expression:
    """Lex ``expression`` and parse the resulting tokens into a tree."""
    token_stream = lex(expression)
    parser = Parser(token_stream)
    return parser.parse_expression()
|
||||
|
||||
|
||||
def eval_in_range(
    expression: str, start: float, stop: float, increment: float
) -> list[float]:
    """Evaluate ``expression`` at evenly spaced points of ``[start, stop)``.

    The sample points are ``start``, ``start + increment``,
    ``start + 2 * increment``, ... for as long as they stay below ``stop``.

    Args:
        expression: source text of the expression to evaluate.
        start: first sample point (inclusive).
        stop: end of the range (exclusive).
        increment: distance between consecutive sample points; must be > 0.

    Returns:
        The expression's value at each sample point, in order.

    Raises:
        ValueError: if ``stop < start`` or ``increment`` is not positive.
    """
    if stop < start:
        raise ValueError("range must be provided in crescent order")
    if increment <= 0:
        # A non-positive step would never reach ``stop``.
        raise ValueError("increment must be positive")

    parsed_expression = parse(expression)

    values = []
    n = 0
    x = start
    while x < stop:
        # Evaluate before stepping so that ``start`` itself is sampled and
        # no point at or beyond ``stop`` is.
        values.append(parsed_expression.eval(x))
        n += 1
        # Derive x from the step count to avoid accumulating rounding error.
        x = start + n * increment
    return values
|
||||
Reference in New Issue
Block a user