feat: math expression language implementation

This commit is contained in:
2026-01-29 22:12:40 +01:00
parent f7c34448bb
commit ad9c3af0ac
11 changed files with 646 additions and 0 deletions

277
src/plotter/parser.py Normal file
View File

@@ -0,0 +1,277 @@
# Parsing is the process of turning a sequence
# of tokens into a tree representation:
#
#                           Add
#                Parser    /   \
# "1 + 2 * 3" --------->  1    Mul
#                              /  \
#                             2    3
from __future__ import annotations
from abc import ABC, abstractmethod
from collections import deque
from collections.abc import Iterator
from dataclasses import dataclass
from enum import Enum
from functools import partial
import math
import operator
# The first step is to generate a list of tokens.
# Tokens are the supported symbol classes, they have a type tag
class TokenType(Enum):
    """Tag identifying the lexical class of a token."""

    ERROR = -1      # lexing failed; the token value carries the message
    OPERATOR = 0    # one of + - * / ^
    VARIABLE = 1    # the free variable x
    SEPARATOR = 2   # parenthesis ( or )
    FLOAT = 3       # numeric literal
    FUNCTION = 4    # named function such as sin or log
# The token can optionally capture a string value.
# This will be used later by the parser to build the syntactic tree.
@dataclass
class Token:
    """One lexical unit produced by the lexer.

    The optional value holds the matched text (or, for ERROR tokens, the
    error message) and is consumed later by the parser.
    """

    type: TokenType
    value: str | None = None
# Character and word classes the lexer recognizes.
operators: set[str] = set("+-*/^")  # infix operator symbols
functions: set[str] = {"abs", "cos", "sin", "tan", "atan", "exp", "ln", "log"}
separators: set[str] = set("()")  # grouping parentheses
variables: set[str] = {"x"}  # the single free variable
# The lexer is a generator function that yields tokens as it scans the input string
def lex(input: str) -> Iterator[Token]:
    """Scan *input* and yield one Token per lexeme.

    On a lexing error an ERROR token (whose value is the error message)
    is yielded and the generator stops.
    """
    i = 0
    while i < len(input):
        char = input[i]
        # whitespace
        if char.isspace():
            i += 1
            continue
        # separators
        if char in separators:
            yield Token(type=TokenType.SEPARATOR, value=char)
            i += 1
            continue
        # operators — note this branch always consumes '+' and '-', so
        # signed number literals never occur here; unary minus is handled
        # by the parser instead.
        if char in operators:
            yield Token(type=TokenType.OPERATOR, value=char)
            i += 1
            continue
        # variables
        if char in variables:
            yield Token(type=TokenType.VARIABLE, value=char)
            i += 1
            continue
        # functions: longest run of alphabetic characters
        if char.isalpha():
            j = i + 1
            while j < len(input) and input[j].isalpha():
                j += 1
            name = input[i:j]
            if name not in functions:
                yield Token(
                    type=TokenType.ERROR, value=f"unknown function name '{name}'"
                )
                return
            yield Token(type=TokenType.FUNCTION, value=name)
            i = j
            continue
        # float numbers: digits with at most one decimal dot.
        # (The old signed-number check was unreachable — '+'/'-' are
        # consumed by the operator branch above — and could index past the
        # end of the input; it has been removed.)
        if char.isdigit() or char == ".":
            j = i
            dots = 0
            while j < len(input) and (input[j].isdigit() or input[j] == "."):
                if input[j] == ".":
                    dots += 1
                j += 1
            # BUG FIX: reject malformed numbers such as "1.2.3" (multiple
            # dots) instead of emitting a FLOAT token that float() cannot
            # parse later, and keep rejecting a trailing dot as before.
            if dots > 1 or (dots == 1 and input[j - 1] == "."):
                yield Token(
                    type=TokenType.ERROR, value="number after dot was expected"
                )
                return
            yield Token(type=TokenType.FLOAT, value=input[i:j])
            i = j
            continue
        yield Token(type=TokenType.ERROR, value="not an accepted character")
        return
class Expression(ABC):
    """Abstract base for all nodes of the parsed expression tree."""

    @abstractmethod
    def eval(self, x: float) -> float:
        """Return the numeric value of this subtree for variable value *x*."""
@dataclass
class Atom(Expression):
    """Leaf node: either the variable x or a literal number."""

    token: Token

    def eval(self, x: float) -> float:
        """Return x for a VARIABLE token, the parsed number for a FLOAT."""
        kind = self.token.type
        if kind is TokenType.VARIABLE:
            return x
        if kind is TokenType.FLOAT:
            return float(self.token.value or 0)
        # Any other token type (e.g. ERROR) evaluates to zero.
        return 0.0
@dataclass
class FunctionExpression(Expression):
    """Node applying a named function to a sub-expression, e.g. sin(2*x)."""

    function: str
    argument: Expression

    # Supported function names mapped to their implementations.
    # BUG FIX: math.log with a single argument already computes the natural
    # logarithm; partial(math.log, base=math.e) raised TypeError at call
    # time because math.log accepts no keyword arguments.
    _funcs = {
        "abs": abs,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "atan": math.atan,
        "exp": math.exp,
        "ln": math.log,
        "log": math.log10,
    }

    def eval(self, x: float) -> float:
        """Evaluate the argument subtree at *x*, then apply the function.

        Raises:
            ValueError: if the function name is not supported.
        """
        func = self._funcs.get(self.function)
        if not func:
            raise ValueError(f"Unknown function {self.function}")
        # BUG FIX: apply the function to the evaluated argument subtree,
        # not to the raw variable value (sin(2*x) used to compute sin(x)).
        return func(self.argument.eval(x))
@dataclass
class InfixExpression(Expression):
    """Node for a binary operator applied to two sub-expressions."""

    operator: str
    lvalue: Expression
    rvalue: Expression

    # Operator symbol -> implementation.
    _ops = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow,
    }

    # (left, right) binding powers for the Pratt parser. A higher left
    # power binds tighter; right < left makes the operator right-associative.
    _binding_power = {
        "+": (1, 2),
        "-": (1, 2),
        "*": (3, 4),
        "/": (3, 4),
        # BUG FIX: exponentiation is conventionally right-associative
        # (2^3^2 == 2^(3^2) == 512). The previous (4, 5) powers parsed it
        # left-associatively as (2^3)^2 == 64. It still binds tighter than
        # '*' and '/' because its left power exceeds their right power.
        "^": (6, 5),
    }

    # Prefix binding power for unary minus (lower than ^, so -2^3 = -(2^3))
    _prefix_binding_power = {"-": 3}

    def eval(self, x: float) -> float:
        """Evaluate both operands at *x* and combine them.

        Raises:
            ValueError: if the operator symbol is not supported.
        """
        op_func = self._ops.get(self.operator)
        if not op_func:
            raise ValueError(f"Unknown operator {self.operator}")
        return op_func(self.lvalue.eval(x), self.rvalue.eval(x))
class Parser:
    """Pratt (precedence-climbing) parser over a stream of tokens."""

    def __init__(self, tokens: Iterator[Token]):
        self.tokens = deque(tokens)

    def peek(self) -> Token | None:
        """Look at the next token without consuming it, or None at the end."""
        if not self.tokens:
            return None
        return self.tokens[0]

    def consume(self) -> Token:
        """Remove and return the next token."""
        return self.tokens.popleft()

    def _parse_expression_bp(self, min_bp: int) -> Expression:
        # --- prefix position: parenthesized group, function call,
        # --- unary minus, or a plain atom.
        first = self.consume()
        if first.type == TokenType.SEPARATOR and first.value == "(":
            left = self._parse_expression_bp(0)
            self.consume()  # consume closing ')'
        elif first.type == TokenType.FUNCTION and first.value is not None:
            self.consume()  # consume opening '('
            inner = self._parse_expression_bp(0)
            self.consume()  # consume closing ')'
            left = FunctionExpression(function=first.value, argument=inner)
        elif first.type == TokenType.OPERATOR and first.value == "-":
            # Unary minus: desugar to 0 - operand
            bp = InfixExpression._prefix_binding_power["-"]
            left = InfixExpression(
                operator="-",
                lvalue=Atom(Token(type=TokenType.FLOAT, value="0")),
                rvalue=self._parse_expression_bp(bp),
            )
        else:
            left = Atom(first)
        # --- infix loop: keep absorbing operators that bind at least as
        # --- tightly as the caller requires.
        while (nxt := self.peek()) is not None:
            if nxt.value is None or nxt.type != TokenType.OPERATOR:
                break
            left_bp, right_bp = InfixExpression._binding_power[nxt.value]
            if left_bp < min_bp:
                break
            self.consume()
            left = InfixExpression(
                operator=nxt.value,
                lvalue=left,
                rvalue=self._parse_expression_bp(right_bp),
            )
        return left

    def parse_expression(self) -> Expression:
        """Parse one complete expression from the token stream."""
        return self._parse_expression_bp(min_bp=0)
def parse(expression: str) -> Expression:
    """Lex *expression* and parse the resulting tokens into a tree."""
    tokens = lex(expression)
    return Parser(tokens).parse_expression()
def eval_in_range(
    expression: str, start: float, stop: float, increment: float
) -> list[float]:
    """Evaluate *expression* at x = start, start+increment, ... while x < stop.

    Args:
        expression: formula in the variable ``x``, e.g. ``"2*x + 1"``.
        start: first sample point (inclusive).
        stop: end of the sampled range (exclusive).
        increment: positive step between consecutive sample points.

    Returns:
        The list of evaluated values, one per sample point.

    Raises:
        ValueError: if stop < start or increment is not positive.
    """
    if stop < start:
        raise ValueError("range must be provided in crescent order")
    # Guard against a zero or negative step, which would loop forever.
    if increment <= 0:
        raise ValueError("increment must be positive")
    parsed_expression = parse(expression)
    values = []
    x = start
    while x < stop:
        # BUG FIX: sample the current point *before* stepping, so the range
        # starts at `start` and excludes `stop`. The old loop incremented
        # first, skipping `start` and including a point >= `stop`.
        # (The unused counter `n` was removed.)
        values.append(parsed_expression.eval(x))
        x += increment
    return values