# 278 lines, 7.1 KiB, Python
# Parsing is the process of turning a sequence
# of tokens into a tree representation:
#
#                         Add
#                Parser  /   \
#  "1 + 2 * 3" --------> 1   Mul
#                            / \
#                           2   3
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from collections import deque
|
|
from collections.abc import Iterator
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from functools import partial
|
|
import math
|
|
import operator
|
|
|
|
# The first step is to generate a list of tokens.
# Tokens are the supported symbol classes; they have a type tag.
|
class TokenType(Enum):
    """Lexical class tag attached to every token produced by the lexer."""

    ERROR = -1
    OPERATOR = 0
    VARIABLE = 1
    SEPARATOR = 2
    FLOAT = 3
    FUNCTION = 4
|
|
|
|
|
|
# The token can optionally capture a string value.
# This will be used later by the parser to build the syntactic tree.
|
@dataclass
class Token:
    """A single lexeme produced by the lexer.

    type -- the lexical class of the token.
    value -- optional captured text: the symbol itself, a number
        literal, a function name, or an error description for
        ERROR tokens.
    """

    type: TokenType
    value: str | None = None
|
|
|
|
|
|
# Symbol tables consulted by the lexer, one per token class.
operators: set[str] = {"+", "-", "*", "/", "^"}
functions: set[str] = {"abs", "cos", "sin", "tan", "atan", "exp", "ln", "log"}
separators: set[str] = {"(", ")"}
variables: set[str] = {"x"}
|
|
|
|
|
|
# The lexer is a generator function that yields tokens as it scans the input string.
def lex(input: str) -> Iterator[Token]:
    """Scan *input* left to right, yielding one Token per lexeme.

    On the first invalid construct an ERROR token (whose value holds the
    message) is yielded and the generator stops.
    """
    i = 0

    while i < len(input):
        char = input[i]

        # Whitespace separates tokens but produces none.
        if char.isspace():
            i += 1
            continue

        # Single-character classes: separators, operators, variables.
        if char in separators:
            yield Token(type=TokenType.SEPARATOR, value=char)
            i += 1
            continue

        if char in operators:
            yield Token(type=TokenType.OPERATOR, value=char)
            i += 1
            continue

        if char in variables:
            yield Token(type=TokenType.VARIABLE, value=char)
            i += 1
            continue

        # Function names: a maximal run of letters, validated against
        # the known-function table.
        if char.isalpha():
            j = i + 1
            while j < len(input) and input[j].isalpha():
                j += 1

            name = input[i:j]
            if name not in functions:
                yield Token(
                    type=TokenType.ERROR, value=f"unknown function name '{name}'"
                )
                return

            yield Token(type=TokenType.FUNCTION, value=name)

            i = j
            continue

        # Float literals: digits with at most one dot.
        # Fix: the original also tested for a leading '+'/'-' with a
        # digit lookahead here, but that branch was unreachable (signs
        # are consumed as OPERATOR tokens above) and its input[i + 1]
        # lookahead could index past the end of the string; removed.
        if char.isdigit() or char == ".":
            j = i + 1

            dots = 1 if char == "." else 0
            while j < len(input) and (input[j].isdigit() or input[j] == "."):
                if input[j] == ".":
                    dots += 1
                j += 1

            if dots > 1:
                # Fix: e.g. "1.2.3" was previously emitted as one FLOAT
                # token and crashed later inside float(); reject it here.
                yield Token(
                    type=TokenType.ERROR, value="number contains more than one dot"
                )
                return

            if dots == 1 and input[j - 1] == ".":
                yield Token(
                    type=TokenType.ERROR, value="number after dot was expected"
                )
                return

            yield Token(type=TokenType.FLOAT, value=input[i:j])
            i = j
            continue

        yield Token(type=TokenType.ERROR, value="not an accepted character")
        return
|
|
|
|
|
|
class Expression(ABC):
    """Node of the syntactic tree; every node can be evaluated at a point."""

    @abstractmethod
    def eval(self, x: float) -> float:
        """Return the numeric value of this subtree at *x*."""
        ...
|
|
|
|
|
|
@dataclass
class Atom(Expression):
    """Leaf node wrapping a single token (a variable or a float literal)."""

    token: Token

    def eval(self, x: float) -> float:
        """Evaluate the leaf: *x* for variables, the literal for floats.

        Any other token type evaluates to 0.0 (unchanged from the
        original behaviour; such tokens are not expected to reach here).
        """
        kind = self.token.type
        if kind == TokenType.VARIABLE:
            return x
        if kind == TokenType.FLOAT:
            return float(self.token.value or 0)
        return 0.0
|
|
|
|
|
|
@dataclass
class FunctionExpression(Expression):
    """Unary function application node, e.g. sin(<argument>)."""

    function: str
    argument: Expression

    # Dispatch table mapping a function name to its implementation.
    # Fix: "ln" previously used partial(math.log, base=math.e), but
    # math.log does not accept keyword arguments, so every call raised
    # TypeError; one-argument math.log is already the natural logarithm.
    _funcs = {
        "abs": abs,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "atan": math.atan,
        "exp": math.exp,
        "ln": math.log,
        "log": math.log10,
    }

    def eval(self, x: float) -> float:
        """Evaluate the argument subtree at *x* and apply the function.

        Raises:
            ValueError: if the function name is not in ``_funcs``.
        """
        func = self._funcs.get(self.function)
        if not func:
            raise ValueError(f"Unknown function {self.function}")
        # Fix: the original returned func(x), ignoring the argument
        # subtree entirely — e.g. sin(2*x) evaluated as sin(x).
        return func(self.argument.eval(x))
|
|
|
|
|
|
@dataclass
class InfixExpression(Expression):
    """Binary operator node: <lvalue> <operator> <rvalue>."""

    operator: str
    lvalue: Expression
    rvalue: Expression

    # Operator implementations, keyed by their surface symbol.
    _ops = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow,
    }

    # Pratt (left, right) binding powers.  Left-associative operators
    # use (n, n + 1).  Fix: "^" was (4, 5), which made exponentiation
    # left-associative; (6, 5) makes it right-associative, matching
    # mathematical convention (2^3^2 parses as 2^(3^2) = 512), while
    # still binding tighter than "*" and "/".
    _binding_power = {
        "+": (1, 2),
        "-": (1, 2),
        "*": (3, 4),
        "/": (3, 4),
        "^": (6, 5),
    }

    # Prefix binding power for unary minus (lower than "^"'s left
    # power, so -2^3 parses as -(2^3)).
    _prefix_binding_power = {"-": 3}

    def eval(self, x: float) -> float:
        """Evaluate both operand subtrees at *x* and combine them.

        Raises:
            ValueError: if the operator symbol is not in ``_ops``.
        """
        op_func = self._ops.get(self.operator)
        if not op_func:
            raise ValueError(f"Unknown operator {self.operator}")
        return op_func(self.lvalue.eval(x), self.rvalue.eval(x))
|
|
|
|
|
|
class Parser:
    """Pratt (precedence-climbing) parser over a token stream.

    Consumes tokens from an iterator and builds an Expression tree,
    using the binding powers declared on InfixExpression.
    """

    def __init__(self, tokens: Iterator[Token]):
        # Buffer the stream in a deque for O(1) single-token lookahead.
        self.tokens = deque(tokens)

    def peek(self) -> Token | None:
        """Return the next token without consuming it, or None at end."""
        return self.tokens[0] if self.tokens else None

    def consume(self) -> Token:
        """Pop and return the next token.

        Raises IndexError when the stream is exhausted (e.g. on
        malformed input with a missing operand or ')').
        """
        return self.tokens.popleft()

    def _parse_expression_bp(self, min_bp: int) -> Expression:
        """Parse an expression whose operators bind at least as tightly
        as *min_bp* — the core Pratt recursion."""
        token = self.consume()

        if token.type == TokenType.SEPARATOR and token.value == "(":
            # Parenthesised sub-expression: binding power resets to 0.
            lhs = self._parse_expression_bp(0)
            # NOTE(review): the next token is assumed to be ')' and is
            # consumed unchecked — malformed input is not reported here.
            self.consume()  # consume closing ')'
        elif token.type == TokenType.FUNCTION and token.value is not None:
            # Function call: FUNCTION '(' expression ')'.
            # NOTE(review): the '(' is likewise consumed without validation.
            self.consume()  # consume opening '('
            argument = self._parse_expression_bp(0)
            self.consume()  # consume closing ')'
            lhs = FunctionExpression(function=token.value, argument=argument)
        elif token.type == TokenType.OPERATOR and token.value == "-":
            # Unary minus: desugar to 0 - operand
            prefix_bp = InfixExpression._prefix_binding_power["-"]
            operand = self._parse_expression_bp(prefix_bp)
            lhs = InfixExpression(
                operator="-",
                lvalue=Atom(Token(type=TokenType.FLOAT, value="0")),
                rvalue=operand,
            )
        else:
            # Leaf: a FLOAT or VARIABLE token.  Any other token type also
            # lands here and later evaluates to 0.0 via Atom.eval.
            lhs = Atom(token)

        # Infix loop: keep folding operators into lhs while they bind
        # at least as tightly as the caller's minimum binding power.
        while True:
            op = self.peek()
            if op is None or op.value is None:
                break

            if op.type != TokenType.OPERATOR:
                # E.g. a closing ')': leave it for the caller to consume.
                break

            l_bp, r_bp = InfixExpression._binding_power[op.value]
            if l_bp < min_bp:
                # Operator binds too loosely: close this sub-expression.
                break

            self.consume()

            # Parse the right-hand side with the operator's right binding
            # power, then fold both sides into a new left-hand side.
            rhs = self._parse_expression_bp(r_bp)
            lhs = InfixExpression(operator=op.value, lvalue=lhs, rvalue=rhs)

        return lhs

    def parse_expression(self) -> Expression:
        """Parse and return the complete expression tree."""
        return self._parse_expression_bp(min_bp=0)
|
|
|
|
|
|
def parse(expression: str) -> Expression:
    """Tokenize *expression* and build its syntactic tree."""
    tokens = lex(expression)
    parser = Parser(tokens)
    return parser.parse_expression()
|
|
|
|
|
|
def eval_in_range(
    expression: str, start: float, stop: float, increment: float
) -> list[float]:
    """Evaluate *expression* at x = start, start + increment, ... while x < stop.

    Returns the list of evaluated values; *stop* itself is excluded.

    Raises:
        ValueError: if the range is decreasing or the increment is not
            positive.
    """
    if stop < start:
        raise ValueError("range must be provided in crescent order")
    if increment <= 0:
        # Guard added: a non-positive increment made the loop below
        # spin forever in the original.
        raise ValueError("increment must be positive")

    parsed_expression = parse(expression)

    # Fix: the original advanced x *before* evaluating, which skipped
    # f(start) and instead evaluated one point at or beyond stop.
    values = []
    x = start
    while x < stop:
        values.append(parsed_expression.eval(x))
        x += increment
    return values
|