Thiết kế website giá rẻ

Question

I found myself making a variant of pl.DataFrame which keeps track of the operations performed on it. For example:

from pprint import pformat, pprint
import polars as pl
import polars._typing as plt
from collections import UserList
from dataclasses import dataclass, field
from typing import Any, Iterable, Optional, Self
from numpy import ndarray


@dataclass
class CalcMeta(UserList):
    """List-like container of arbitrary metadata items.

    Declared as both a dataclass and a ``UserList`` subclass. The single
    dataclass field is named ``data``, which is also the attribute
    ``UserList`` uses to hold its underlying list.
    """

    # Underlying list of metadata items; when no value is supplied each
    # instance receives its own new empty list.
    data: list[Any] = field(default_factory=list)


@dataclass
class CalcReport(UserList):
    data: list[tuple[str, Any]] = field(default_factory=list, kw_only=True)

    def append(self, **kwargs) -> None:  # type: ignore
        self.data += list(kwargs.items())


class CalcDataFrame(pl.DataFrame):
    """A ``pl.DataFrame`` that carries metadata and a log of operations.

    ``filter`` and ``with_columns`` are overridden so that each call records
    a stringified description of its arguments in ``report`` (when a report
    is attached) and returns a new instance of the same class wrapping the
    result.

    The ``report`` object is passed by reference to every derived frame, so
    a frame and everything derived from it write to the same log.
    """

    meta: CalcMeta
    report: Optional[CalcReport] = None

    def __init__(
        self,
        data: pl.DataFrame,
        meta: Optional[CalcMeta] = None,
        report: Optional[CalcReport] = None,
    ):
        """Wrap ``data`` and attach ``meta`` and ``report``.

        Args:
            data: Frame whose contents this instance is built from.
            meta: Metadata to attach. When omitted, a new empty ``CalcMeta``
                is created for this instance.
            report: Log that overridden operations append to. When ``None``,
                nothing is recorded.
        """
        super().__init__(data)
        # A ``CalcMeta()`` default in the signature is evaluated once and
        # would be shared by every instance built without ``meta``.
        self.meta = CalcMeta() if meta is None else meta
        self.report = report

    def filter(
        self,
        *predicates: pl.Expr
        | pl.Series
        | str
        | Iterable[pl.Expr | pl.Series | str]
        | bool
        | list[bool]
        | ndarray[Any, Any],
        **constraints: Any,
    ) -> Self:
        """Filter rows via ``pl.DataFrame.filter`` and log the call.

        The arguments are recorded under ``filtered_with`` as the ``str()``
        of ``predicates`` and ``constraints``.

        Returns:
            A new instance of this class wrapping the filtered frame.
        """
        # Run the operation first so a call that raises leaves no entry in
        # the report.
        filtered = super().filter(*predicates, **constraints)
        return self.append_report(
            filtered_with={
                "predicates": str(predicates),
                "constraints": str(constraints),
            }
        ).derive(filtered)

    def with_columns(
        self,
        *exprs: plt.IntoExpr | Iterable[plt.IntoExpr],
        **named_exprs: plt.IntoExpr,
    ) -> Self:
        """Add or replace columns via ``pl.DataFrame.with_columns`` and log it.

        The arguments are recorded under ``with_columns`` as the ``str()``
        of ``exprs`` and ``named_exprs``.

        Returns:
            A new instance of this class wrapping the resulting frame.
        """
        # Run the operation first so a call that raises leaves no entry in
        # the report.
        updated = super().with_columns(*exprs, **named_exprs)
        return self.append_report(
            with_columns={"exprs": str(exprs), "named_exprs": str(named_exprs)}
        ).derive(updated)

    def append_report(self, **kwargs) -> Self:
        """Append ``kwargs`` to the report, if one is attached.

        Returns:
            ``self``, so the call can be chained.
        """
        if self.report is not None:
            self.report.append(**kwargs)
        return self

    def derive(self, data: pl.DataFrame, meta: Optional[CalcMeta] = None) -> Self:
        """Build a new instance around ``data`` that inherits meta and report.

        Args:
            data: Frame the new instance wraps.
            meta: Extra metadata concatenated after this instance's ``meta``.
                When omitted, nothing extra is added.

        Returns:
            A new instance of this class whose ``meta`` is a new
            concatenated ``CalcMeta`` and whose ``report`` is the same
            object as this instance's.
        """
        # Always concatenate, even with nothing to add, so the derived
        # frame gets its own ``CalcMeta`` rather than an alias of ours.
        extra = CalcMeta() if meta is None else meta
        return self.__class__(data, self.meta + extra, self.report)


# Build a small three-column example frame.
xs = pl.DataFrame(
    [
        pl.Series(column, values)
        for column, values in (
            ("alpha", ["a", "b", "c"]),
            ("beta", ["x", "x", "a"]),
            ("xs", [0, 1, 2]),
        )
    ]
)
xs.with_columns()  # the return value is not used
# Wrap the frame so that subsequent operations are recorded in the report.
xs = CalcDataFrame(xs, meta=CalcMeta(["some meta data"]), report=CalcReport())
xs = xs.filter(pl.col("alpha").eq("a"))
xs = xs.with_columns(pl.col("beta").replace_strict({"x": "y"}))
# Print each recorded step; nothing is printed for a missing or empty report.
for label, details in xs.report or ():
    print(f"{label}:")
    print(f"    {pformat(details)}")
print(xs)

filtered_with:
    {'constraints': '{}',
 'predicates': '(<Expr ['[(col("alpha")) == (String(a))…'] at '
               '0x7F1258222300>,)'}
with_columns:
    {'exprs': '(<Expr ['col("beta").replace_strict([Se…'] at 0x7F1258223680>,)',
 'named_exprs': '{}'}
shape: (1, 3)
┌───────┬──────┬─────┐
│ alpha ┆ beta ┆ xs  │
│ ---   ┆ ---  ┆ --- │
│ str   ┆ str  ┆ i64 │
╞═══════╪══════╪═════╡
│ a     ┆ y    ┆ 0   │
└───────┴──────┴─────┘

I stopped myself before I went too far for two reasons:

It is likely that this functionality already exists. I found the following relevant information:

Logging in Polars
Polars show_graph method

Some expressions don’t have a nice string representation off the bat:

import polars as pl

# Build a replace_strict expression from a mapping and a nested return
# dtype, then print the expression's string form.
mapping = {"hello": "world"}
return_dtype = pl.List(pl.Enum(pl.Series(["a", "b"])))
expr = pl.col("somecol").replace_strict(mapping, return_dtype=return_dtype)
print(expr)

col("somecol").replace_strict([Series, Series])

The trouble with show_graph is that it doesn’t present output in a format that is useful to me (i.e. it uses notation which is meant to help polars library authors).

Am I missing some obvious functionality that does what I want? If not, how can I pretty-print expressions such as replace_strict so that the inner Series (and any other objects) they are built from are also printed in full?

(Otherwise, I do have various ideas I can update this question with that let me capture what I need.)

Thiết kế website giá rẻ

Danh mục

Python + Polars: a DataFrame which keeps track of the history of operations it was derived from?