I’m building a package on top of Polars, and one of the functions looks like this:
<code>def func(x: IntoExpr, y: IntoExpr):
...
</code>
<code>def func(x: IntoExpr, y: IntoExpr):
...
</code>
def func(x: IntoExpr, y: IntoExpr):
...
The business logic requires that `x` may expand to multiple columns, but `y` must resolve to exactly one column.
What should I do to check and validate this?
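You can use the `polars.selectors.expand_selector` function, which resolves a selector (or, with `strict=False`, a simple column-selection expression) to the names of the columns it selects.
Checking the length of the returned tuple then tells you whether `y` refers to a single column.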
The drawback is that you can’t pass in arbitrary expressions: anything beyond a plain column selection makes the call raise a `TypeError` (see the final example).
<code>import polars as pl
import polars.selectors as cs
from polars.selectors import expand_selector
data = {
"a1": [1, 2, 3],
"a2": [4, 5, 6],
"b1": [7, 8, 9],
"b2": [10, 11, 12],
}
df = pl.DataFrame(data)
print(
expand_selector(df, cs.exclude('b1', 'b2')), # ('a1', 'a2')
expand_selector(df, cs.starts_with('b')), # ('b1', 'b2')
expand_selector(df, cs.matches('(a|b)1$')), # ('a1', 'b1')
# use with expressions expand_selector(..., strict=False)
expand_selector(df, pl.exclude('a1', 'a2'), strict=False), # ('b1', 'b2')
expand_selector(df, pl.col('b1'), strict=False), # ('b1', )
expand_selector(df, pl.all(), strict=False), # ('a1', 'a2', 'b1', 'b2')
sep='\n'
)
# anything past an arbitrary selection expression will fail
print(expand_selector(df, pl.all() + 1, strict=False))
# Traceback (most recent call last):
# File "/home/cameron/.vim-excerpt", line 26, in <module>
# expand_selector(df, pl.all() + 1, strict=False),
# File "/home/cameron/.pyenv/versions/dutc-site/lib/python3.10/site-packages/polars/selectors.py", line 190, in expand_selector
# raise TypeError(msg)
# TypeError: expected a selector; found <Expr ['[(*) + (dyn int: 1)]'] at 0x7F835F943D30> instead.
</code>
<code>import polars as pl
import polars.selectors as cs
from polars.selectors import expand_selector
data = {
"a1": [1, 2, 3],
"a2": [4, 5, 6],
"b1": [7, 8, 9],
"b2": [10, 11, 12],
}
df = pl.DataFrame(data)
print(
expand_selector(df, cs.exclude('b1', 'b2')), # ('a1', 'a2')
expand_selector(df, cs.starts_with('b')), # ('b1', 'b2')
expand_selector(df, cs.matches('(a|b)1$')), # ('a1', 'b1')
# use with expressions expand_selector(..., strict=False)
expand_selector(df, pl.exclude('a1', 'a2'), strict=False), # ('b1', 'b2')
expand_selector(df, pl.col('b1'), strict=False), # ('b1', )
expand_selector(df, pl.all(), strict=False), # ('a1', 'a2', 'b1', 'b2')
sep='\n'
)
# anything past an arbitrary selection expression will fail
print(expand_selector(df, pl.all() + 1, strict=False))
# Traceback (most recent call last):
# File "/home/cameron/.vim-excerpt", line 26, in <module>
# expand_selector(df, pl.all() + 1, strict=False),
# File "/home/cameron/.pyenv/versions/dutc-site/lib/python3.10/site-packages/polars/selectors.py", line 190, in expand_selector
# raise TypeError(msg)
# TypeError: expected a selector; found <Expr ['[(*) + (dyn int: 1)]'] at 0x7F835F943D30> instead.
</code>
import polars as pl
import polars.selectors as cs
from polars.selectors import expand_selector

# Small demo frame with two "a*" columns and two "b*" columns.
data = {
    "a1": [1, 2, 3],
    "a2": [4, 5, 6],
    "b1": [7, 8, 9],
    "b2": [10, 11, 12],
}
df = pl.DataFrame(data)

# Each call resolves a selector against `df`; the expected result is shown in
# the trailing comment. Results are printed one per line.
print(
    expand_selector(df, cs.exclude('b1', 'b2')),  # ('a1', 'a2')
    expand_selector(df, cs.starts_with('b')),  # ('b1', 'b2')
    expand_selector(df, cs.matches('(a|b)1$')),  # ('a1', 'b1')
    # Plain column-selection expressions need expand_selector(..., strict=False)
    expand_selector(df, pl.exclude('a1', 'a2'), strict=False),  # ('b1', 'b2')
    expand_selector(df, pl.col('b1'), strict=False),  # ('b1', )
    expand_selector(df, pl.all(), strict=False),  # ('a1', 'a2', 'b1', 'b2')
    sep='\n',  # newline separator (the backslash was missing: 'n')
)

# Anything beyond a plain column selection (here: arithmetic on the selected
# columns) is rejected, even with strict=False. This call raises on purpose.
print(expand_selector(df, pl.all() + 1, strict=False))
# Traceback (most recent call last):
# File "/home/cameron/.vim-excerpt", line 26, in <module>
# expand_selector(df, pl.all() + 1, strict=False),
# File "/home/cameron/.pyenv/versions/dutc-site/lib/python3.10/site-packages/polars/selectors.py", line 190, in expand_selector
# raise TypeError(msg)
# TypeError: expected a selector; found <Expr ['[(*) + (dyn int: 1)]'] at 0x7F835F943D30> instead.
2
There are introspection methods in the expression `meta` namespace, e.g. `Expr.meta.has_multiple_outputs()`:
<code>>>> pl.col(["a", "b"]).meta.has_multiple_outputs()
True
</code>
<code>>>> pl.col(["a", "b"]).meta.has_multiple_outputs()
True
</code>
>>> pl.col(["a", "b"]).meta.has_multiple_outputs()
True
<code>>>> pl.col("a").meta.has_multiple_outputs()
False
</code>
<code>>>> pl.col("a").meta.has_multiple_outputs()
False
</code>
>>> pl.col("a").meta.has_multiple_outputs()
False
6