I need to append rows of a serialized subclass of BaseModel and display them in Streamlit. The problem is that some fields are Optional and are sometimes missing. When they are missing, the column types of the initialized table deviate from the types of the appended rows. I tried a few ways to pre-specify the column types:
- by setting them in the pandas.DataFrame that is used to initialize the streamlit.dataframe
- by setting the column_config
Neither of these seems to work (see code below). Any tips appreciated.
Setup code
import datetime
import re
import types
import typing
from functools import partial
from typing import List, Optional

import numpy as np
import pandas as pd
import streamlit as st
from pydantic import BaseModel, Field
class MyModel(BaseModel):
    """Example record whose serialized form is appended to the Streamlit table."""

    # Always-present text column.
    first: str
    # Nullable text column; defaults to None so it may be omitted entirely.
    second: Optional[str] = None
    # Creation timestamp. A default_factory is used so the value is computed
    # for each instance instead of once when the class body is executed.
    date: datetime.datetime = Field(default_factory=datetime.datetime.today)
# Lookup table: dtype name -> factory for the matching Streamlit column config.
# st_from_base_model indexes this table with the result of remove_optional()
# and calls the factory with the column label as its only positional argument.
TYPE_TO_STREAMLIT = {
    'str': st.column_config.TextColumn,
    'float': st.column_config.NumberColumn,
    'int': partial(st.column_config.NumberColumn, format='%u.'),
    'datetime64[D]': partial(
        st.column_config.DatetimeColumn, format='YYYY-MM-DD'
    ),
    'datetime64[s]': partial(
        st.column_config.DatetimeColumn, format='YYYY-MM-DD HH:mm:ss'
    ),
}
def remove_optional(type_):
    """Map a field annotation to a dtype name understood by numpy/pandas.

    ``Optional[X]`` (including the ``X | None`` spelling) is unwrapped to
    ``X`` before the mapping is applied.

    Args:
        type_: A type annotation, e.g. ``str``, ``Optional[str]`` or
            ``datetime.datetime``.

    Returns:
        ``'datetime64[s]'`` for ``datetime.date`` and its subclasses, the
        class name (``'str'``, ``'int'``, ``'float'``, ...) for builtin
        classes, and the unwrapped annotation itself for anything else.

    >>> remove_optional(typing.Optional[str])
    'str'
    >>> remove_optional(datetime.datetime)
    'datetime64[s]'
    """
    # Inspect the annotation structurally instead of pattern-matching its
    # repr: the repr of Optional differs between Python versions, and the
    # square brackets of "Optional[...]" are regex metacharacters.
    union_origins = (typing.Union, getattr(types, "UnionType", typing.Union))

    inner = type_
    if typing.get_origin(type_) in union_origins:
        members = [
            arg for arg in typing.get_args(type_) if arg is not type(None)
        ]
        # Only a union of exactly one real type with None is "Optional".
        if len(members) == 1:
            inner = members[0]

    if isinstance(inner, type):
        if issubclass(inner, datetime.date):
            return 'datetime64[s]'  # pandas does not support [D]
        if inner.__module__ == 'builtins':
            # Return the name rather than the class so that plain and
            # Optional annotations yield the same (string) result.
            return inner.__name__
    return inner
def np_from_base_model(bm: BaseModel):
    """Create a zero-length structured numpy array mirroring the model fields.

    Each entry of ``bm.model_fields`` becomes one ``(name, dtype)`` field of
    the structured dtype, where the dtype is whatever ``remove_optional``
    returns for that field's annotation.
    """
    field_specs = []
    for field_name, field_info in bm.model_fields.items():
        field_specs.append((field_name, remove_optional(field_info.annotation)))
    return np.empty(0, dtype=field_specs)
def df_from_base_model(bm: BaseModel):
    """Wrap the empty structured array for ``bm`` in a pandas DataFrame."""
    empty_records = np_from_base_model(bm)
    return pd.DataFrame(empty_records)
def st_from_base_model(bm: BaseModel):
    """Render an empty ``st.dataframe`` configured from the model's fields.

    A column config is built per field by looking up the result of
    ``remove_optional`` in ``TYPE_TO_STREAMLIT``; the label is the field name
    with underscores replaced by spaces. Returns whatever ``st.dataframe``
    returns.
    """
    column_config = {}
    for field_name, field_info in bm.model_fields.items():
        make_column = TYPE_TO_STREAMLIT[remove_optional(field_info.annotation)]
        column_config[field_name] = make_column(field_name.replace('_', ' '))
    empty_frame = df_from_base_model(bm)
    return st.dataframe(empty_frame, column_config=column_config)
def format_metadata(report_metadatas: List[MyModel]) -> pd.DataFrame:
    """Dump the given models into a DataFrame and cast its columns.

    The target dtype of each column is derived from the ``MyModel`` field
    annotations via ``remove_optional``.
    """
    dtype_by_column = {
        name: remove_optional(info.annotation)
        for name, info in MyModel.model_fields.items()
    }
    records = [metadata.model_dump() for metadata in report_metadatas]
    return pd.DataFrame(records).astype(dtype_by_column)
Actual code
# Render the empty table exactly once and keep the returned element handle.
# Calling st_from_base_model a second time would draw a second table and
# leave the first one on the page without ever receiving rows.
st.session_state['df_reports'] = st_from_base_model(MyModel)

# One example per character of "hello world!"; `second` is filled in only
# for characters with an even code point, otherwise it is None.
examples = [
    MyModel(first=x, second=chr(ord(x) + 5) if ord(x) % 2 == 0 else None)
    for x in "hello world!"
]

# Append the examples one row at a time to the rendered table.
for ex in examples:
    df_ = format_metadata([ex])
    st.session_state['df_reports'].add_rows(df_)
I am still getting:
Unsupported operation. The data passed into add_rows() must have the same data signature as the original data.
In this case, add_rows() received ["unicode","unicode","unicode","unicode","unicode","unicode","unicode","unicode","unicode","unicode","datetime","unicode"]
but was expecting ["empty","empty","empty","empty","empty","empty","empty","empty","empty","empty","datetime","empty"].
or
elementType 'alert' is not a valid arrowAddRows target!
depending on the input data.
I am struggling to see how data is represented in Streamlit and where I can inspect the current types of the data (the code seems to be quite obscure).
2