How to compare two DataFrames and populate a boolean column
For each row of the first DataFrame, check whether any individual pattern key from its dict, or the whole dict as a string, is present in the second DataFrame.
# First frame: column "C" holds one plain Python dict per row, mapping a
# pattern name to a two-element list.
df1 = pl.DataFrame(
    {
        "A": ["a", "b", "a", "c", "a", "c", "a", "b", "b"],
        "B": [23, 45, 60, 50, 56, 44, 34, 30, 97],
        "C": [
            {"pattern1": ["a", "score1"], "pattern2": ["df", "text1"]},
            {"pattern3": ["abc", "score1"]},
            {"pattern1": ["a", "score2"], "pattern4": ["df", "text2"]},
            {
                "pattern5": ["a", "score2"],
                "pattern6": ["gh", "text1"],
                "pattern7": ["gh", "text1"],
            },
            {"pattern8": ["a", "score1"]},
            {"pattern9": ["a", "score1"], "pattern10": ["df", "text1"]},
            {"pattern11": ["a", "score1"]},
            {"pattern12": ["a", "score1"], "pattern13": ["df", "text1"]},
            {"pattern3": ["a", "score1"], "pattern5": ["df", "text1"]},
        ],
    }
)
shape: (9, 3)
┌─────┬─────┬─────────────────────────────────────┐
│ A ┆ B ┆ C │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ object │
╞═════╪═════╪═════════════════════════════════════╡
│ a ┆ 23 ┆ {'pattern1': ['a', 'score1'], 'p... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 45 ┆ {'pattern3': ['abc', 'score1']} │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 60 ┆ {'pattern1': ['a', 'score2'], 'p... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c ┆ 50 ┆ {'pattern5': ['a', 'score2'], 'p... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ... ┆ ... ┆ ... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 56 ┆ {'pattern8': ['a', 'score1']} │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c ┆ 44 ┆ {'pattern9': ['a', 'score1'], 'p... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 34 ┆ {'pattern11': ['a', 'score1']} │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 30 ┆ {'pattern12': ['a', 'score1'], '... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 97 ┆ {'pattern3': ['a', 'score1'], 'p... │
└─────┴─────┴─────────────────────────────────────┘
# Second frame: "valid_patterns" mixes bare pattern names with dicts that
# have been written out as strings.
df2 = pl.DataFrame(
    {
        "A": ["a", "a", "b", "b", "a", "c"],
        "valid_patterns": [
            "pattern1",
            "{'pattern1': ['a', 'score2'], 'pattern4': ['df', 'text2']}",
            "{'pattern3':['abc','score1']}",
            "pattern3",
            "{'pattern11': ['a', 'score1']}",
            "pattern9",
        ],
    }
)
┌─────┬─────────────────────────────────────┐
│ A ┆ valid_patterns │
│ --- ┆ --- │
│ str ┆ str │
╞═════╪═════════════════════════════════════╡
│ a ┆ pattern1 │
├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ {'pattern1': ['a', 'score2'], 'p... │
├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ {'pattern3':['abc','score1']} │
├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ pattern3 │
├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ {'pattern11': ['a', 'score1']} │
├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c ┆ pattern9 │
└─────┴─────────────────────────────────────┘
Output:
Based on column A, I need to check the column C values: either the full dict or any one key of the dict needs to be present in the second DataFrame for the same value of column A.
shape: (9, 4)
┌─────┬─────┬─────────────────────────────────────┬───────┐
│ A ┆ B ┆ C ┆ VALID │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ object ┆ bool │
╞═════╪═════╪═════════════════════════════════════╪═══════╡
│ a ┆ 23 ┆ {'pattern1': ['a', 'score1'], 'p... ┆ true │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 45 ┆ {'pattern3': ['abc', 'score1']} ┆ true │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ a ┆ 60 ┆ {'pattern1': ['a', 'score2'], 'p... ┆ true │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ c ┆ 50 ┆ {'pattern5': ['a', 'score2'], 'p... ┆ false │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ ... ┆ ... ┆ ... ┆ ... │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ a ┆ 56 ┆ {'pattern8': ['a', 'score1']} ┆ false │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ c ┆ 44 ┆ {'pattern9': ['a', 'score1'], 'p... ┆ true │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ a ┆ 34 ┆ {'pattern11': ['a', 'score1']} ┆ true │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 30 ┆ {'pattern12': ['a', 'score1'], '... ┆ false │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 97 ┆ {'pattern3': ['a', 'score1'], 'p... ┆ false │
└─────┴─────┴─────────────────────────────────────┴───────┘