Let's expand your Auction DataFrame by two lines so that we can clarify something.
import polars as pl
auctiondata = {"ItemId": [15331, 15332, 15333, 15213, 15331, 15213] + [15213, 15213],
"AuctionId": [2084868458, 2085008809, 2087827052, 2087835700, 2087827999,
2087827997] + [-1, -2],
"RealmName": ['Gehennas', 'Gehennas', 'Mograine', 'Lakeshire', 'Gehennas',
'Bloodfang'] + ['Euripides', 'Euripides'],
"Stat0": ['+3 Stamina', '+2 Intelelct', '+3 Stamina', '+3 Agility', '+3 Stamina',
'+3 Stamina'] + ['+3 Stamina', '+3 Stamina'],
"Stat1": ['+3 Agility', '+3 Stamina', '+3 Intellect', '+3 Stamina', '+3 Agility',
'+3 Agility'] + ['+3 Agility', ''],
"Stat2": ['', '', '', '','',''] + ['+10 Strength', ''],
"Stat3": [''] * 8,
"Stat4": [''] * 8,
"buyoutgold": ['40', '', '', '', '120',''] + ['', '']}
trackingdata = {"ItemId": [15331, 15213, 15333],
"RealmName": ['Gehennas', '', 'Mograine'],
"Stat0": ['+3 Stamina', '+3 Stamina', '+3 Stamina'],
"Stat1": ['+3 Agility', '+3 Agility', '+3 Intellect'],
"Stat2": ['', '', ''],
"Stat3": ['', '', ''],
"Stat4": ['', '', ''],
"buyoutgold": ['50', '', '']}
resultdata = {"ItemId": [15331, 15333, 15213, 15213],
"AuctionId": [2084868458, 2087827052, 2087835700, 2087827997],
"RealmName": ['Gehennas', 'Mograine', 'Lakeshire', 'Bloodfang'],
"Stat0": ['+3 Stamina', '+3 Stamina', '+3 Agility', '+3 Stamina'],
"Stat1": ['+3 Agility', '+3 Intellect', '+3 Stamina', '+3 Agility'],
"Stat2": ['', '', '', ''],
"Stat3": ['', '', '', ''],
"Stat4": ['', '', '', ''],
"buyoutgold": ['40', '', '', '']}
auctiondf = pl.DataFrame(auctiondata)
trackingdf = pl.DataFrame(trackingdata)
resultdf = pl.DataFrame(resultdata)
print(auctiondf)
shape: (8, 9)
┌────────┬────────────┬───────────┬──────────────┬──────────────┬──────────────┬───────┬───────┬────────────┐
│ ItemId ┆ AuctionId ┆ RealmName ┆ Stat0 ┆ Stat1 ┆ Stat2 ┆ Stat3 ┆ Stat4 ┆ buyoutgold │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ str ┆ str ┆ str ┆ str ┆ str ┆ str │
╞════════╪════════════╪═══════════╪══════════════╪══════════════╪══════════════╪═══════╪═══════╪════════════╡
│ 15331 ┆ 2084868458 ┆ Gehennas ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ 40 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15332 ┆ 2085008809 ┆ Gehennas ┆ +2 Intelelct ┆ +3 Stamina ┆ ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15333 ┆ 2087827052 ┆ Mograine ┆ +3 Stamina ┆ +3 Intellect ┆ ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ 2087835700 ┆ Lakeshire ┆ +3 Agility ┆ +3 Stamina ┆ ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15331 ┆ 2087827999 ┆ Gehennas ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ 120 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ 2087827997 ┆ Bloodfang ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ -1 ┆ Euripides ┆ +3 Stamina ┆ +3 Agility ┆ +10 Strength ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ -2 ┆ Euripides ┆ +3 Stamina ┆ ┆ ┆ ┆ ┆ │
└────────┴────────────┴───────────┴──────────────┴──────────────┴──────────────┴───────┴───────┴────────────┘
Here's the algorithm:
result = (
auctiondf
.join(
trackingdf,
on="ItemId",
suffix="_tracking",
)
.filter((pl.col('RealmName_tracking') == "") | (pl.col('RealmName') == pl.col('RealmName_tracking')))
.with_columns(pl.col('^buyoutgold.*$').cast(pl.Float64, strict=False).keep_name())
.filter((pl.col('buyoutgold_tracking').is_null()) | (pl.col('buyoutgold') <= pl.col('buyoutgold_tracking')))
.with_columns(
auction_stats=pl.concat_list(pl.col(r'^Stat.*\d$')).list.eval(pl.element().filter(pl.element() != '')),
tracking_stats=pl.concat_list(pl.col(r'^Stat.*_tracking$')).list.eval(pl.element().filter(pl.element() != '')),
)
.filter(pl.col('tracking_stats').list.difference(pl.col('auction_stats')).list.lengths()==0)
.select(pl.exclude(r'^.*_tracking$'))
.select(pl.exclude(r'^.*_stats$'))
)
with pl.Config(set_tbl_cols=10):
print(result)
shape: (5, 9)
┌────────┬────────────┬───────────┬────────────┬──────────────┬──────────────┬───────┬───────┬────────────┐
│ ItemId ┆ AuctionId ┆ RealmName ┆ Stat0 ┆ Stat1 ┆ Stat2 ┆ Stat3 ┆ Stat4 ┆ buyoutgold │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ str ┆ str ┆ str ┆ str ┆ str ┆ f64 │
╞════════╪════════════╪═══════════╪════════════╪══════════════╪══════════════╪═══════╪═══════╪════════════╡
│ 15331 ┆ 2084868458 ┆ Gehennas ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ 40.0 │
│ 15333 ┆ 2087827052 ┆ Mograine ┆ +3 Stamina ┆ +3 Intellect ┆ ┆ ┆ ┆ null │
│ 15213 ┆ 2087835700 ┆ Lakeshire ┆ +3 Agility ┆ +3 Stamina ┆ ┆ ┆ ┆ null │
│ 15213 ┆ 2087827997 ┆ Bloodfang ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ null │
│ 15213 ┆ -1 ┆ Euripides ┆ +3 Stamina ┆ +3 Agility ┆ +10 Strength ┆ ┆ ┆ null │
└────────┴────────────┴───────────┴────────────┴──────────────┴──────────────┴───────┴───────┴────────────┘
Some notes
Let's take a look at the results just before we exclude the Tracking DF columns:
shape: (5, 16)
┌────────┬────────────┬───────────┬────────────┬──────────────┬──────────────┬───────┬───────┬────────────┬────────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬─────────────────────┐
│ ItemId ┆ AuctionId ┆ RealmName ┆ Stat0 ┆ Stat1 ┆ Stat2 ┆ Stat3 ┆ Stat4 ┆ buyoutgold ┆ RealmName_tracking ┆ Stat0_tracking ┆ Stat1_tracking ┆ Stat2_tracking ┆ Stat3_tracking ┆ Stat4_tracking ┆ buyoutgold_tracking │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ str ┆ str ┆ str ┆ str ┆ str ┆ f64 ┆ str ┆ str ┆ str ┆ str ┆ str ┆ str ┆ f64 │
╞════════╪════════════╪═══════════╪════════════╪══════════════╪══════════════╪═══════╪═══════╪════════════╪════════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪═════════════════════╡
│ 15331 ┆ 2084868458 ┆ Gehennas ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ 40.0 ┆ Gehennas ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ 50.0 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15333 ┆ 2087827052 ┆ Mograine ┆ +3 Stamina ┆ +3 Intellect ┆ ┆ ┆ ┆ null ┆ Mograine ┆ +3 Stamina ┆ +3 Intellect ┆ ┆ ┆ ┆ null │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ 2087835700 ┆ Lakeshire ┆ +3 Agility ┆ +3 Stamina ┆ ┆ ┆ ┆ null ┆ ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ null │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ 2087827997 ┆ Bloodfang ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ null ┆ ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ null │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 15213 ┆ -1 ┆ Euripides ┆ +3 Stamina ┆ +3 Agility ┆ +10 Strength ┆ ┆ ┆ null ┆ ┆ +3 Stamina ┆ +3 Agility ┆ ┆ ┆ ┆ null │
└────────┴────────────┴───────────┴────────────┴──────────────┴──────────────┴───────┴───────┴────────────┴────────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴─────────────────────┘
Notice that the last line contains +10 Strength
. I wasn't sure what to do with auction id's that contain Stats that are not in the tracking DF. In this case, the Auction ID contained all the stats contained in the Tracking ID, plus one that wasn't in the Tracking ID: +10 Strength
. If that's not what you want, we can change the algorithm easily.
We cast both buyoutgold
columns to numbers so that we can make the comparison.
The Stats columns
For the Stats columns, we're mimicking set difference. Basically, we're asking "Is there any Stat in the Tracking DF that is not in the Auction DF?".
To accomplish this, we use .list.difference