I think what you want is the `fill_nan` method. There is currently a request to expand the `fill_nan` method to have the same handy options as the `fill_null` method (e.g., 'forward', 'backward', 'mean', etc.).
But we can get the same results with a slight workaround. Let's suppose our data looks like this:
# Sample frame: two groups ("a" and "b") whose float column "val" has NaN gaps.
df = pl.DataFrame(
    {
        "group": (["a"] * 3) + (["b"] * 4),
        "obs": [1, 2, 3, 1, 2, 3, 4],
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        # All entries are written as floats so the column is unambiguously f64.
        "val": [1.0, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0],
    }
)
df
shape: (7, 3)
┌───────┬─────┬─────┐
│ group ┆ obs ┆ val │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ f64 │
╞═══════╪═════╪═════╡
│ a ┆ 1 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ a ┆ 2 ┆ NaN │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ a ┆ 3 ┆ 3.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b ┆ 1 ┆ 4.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b ┆ 2 ┆ NaN │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b ┆ 3 ┆ NaN │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b ┆ 4 ┆ 7.0 │
└───────┴─────┴─────┘
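As long as your column does not already have `null` values, we can convert the `NaN` values to `null` and then use the `fill_null` expression. (Once converted, original `null` values can no longer be told apart from former `NaN` values, so they would be filled as well.)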
(
    # Step 1: turn NaN into null so the null-filling strategies can see the gaps.
    # The expression keeps its root column name, so "val" is replaced in place
    # and no explicit keep_name()/alias is needed.
    df.with_columns(pl.col("val").fill_nan(None))
    # Step 2: apply each fill strategy within its group via a window expression.
    # The strategy must be passed by keyword; a positional string is treated
    # as a literal fill value by current polars versions.
    .with_columns(
        [
            pl.col("val").fill_null(strategy="forward").over("group").alias("fill_forward"),
            pl.col("val").fill_null(strategy="backward").over("group").alias("fill_back"),
            pl.col("val").fill_null(strategy="mean").over("group").alias("fill_mean"),
            pl.col("val").fill_null(strategy="zero").over("group").alias("fill_zero"),
            pl.col("val").fill_null(strategy="one").over("group").alias("fill_one"),
        ]
    )
)
shape: (7, 8)
┌───────┬─────┬──────┬──────────────┬───────────┬───────────┬───────────┬──────────┐
│ group ┆ obs ┆ val ┆ fill_forward ┆ fill_back ┆ fill_mean ┆ fill_zero ┆ fill_one │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
╞═══════╪═════╪══════╪══════════════╪═══════════╪═══════════╪═══════════╪══════════╡
│ a ┆ 1 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 2 ┆ null ┆ 1.0 ┆ 3.0 ┆ 2.0 ┆ 0.0 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 3 ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 1 ┆ 4.0 ┆ 4.0 ┆ 4.0 ┆ 4.0 ┆ 4.0 ┆ 4.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 2 ┆ null ┆ 4.0 ┆ 7.0 ┆ 5.5 ┆ 0.0 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 3 ┆ null ┆ 4.0 ┆ 7.0 ┆ 5.5 ┆ 0.0 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 4 ┆ 7.0 ┆ 7.0 ┆ 7.0 ┆ 7.0 ┆ 7.0 ┆ 7.0 │
└───────┴─────┴──────┴──────────────┴───────────┴───────────┴───────────┴──────────┘
You can also use other methods that work with `null` values, such as `interpolate`.
(
    # Convert NaN to null first; interpolate only fills null gaps.
    # The expression keeps the name "val", so the column is replaced in place.
    df.with_columns(pl.col("val").fill_nan(None))
    # Interpolate within each group so values never bleed across groups.
    .with_columns(pl.col("val").interpolate().over("group").alias("inter"))
)
shape: (7, 4)
┌───────┬─────┬──────┬───────┐
│ group ┆ obs ┆ val ┆ inter │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ f64 ┆ f64 │
╞═══════╪═════╪══════╪═══════╡
│ a ┆ 1 ┆ 1.0 ┆ 1.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ a ┆ 2 ┆ null ┆ 2.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ a ┆ 3 ┆ 3.0 ┆ 3.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 1 ┆ 4.0 ┆ 4.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 2 ┆ null ┆ 5.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 3 ┆ null ┆ 6.0 │
├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ b ┆ 4 ┆ 7.0 ┆ 7.0 │
└───────┴─────┴──────┴───────┘