I have this data frame
AutoStat_1 AutoStat_2 Mode_1 Mode_2 Setpoint_1 Setpoint_2
0 -> 0 0 1 1 23 24
1 -> 0 1 1 0 23 27
2 -> 1 1 3 0 26 27
3 -> 1 0 3 1 26 24
4 -> 0 0 1 2 24 24
5 -> 0 0 1 2 24 24
6 -> 2 3 0 4 24 26
7 -> 2 3 0 4 25 26
The requirement is that if AutoStat_i
is not 0
then Mode_i
and Setpoint_i
will be the value of the above (in-front) of which AutoStat_i
is 0
The result should be (notice the column Setpoint_i
and Mode_i
are different than above)
AutoStat_1 AutoStat_2 Mode_1 Mode_2 Setpoint_1 Setpoint_2
0 -> 0 0 1 1 23 24
1 -> 0 1 1 1 23 24
2 -> 1 1 1 1 23 24
3 -> 1 0 1 1 23 24
4 -> 0 0 1 2 24 24
5 -> 0 0 1 2 24 24
6 -> 2 3 1 2 24 24
7 -> 2 3 1 2 24 24
What've I tried:
My idea is for each set i
of (AutoStat_i, Mode_i, Setpoint_i)
, scan each row if AutoStat_i
is <> 0
then set the other values to NaN
, after that I will just do the fillMissing
with Direction.Forward
. Below is the impementation
let calculateNonSFi (df:Frame<_,string>) idx =
let autoStatusName = sprintf "AutoStat_%d" idx
let setpointName = sprintf "Setpoint_%d" idx
let modeName = sprintf "Mode_%d" idx
let setMissingOnMode (s:ObjectSeries<string>) =
let s2 = s.As<float>()
if s2.[autoStatusName] <> 0. then
Series.replaceArray [|setpointName;modeName|] Double.NaN s2
else
s2
df.Rows
|> Series.mapValues setMissingOnMode
|> Frame.ofRows
|> Frame.fillMissing Direction.Forward
|> Frame.fillMissing Direction.Backward
// for each set i do the folding
[0..150]
|> List.fold calculateNonSFi df
It gave me the expected results, however, for 150sets of 8000rows, it took more than 30 minutes to complete. I kinda see where it's wrong as for every set it acts on the whole dataset but I cannot think of a better way.
The logic is quite simple. I believe there should be a better way, do advice, thanks.
UPDATE Here is the code for reproduction
open Deedle
open System
let df =
[
{| AutoStat_1=0;Setpoint_1=23;Mode_1=1;AutoStat_2=0;Setpoint_2=24;Mode_2=1|}
{| AutoStat_1=0;Setpoint_1=23;Mode_1=1;AutoStat_2=1;Setpoint_2=24;Mode_2=1|}
{| AutoStat_1=1;Setpoint_1=23;Mode_1=1;AutoStat_2=1;Setpoint_2=24;Mode_2=1|}
{| AutoStat_1=1;Setpoint_1=23;Mode_1=1;AutoStat_2=0;Setpoint_2=24;Mode_2=1|}
{| AutoStat_1=0;Setpoint_1=24;Mode_1=1;AutoStat_2=0;Setpoint_2=24;Mode_2=2|}
{| AutoStat_1=0;Setpoint_1=24;Mode_1=1;AutoStat_2=0;Setpoint_2=24;Mode_2=2|}
{| AutoStat_1=2;Setpoint_1=24;Mode_1=1;AutoStat_2=3;Setpoint_2=24;Mode_2=2|}
{| AutoStat_1=2;Setpoint_1=24;Mode_1=1;AutoStat_2=3;Setpoint_2=24;Mode_2=2|}
] |> Frame.ofRecords
df.Print()
let calculateNonSFi (df:Frame<_,string>) idx =
let autoStatusName = sprintf "AutoStat_%d" idx
let setpointName = sprintf "Setpoint_%d" idx
let modeName = sprintf "Mode_%d" idx
let setMissingOnMode (s:ObjectSeries<string>) =
let s2 = s.As<float>()
if s2.[autoStatusName] <> 0. then
Series.replaceArray [|setpointName;modeName|] Double.NaN s2
else
s2
df.Rows
|> Series.mapValues setMissingOnMode
|> Frame.ofRows
|> Frame.fillMissing Direction.Forward
let df1 =
[1..2]
|> List.fold calculateNonSFi df
df1.Print()
Advice/Answer from Tomas
df
|> Frame.mapRows (fun _ o ->
[ for i in 0 .. 150 do
let au = o.GetAs<float>("AutoStat_" + string i)
yield "AutoStat_" + string i, au
yield "Mode_" + string i, if au <> 0. then nan else o.GetAs("Mode_" + string i)
yield "Setpoint_" + string i, if au <> 0. then nan else o.GetAs("Setpoint_" + string i) ]
|> series )
|> Frame.ofRows
|> Frame.fillMissing Direction.Forward
which yields correct result but in different column order hence my mistake in the earlier edit
AutoStat_1 Mode_1 Setpoint_1 AutoStat_2 Mode_2 Setpoint_2
0 -> 0 1 23 0 1 24
1 -> 0 1 23 1 1 24
2 -> 1 1 23 1 1 24
3 -> 1 1 23 0 1 24
4 -> 0 1 24 0 2 24
5 -> 0 1 24 0 2 24
6 -> 2 1 24 3 2 24
7 -> 2 1 24 3 2 24