I get the data from the sql server to perform regression analysis, and then the regression results i return back to another sql table.
library("RODBC")
library(sqldf)
dbHandle <- odbcDriverConnect("driver={SQL Server};server=MYSERVER;database=MYBASE;trusted_connection=true")
sql <-
"select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo
from dbo.mytable"
df <- sqlQuery(dbHandle, sql)
After this query i must perform regression analysis separately for groups
my_lm <- function(df) {
lm(SaleCount~IsPromo, data = df)
}
reg=df %>%
group_by(CustomerName,ItemRelation,DocumentNum,DocumentYear) %>%
nest() %>%
mutate(fit = map(data, my_lm),
tidy = map(fit, tidy)) %>%
select(-fit, - data) %>%
unnest()
View(reg)
#save to sql table
sqlSave(dbHandle, as.data.frame(reg), "dbo.mytableforecast", verbose = TRUE) # use "append = TRUE" to add rows to an existing table
odbcClose(dbHandle)
The question:
The script works automatically, i.e. in the scheduler there is task that script in certain time was launched. For example, today was loaded 100 observations.
From 01.01.2017-10.04.2017
Script performed regression and returned data to sql table. Tomorrow will loaded new 100 observations.
11.04.2017-20.07.2017
I.E. when tomorrow the data will loaded and the script will start at 10 pm, it must work only with data from 11.04.2017-20.07.2017, and not from 01.01.2017-20.07.2017
the situation is complicated by the fact that after the regression the column Dt is dropped, so the solution given me here does not work Automatic transfer data from the sql to R because Dt is absent.
How can i set the condition for schedule select Dt ,CustomerName ,ItemRelation ,SaleCount ,DocumentNum ,DocumentYear ,IsPromo from dbo.mytable "where Dt>the last date when the script was launched"
is it possible to create this expression?
data example from sql
df=structure(list(Dt = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L,
18L, 19L), .Label = c("2017-10-12 00:00:00.000", "2017-10-13 00:00:00.000",
"2017-10-14 00:00:00.000", "2017-10-15 00:00:00.000", "2017-10-16 00:00:00.000",
"2017-10-17 00:00:00.000", "2017-10-18 00:00:00.000", "2017-10-19 00:00:00.000",
"2017-10-20 00:00:00.000", "2017-10-21 00:00:00.000", "2017-10-22 00:00:00.000",
"2017-10-23 00:00:00.000", "2017-10-24 00:00:00.000", "2017-10-25 00:00:00.000",
"2017-10-26 00:00:00.000", "2017-10-27 00:00:00.000", "2017-10-28 00:00:00.000",
"2017-10-29 00:00:00.000", "2017-10-30 00:00:00.000"), class = "factor"),
CustomerName = structure(c(1L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), .Label = c("x1", "x10", "x11", "x12", "x13", "x14",
"x15", "x16", "x17", "x18", "x2", "x3", "x4", "x5", "x6",
"x7", "x8", "x9"), class = "factor"), ItemRelation = c(13322L,
13322L, 13322L, 13322L, 13322L, 13322L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13163L,
13322L, 158010L, 11706L, 13163L, 13322L, 158010L, 11706L,
13163L, 13322L, 158010L, 11706L), SaleCount = c(10L, 3L,
1L, 0L, 9L, 5L, 5L, 11L, 7L, 0L, 5L, 11L, 1L, 0L, 0L, 19L,
10L, 0L, 1L, 12L, 1L, 11L, 6L, 0L, 167L, 7L, 0L, 16L, 165L,
1L, 0L, 0L, 29L, 0L, 0L, 11L), DocumentNum = c(36L, 36L,
36L, 36L, 36L, 36L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L,
36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 131L, 36L,
89L, 51L, 131L, 36L, 89L, 51L, 131L, 36L, 89L, 51L), DocumentYear = c(2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L),
IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Dt", "CustomerName",
"ItemRelation", "SaleCount", "DocumentNum", "DocumentYear", "IsPromo"
), class = "data.frame", row.names = c(NA, -36L))