0

I tried several techniques to balance an imbalanced dataset: oversampling, undersampling, ROSE, and "both" (oversampling combined with undersampling). When I applied these techniques to a small dataset, they all worked perfectly:

library(ROSE)

> table(df4$Price)

     0      1 
100020   1249 
> data.rose <- ROSE(Price~., data=df4, seed=3, N = 200040)$data
>  table(data.rose$Price)

     0      1 
 99960 100080 
# Oversampling
> data.balanced.over <- ovun.sample(Price ~ ., data = df4, method = "over",N = 200040)$data
 table(data.balanced.over$Price)

     0      1 
100020 100020 
> data.balanced.both <- ovun.sample(Price ~ ., data = df4, method = "both",N = 200040)$data
> table(data.balanced.both$Price)

     0      1 
 99903 100137 
> data.balanced.under <- ovun.sample(Price ~ ., data = df4, method = "under",N = 2538)$data
> table(data.balanced.under$Price)

   0    1 
1289 1249 

But when I applied the same techniques to a large dataset, I got an error:

> table(mydata$Price)

       0        1 
33003944  1150753 
> data.rose <- ROSE(Price~., data=mydata, seed=3)$data
Error in omnibus.balancing(formula, data, subset, na.action, N, p, method = "rose",  : 
  Too few observations.
> data.balanced.over <- ovun.sample(Price ~ ., data = mydata, method = "over",N = 66007888)$data
Error in (function (formula, data, method, subset, na.action, N, p = 0.5,  : 
  Too few observations.
>  data.balanced.under <- ovun.sample(Price ~ ., data = mydata, method = "under",N = 2301506)$data
Error in (function (formula, data, method, subset, na.action, N, p = 0.5,  : 
  Too few observations.
agenis
  • 8,069
  • 5
  • 53
  • 102
maira khan
  • 43
  • 1
  • 8
  • 1
    can you provide a reproducible example? – agenis Jul 16 '18 at 13:38
  • data_frame <- fread(file.choose(),sep=",",header=FALSE,stringsAsFactors=FALSE,select=c(1,2,3,4),colClasses=c("as.numeric","as.Date","as.numeric","as.character")) data_frame2 <- fread(file.choose(),sep=",",header=FALSE,stringsAsFactors=FALSE,select=c(1,2,3,4,5),colClasses=c("as.numeric","as.Date","as.numeric","as.numeric","as.numeric")) mydata<-rbind(df1, df2) table(mydata$Price) 0 1 33003944 1150753 – maira khan Jul 16 '18 at 13:45
  • > data.rose <- ROSE(Price~., data=mydata, seed=3)$data Error in omnibus.balancing(formula, data, subset, na.action, N, p, method = "rose", : Too few observations. – maira khan Jul 16 '18 at 13:46

0 Answers0