r: 2 subset function, 2nd works but 1st don't

Question

I am trying to write my own function. There are 2 subset functions used to delete part of the data. The 2nd subset function works, but 1st one fails. I checked the class and level of the field that is used to subset the data, and everything seems right. The code is shown below:

mdl.sum<-function(seed=0, times=0, method=c("Boostrap", "cv"), alpha=0.05, data, Prev.vars=NULL, varia, depvar, Expo=NULL, WP=NULL, weight, offset=NULL, p=1.6, outfile){

  mdlout<-data.frame(var.lev=character(0), Estimate = numeric(0),pvalue=numeric(0), AIC=numeric(0), converged=numeric(0), Varorder=numeric(0), CVGroup = numeric(0) )

  set.seed(seed)
  data$y<-data[, depvar]/data[, weight]
  loc_y<-dim(data)[2]

  index<-createResample(data$y, times, list=FALSE)

  foreach(j = 1:times) %dopar% {

    CV.Grp<-j
    train <- data[index[,j],]

    for (i in varia) {
      if (i %in% Prev.vars){next} else{
        if (length(Prev.vars)==0) {
          Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[i]), collapse="+")), collapse="~"))
        } else{
          Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[Prev.vars],names(train)[i]), collapse="+")), collapse="~"))
        }

        LR.glm<-glm(Mdl.form, family=tweedie(var.power=p, link.power=0),data=train, weights=train[, weight])
        temp.coef<-data.frame(coef(summary(LR.glm)))
        temp.coef<-cbind(var.lev=row.names(temp.coef), temp.coef)
        mdlout<-rbind(mdlout, cbind(temp.coef[,c(1:2,5)], AIC=AICtweedie(LR.glm), converged=(LR.glm$converged+0), Varorder=i, CVGroup = CV.Grp ))

      }
    }
  }

  mdlout2<-subset(mdlout, mdlout$converged==1)
  mdlout2<-subset(mdlout, mdlout$var.lev!="(Intercept)")

  write.csv(mdlout2, file=outfile, row.names = FALSE)

}

Then, run the code below:

library(lattice)
library(Matrix)
library(MASS)
library(RODBC)
library(statmod)
library(tweedie)
library(ggplot2)
library(gtools)
library(colorspace)
library(vcd)
library(cvTools)
library(car)
library(caret)
library(foreach)
library(xtable)

mdl.sum(seed=0, times=5, method="Boostrap", alpha=0.2, data=table.1.2,Prev.vars=2, varia=1:3,depvar=7, Expo=4, WP=8, weight=4, offset=10, p=1.7, outfile='D:/b1.csv')

Based on Jdbaba's suggestion, I dput(table.1.2) and it is shown as below.

> dput(table.1.2)
structure(list(premiekl = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Class", 
"Code: 1=Weight over 60kg and more than 2 gears", "Code: 2=Other"
)), moptva = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Age", 
"Code: 1=At most 1 year", "Code: 2=2 years or more")), zon = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor", comment = c("Name: Zone", 
"Code: 1=Central and semi-central parts of Sweden's three largest cities", 
"Code: 2=suburbs and middle-sized towns", "Code: 3=Lesser towns, except those in 5 or 7", 
"Code: 4=Small towns and countryside, except 5--7", "Code: 5=Northern towns", 
"Code: 6=Northern countryside", "Code: 7=Gotland (Sweden's largest island)"
)), dur = structure(c(62.9, 112.9, 133.1, 376.6, 9.4, 70.8, 4.4, 
352.1, 840.1, 1378.3, 5505.3, 114.1, 810.9, 62.3, 191.6, 237.3, 
162.4, 446.5, 13.2, 82.8, 14.5, 844.8, 1296, 1214.9, 3740.7, 
109.4, 404.7, 66.3), comment = c("Name: Duration", "Unit: year"
)), medskad = structure(c(18256L, 13632L, 20877L, 13045L, 0L, 
15000L, 8018L, 8232L, 7418L, 7318L, 6922L, 11131L, 5970L, 6500L, 
7754L, 6933L, 4402L, 8214L, 0L, 5830L, 0L, 4728L, 4252L, 4212L, 
3846L, 3925L, 5280L, 7795L), comment = c("Name: Claim severity", 
"Unit: SEK")), antskad = structure(c(17L, 7L, 9L, 7L, 0L, 1L, 
1L, 52L, 69L, 75L, 136L, 2L, 14L, 1L, 43L, 34L, 11L, 8L, 0L, 
3L, 0L, 94L, 99L, 37L, 56L, 4L, 5L, 1L), comment = "Name: No. claims"), 
    riskpre = structure(c(4936L, 845L, 1411L, 242L, 0L, 212L, 
    1829L, 1216L, 609L, 398L, 171L, 195L, 103L, 104L, 1740L, 
    993L, 298L, 147L, 0L, 211L, 0L, 526L, 325L, 128L, 58L, 144L, 
    65L, 118L), comment = c("Name: Pure premium", "Unit: SEK"
    )), helpre = structure(c(2049L, 1230L, 762L, 396L, 990L, 
    594L, 396L, 1229L, 738L, 457L, 238L, 594L, 356L, 238L, 1024L, 
    615L, 381L, 198L, 495L, 297L, 198L, 614L, 369L, 229L, 119L, 
    297L, 178L, 119L), comment = c("Name: Actual premium", "Note: The premium for one year according to the tariff in force 1999", 
    "Unit: SEK")), skadfre = structure(c(0.27027027027027, 0.0620017714791851, 
    0.067618332081142, 0.0185873605947955, 0, 0.0141242937853107, 
    0.227272727272727, 0.1476853166714, 0.0821330793953101, 0.0544148588841326, 
    0.0247034675676167, 0.0175284837861525, 0.017264767542237, 
    0.0160513643659711, 0.224425887265136, 0.143278550358196, 
    0.0677339901477833, 0.0179171332586786, 0, 0.036231884057971, 
    0, 0.111268939393939, 0.0763888888888889, 0.0304551814964195, 
    0.0149704600743176, 0.036563071297989, 0.0123548307388189, 
    0.0150829562594268), comment = c("Name: Claim frequency", 
    "Unit: /year")), offset = structure(c(7.6251071482389, 7.11476944836646, 
    6.63594655568665, 5.98141421125448, 6.89770494312864, 6.38687931936265, 
    5.98141421125448, 7.11395610956603, 6.60394382460047, 6.12468339089421, 
    5.47227067367148, 6.38687931936265, 5.87493073085203, 5.47227067367148, 
    6.93147180559945, 6.42162226780652, 5.9427993751267, 5.28826703069454, 
    6.20455776256869, 5.6937321388027, 5.28826703069454, 6.41999492814714, 
    5.91079664404053, 5.43372200355424, 4.77912349311153, 5.6937321388027, 
    5.18178355029209, 4.77912349311153), comment = c("Name: Actual premium", 
    "Note: The premium for one year according to the tariff in force 1999", 
    "Unit: SEK"))), .Names = c("premiekl", "moptva", "zon", "dur", 
"medskad", "antskad", "riskpre", "helpre", "skadfre", "offset"
), row.names = c(NA, -28L), comment = c("Title: Partial casco moped insurance from Wasa insurance, 1994--1999", 
"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas", "Copyright: http://www2.math.su.se/~esbj/GLMbook/"
), class = "data.frame")

Hi Jdbaba,I am a newbie. did you mean using dput() to upload table.1.2 to the website? if so, would you please give more detail how to do it? Thank you very much. Also to be clear, table.1.2 can be generated using the codes in the last segment of the codes above. Thanks again. — user2037892, Feb 22 '14 at 18:21
You should NOT be using the dataframe name with $ in the 'subset' argument to the `subset` function. It's also suggested in the help for `subset` that it not be used inside functions. Use `[[` instead. — IRTFM, Feb 22 '14 at 19:49

score 0 · Accepted Answer · answered Feb 22 '14 at 21:24

0

I think there is just a small typo in the code:

# mdlout2<-subset(mdlout, mdlout$var.lev!="(Intercept)") # Should be:
mdlout2<-subset(mdlout2, mdlout2$var.lev!="(Intercept)")

But, as @IShouldBuyABoat suggests, it would be safer to subset using [

mdlout2<-mdlout[mdlout$var.lev!="(Intercept)") & mdlout$converged==1,]

answered Feb 22 '14 at 21:24

nograpes

18,623
1
44
67

If you like the answer, you can click the check box beside the answer. – nograpes Feb 22 '14 at 21:56

r: 2 subset function, 2nd works but 1st don't

1 Answers1