3

I am trying to fit a mixture of a gaussian and a uniform distribution in R with only limited success.

I rely on flexmix to provide an EM framework and I fit the components using slightly adapted versions of the matching moment estimators from fitdistrplus package. (The adaptation is only a replacement of the sample mean and variance with weighted versions.)

As I am not terribly familiar with flexmix the code (below) is quite a hack.

It looks as if the uniform distribution gets too little weight.

Any help with how to improve the fit is highly appreciated.

First the example

Generate toy data

a <- rnorm(10000)
b <- runif(10000, min=1, max=5)
c <- c(a,b) + 10
testdata <- data.frame(c=c)

sample data

The ideal fit

comp1 <- 0.5*dnorm(testdata$c, mean=10)
comp2 <- 0.5*dunif(testdata$c, min=11, max=15)

ideal fit

The fit

fit <- flexmix(c~1, data=testdata, k=2,
               model=FLXMCnormplusunifw(),
               cluster=as.integer(testdata$c < 12)+1)

fit

The functions

## adapted from the 'fitdistrplus' package
fitnorm_w <- function(data, weights) {
  library("Hmisc")
  n <- length(data)
  m <- weighted.mean(data, weights)
  v <- wtd.var(data, weights, normwt = TRUE)

  list(mean=m, sd=sqrt(v))
}
## adapted from the 'fitdistrplus' package
fitunif_w <- function(data, weights) {
  library("Hmisc")
  n <- length(data)
  m <- weighted.mean(data, weights)
  v <- wtd.var(data, weights, normwt = TRUE)

  min1 <- m - sqrt(3*v)
  max1 <- m + sqrt(3*v)

  list(min=min1,
       max=max1)
}

## generate needed S4 class for flexmix -- "norm"
FLXMCnorm1w <- function (formula = . ~ .)
{
  z <- new("FLXMC", weighted = TRUE, formula = formula, dist = "norm",
           name = "model-based univariate norm clustering")
  z@defineComponent <- expression({
    logLik <- function(x, y) dnorm(y, mean = mean, sd = sd,
                                    log = TRUE)
    predict <- function(x, ...) matrix(mean, nrow = nrow(x),
                                       ncol = 1, byrow = TRUE)
    new("FLXcomponent",
        parameters = list(mean = as.vector(mean), sd = as.vector(sd)),
        df = df, logLik = logLik, predict = predict)
  })
  z@fit <- function(x, y, w, ...) {
    ##browser()
    para <- fitnorm_w(as.vector(y), as.vector(w))
    para$df <- 2
    with(para, eval(z@defineComponent))
  }
  z
}

## generate needed S4 class for flexmix -- "unif"
FLXMCunif1w <- function (formula = . ~ .)
{
  z <- new("FLXMC", weighted = TRUE, formula = formula, dist = "unif",
           name = "model-based univariate uniform clustering")
  z@defineComponent <- expression({
    logLik <- function(x, y) dunif(y, min = min, max = max,
                                   log = TRUE)
    predict <- function(x, ...) matrix(runif(nrow(x), min=min, max=max),
                                       nrow = nrow(x), ncol = 1, byrow = TRUE)
    new("FLXcomponent",
        parameters = list(min = as.vector(min), max = as.vector(max)),
        df = df, logLik = logLik, predict = predict)
  })
  z@fit <- function(x, y, w, ...) {
    ##browser()
    para <- fitunif_w(as.vector(y), as.vector(w))
    para$df <- 2
    with(para, eval(z@defineComponent))
  }
  z
}


## ---------------------------------- ##
## the norm+uniform flexmix class     ##
## ---------------------------------- ##

setClass("FLXMCnormplusunifw", contains = "FLXMC")
##
FLXMCnormplusunifw <- function(formula = . ~ ., ...)
{
  new("FLXMCnormplusunifw", FLXMCnorm1w(formula, ...),
      name = paste("FLXMCnormplusunifw" , sep=":"))
}
##
setMethod("FLXremoveComponent", signature(model = "FLXMCnormplusunifw"),
          function(model , nok, ...)
          {
            if (1 %in% nok) as(model , "FLXMC") else model
          })
##
setMethod("FLXmstep", signature(model = "FLXMCnormplusunifw"),
          function(model , weights , ...)
          {
            ##browser()
            comp.1 <- FLXMCunif1w()
            c(list(comp.1@fit(model@x, model@y, weights[, -1,drop=FALSE])),
              FLXmstep(as(model , "FLXMC"), weights[, 1, drop=FALSE]))
          })
Andreas
  • 1,106
  • 9
  • 26

0 Answers0