Solved – Help requested with using custom model in caret() package

caret, neural networks

The caret package (terrific, btw) has a lot of models built in, but if you want to use a model that is not built in, there is a way to do so, described in outline here: http://caret.r-forge.r-project.org/custom_models.html. Reproducing the example given there works just fine.

I'm attempting to do this for the grnn() general regression neural network model and have run into problems I can't fix. My reproducible code example is:

library(caret)
x <- rep(1:100); y <- x^2+x*rnorm(100,0,1); tr <- data.frame(y=y,x=x)
grnnFit <- function(dat, params) smooth(learn(dat), sigma=params$sigma) #train
grnnPred <- function(mod, newx) guess(mod, as.matrix(newx)) #predict
grnnSort <- function(x) x[order(x$sigma),] #sort results
#list of params/functions
lpgrnn <- list(library="grnn",
  type="Regression",
  parameters=data.frame(parameter="sigma", class="numeric", label="Sigma"),
  grid=data.frame(sigma=c(.1, .2, .3)), #only one tuning parameter sigma
  fit=grnnFit,
  predict=grnnPred,
  prob=NULL,
  sort=grnnSort)

set.seed(998)
fitControl <- trainControl(method="cv", number=10)
set.seed(825)
res <- train(y=tr[,-1], x=tr[,1], method=lpgrnn, metric="RMSE", trControl = fitControl)

The error message is:

res <- train(y=tr[,-1], x=tr[,1], method=lpgrnn, metric="RMSE", trControl = fitControl)
Error in train.default(y = tr[, -1], x = tr[, 1], method = lpgrnn, metric = "RMSE", :
attempt to apply non-function

getModelInfo("grnn") return an empty list

> getModelInfo("grnn")
named list()
> 

as opposed to other models; e.g., getModelInfo("nnet") returns:

> getModelInfo("nnet")
$nnet
$nnet$label
[1] "Neural Network"

$nnet$library
[1] "nnet"

$nnet$loop
NULL

$nnet$type
[1] "Classification" "Regression"    

$nnet$parameters
  parameter   class         label
1      size numeric #Hidden Units
2     decay numeric  Weight Decay

$nnet$grid
function (x, y, len = NULL) 
expand.grid(size = ((1:len) * 2) - 1, decay = c(0, 10^seq(-1, 
    -4, length = len - 1)))

$nnet$fit
function (x, y, wts, param, lev, last, classProbs, ...) 
{
    dat <- x
    dat$.outcome <- y
        if (!is.null(wts)) {
            out <- nnet(.outcome ~ ., data = dat, weights = wts, 
                size = param$size, decay = param$decay, ...)
        }
        else out <- nnet(.outcome ~ ., data = dat, size = param$size, 
        decay = param$decay, ...)
    out
}

$nnet$predict
function (modelFit, newdata, submodels = NULL) 
{
    if (modelFit$problemType == "Classification") {
        out <- predict(modelFit, newdata, type = "class")
    }
    else {
        out <- predict(modelFit, newdata, type = "raw")
    }
    out
}

$nnet$prob
function (modelFit, newdata, submodels = NULL) 
{
    out <- predict(modelFit, newdata)
    if (ncol(as.data.frame(out)) == 1) {
        out <- cbind(out, 1 - out)
        dimnames(out)[[2]] <- rev(modelFit$obsLevels)
    }
    out
}

$nnet$varImp
function (object, ...) 
{
    imp <- caret:::GarsonWeights(object, ...)
    if (ncol(imp) > 1) {
        imp <- cbind(apply(imp, 1, mean), imp)
        colnames(imp)[1] <- "Overall"
    }
    else {
        imp <- as.data.frame(imp)
        names(imp) <- "Overall"
    }
    if (!is.null(object$xNames)) 
            rownames(imp) <- object$xNames
    imp
}

$nnet$predictors
function (x, ...) 
if (hasTerms(x)) predictors(x$terms) else NA

$nnet$tags
[1] "Neural Network"    "L2 Regularization"

$nnet$levels
function (x) 
x$lev

$nnet$sort
function (x) 
x[order(x$size, -x$decay), ]

Best Answer

getModelInfo shows you the code for built-in models. grnn is not wrapped by this package, so you won't find code there.
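You can see which models do have built-in code by calling getModelInfo() with no arguments, which returns all of the wrapped models; grnn will not appear in the list:

names(getModelInfo())  #grnn is not among these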

There are a lot of avoidable problems. First, you have your data mixed up:

x <- rep(1:100); y <- x^2+x*rnorm(100,0,1); tr <- data.frame(y=y,x=x)

tr[,-1] is x, so y=tr[,-1] is wrong.
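A corrected call would look something like this (a sketch only; the other issues below still have to be fixed before it will run):

res <- train(x = tr[, -1, drop = FALSE], y = tr[, 1],
             method = lpgrnn, metric = "RMSE", trControl = fitControl)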

For your code, there are a few things:

  • the grid module should be a function instead of a data frame; that is where the "attempt to apply non-function" error comes from (a sketch follows this list). However:
  • the arguments to the predict and fit modules do not include most of the required arguments listed on the help page.
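A minimal sketch of the grid module written as a function (the (x, y, len) signature mirrors the built-in nnet grid shown above; the sigma values are just the ones from the question):

grnnGrid <- function(x, y, len = NULL) {
  #caret calls this to build the tuning grid; only one tuning parameter, sigma
  data.frame(sigma = c(.1, .2, .3))
}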

For this particular package:

  • You might have to do something like this:

    grnnFit <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {
      dat <- x
      dat$.outcome <- y
      smooth(learn(dat, variable.column = ncol(dat)),
             sigma = param$sigma)
    }
    
  • Also, for this package, you might have to use guess inside of apply (a rough, untested sketch follows).
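For example, assuming guess() takes the fitted model and a one-row matrix of predictors (check the grnn help page; this is only a sketch):

grnnPred <- function(modelFit, newdata, submodels = NULL) {
  #guess() handles one observation at a time, so loop over the rows
  apply(as.matrix(newdata), 1, function(r) guess(modelFit, t(r)))
}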

My impression is that you should slow down and read the documentation (it really looks like you did not). There are some weird things about grnn (to me) and it has almost no documentation. That should be the hard part, so read the caret web page and get the easy parts down.

Max

**Update** As Max alluded to, the grnn guess() method can only compute a prediction for a single input vector, so it has to be wrapped in a loop.

The new working code:

#Using caret to determine the optimum value of the grnn() smoothing parameter sigma
grnnFit <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {
  #use argument names EXACTLY as here in all functions
  library(grnn)
  dat <- data.frame(y, x)
  s <- smooth(learn(dat), sigma=param$sigma)
  return(s)
}

grnnPred <- function(modelFit, newdata, preProc=NULL, submodels=NULL) {
  library(grnn)
  library(foreach)
  xlst <- split(newdata, 1:nrow(newdata))
  pred <- foreach(i = xlst, .combine = rbind) %do% {
    #grnn can only compute a prediction for one sample at a time
    guess(modelFit, as.matrix(i)) #provide x values as a matrix
  }
  pred #return the predictions explicitly
}

grnnSort <- function(x) {
  x <- x[order(x$sigma),] #sort the results by sigma
  print(x)
  x
}

grnnGrid <- function(x, y, len=NULL) {
  #only one tuning parameter sigma
  data.frame(sigma=seq(1,4,.05)) #search range
}

grnnLev <- function(x) {
  x$lev #mirrors the nnet levels module; not used for regression
}

#list of params/functions
lpgrnn <- list(
  library="grnn",
  type="Regression",
  parameters=data.frame(parameter="sigma", class="numeric", label="Sigma"),
  grid=grnnGrid,
  fit=grnnFit,
  predict=grnnPred,
  prob=NULL,
  levels=grnnLev,
  sort=grnnSort)

library(caret)
set.seed(123)
x1 <- rep(1:100) + rnorm(100,0,1)
x2 <- rep(1:100) + rnorm(100,0,1)
tr <- data.frame(y=x1*x2, x1, x2)
set.seed(998)
fitControl <- trainControl(method="repeatedcv", repeats=5)
set.seed(825)
res <- train(y~., data=tr, method=lpgrnn, metric="RMSE", trControl = fitControl)
print(res)
print(res$finalModel$sigma)
plot(res)

[Plot: sigma versus RMSE]