Solved – model selection through shrinkage (Lasso) using glmnet

cox-modellassorsurvival

I would like to use model selection through shrinkage (Lasso) using glmnet. After trying the example of the glmnet manual and tried the procedure with my data. Unfortunately, I encountered an error which I cannot explain or solve. Using the colon data from survival I get a similar error.
Using my data I get: coxnet(x, is.sparse, ix, jx, y, weights, offset, alpha, nobs, : negative event times encountered; not permitted for Cox family.

The following example throws a similar error message:

library(glmnet)
library(survival)
library(Hmisc)
data(colon)
d <- colon

x <- as.matrix(subset(d, select=Cs(rx, sex, age, obstruct, perfor, adhere, surg)))
str(x)
y <- Surv(d$time, 1-(d$etype-1)) # y=Surv(ty,1-tcens) with library(survival)

fit=glmnet(x,y,family="cox")

Error:

Error in coxnet(x, is.sparse, ix, jx, y, weights, offset, alpha, nobs,  : 
  NA/NaN/Inf in foreign function call (arg 4)
In addition: Warning message:
In coxnet(x, is.sparse, ix, jx, y, weights, offset, alpha, nobs,  :
  NAs introduced by coercion

Does anyone can solve this problem?

UPDATE #1

> library(glmnet)
> library(survival)
> library(Hmisc)
> data(colon)
> d <- colon
> x <- model.matrix( ~ rx + sex + age + obstruct + perfor + adhere + surg - 1, d)
> y <- Surv(d$time, 1-(d$status-1))
> fit=glmnet(x,y,family="cox", alpha=1)
> plot(fit, main="plot of fit1")
> coef(fit)
9 x 44 sparse Matrix of class "dgCMatrix"
   [[ suppressing 44 column names ‘s0’, ‘s1’, ‘s2’ ... ]]

rxObs     . .             .            0.009915912  0.023053342  0.034964240  0.045762622  0.055557231  0.064445296  0.072513914
rxLev     . .             .            .            .            .            .            .            .            .          
rxLev+5FU . .             .            .            .            .            .            .            .            .          
sex       . .             0.006558721  0.018477686  0.029513135  0.039581739  0.048766586  0.057144685  0.064786380  0.071755936
age       . 0.0006206431  0.001171455  0.001694417  0.002184036  0.002631495  0.003040261  0.003413601  0.003754518  0.004065773
obstruct  . .             .            .            .            .            .            .            .            .          
perfor    . .             .            .            .            .            .            .            .            .          
adhere    . .             .            .            .            .            .            .            .            .          
surg      . .            -0.009659565 -0.022652281 -0.034581510 -0.045509875 -0.055517191 -0.064677565 -0.073059662 -0.080727029

rxObs      0.079841229  0.086497467  0.092545819  0.097998174  0.102769608  0.10711355  0.111067400  0.114601922  0.117808517
rxLev      .            .            .            .            .            .           .            .            .          
rxLev+5FU  .            .            .            .            .            .           .            .            .          
sex        0.078112060  0.083908378  0.089193878  0.094056781  0.098673426  0.10288297  0.106720490  0.110116168  0.113189834
age        0.004349898  0.004609218  0.004845865  0.005068016  0.005296893  0.00550584  0.005696437  0.005864769  0.006017049
obstruct   .            .            .            0.001442824  0.008298598  0.01453123  0.020197875  0.025432731  0.030215863
perfor     .            .            .            .            .            .           .            .            .          
adhere     .            .            .            .            .            .           .           -0.003663309 -0.007737726
surg      -0.087738419 -0.094148089 -0.100006102 -0.105419225 -0.110596442 -0.11532634 -0.119646795 -0.123523774 -0.127052591

rxObs      0.120730042  0.123410419  0.125870776  0.12811353  0.130157406  0.132020009  0.133717399  0.135264212  0.1365777330
rxLev      .            .            .            .           .            .            .            .           -0.0001863275
rxLev+5FU  .            .            .            .           .            .            .            .            .           
sex        0.115992712  0.118519024  0.120792245  0.12286470  0.124754048  0.126476381  0.128046400  0.129477520  0.1307089994
age        0.006155944  0.006282935  0.006399103  0.00650509  0.006601763  0.006689933  0.006770341  0.006843663  0.0069037396
obstruct   0.034569672  0.038295999  0.041447769  0.04431763  0.046930830  0.049310466  0.051477538  0.053451135  0.0553185179
perfor     .            0.004820480  0.013675046  0.02169004  0.028947473  0.035522843  0.041483425  0.046889294  0.0508071565
adhere    -0.011469722 -0.015236282 -0.019007632 -0.02245421 -0.025602816 -0.028478551 -0.031104509 -0.033501926 -0.0356035714
surg      -0.130275685 -0.133238609 -0.135960039 -0.13844331 -0.140708949 -0.142775799 -0.144661098 -0.146380628 -0.1479475591

rxObs      0.137770390  0.138589879  0.139275735  0.139887337  0.140441437  0.140945486  0.141404496  0.141822612  0.142203511
rxLev     -0.001003592 -0.001885083 -0.002717013 -0.003482434 -0.004182189 -0.004820804 -0.005403324 -0.005934573 -0.006419014
rxLev+5FU  .            .            .            .            .            .            .            .            .          
sex        0.131954461  0.133092595  0.134128182  0.135071981  0.135932216  0.136716258  0.137430837  0.138082090  0.138675616
age        0.006963663  0.007018196  0.007067617  0.007112630  0.007153660  0.007191063  0.007225159  0.007256238  0.007284567
obstruct   0.057073555  0.058654492  0.060097277  0.061413022  0.062612077  0.063704591  0.064699988  0.065606895  0.066433182
perfor     0.055302288  0.059395461  0.063107031  0.066478234  0.069542020  0.072327324  0.074860040  0.077163516  0.079258855
adhere    -0.037515425 -0.039260690 -0.040854114 -0.042307980 -0.043634402 -0.044844439 -0.045948193 -0.046954902 -0.047873019
surg      -0.149447215 -0.150824323 -0.152083490 -0.153232349 -0.154279961 -0.155235073 -0.156105782 -0.156899504 -0.157623018

rxObs      0.142550516  0.142866649  0.143154662  0.143417057  0.143656117  0.143873918  0.144072353
rxLev     -0.006860738 -0.007263484 -0.007630671 -0.007965421 -0.008270585 -0.008548764 -0.008802336
rxLev+5FU  .            .            .            .            .            .            .          
sex        0.139216523  0.139709467  0.140158693  0.140568073  0.140941137  0.141281101  0.141590899
age        0.007310388  0.007333923  0.007355373  0.007374923  0.007392740  0.007408978  0.007423776
obstruct   0.067186021  0.067871943  0.068496899  0.069066312  0.069585120  0.070057822  0.070488517
perfor     0.081165153  0.082899705  0.084478181  0.085914789  0.087222412  0.088412745  0.089496399
adhere    -0.048710272 -0.049473726 -0.050169841 -0.050804517 -0.051383143 -0.051910644 -0.052391513
surg      -0.158282508 -0.158883619 -0.159431504 -0.159930860 -0.160385975 -0.160800759 -0.161178778
> fit2 <- cv.glmnet(x, y, nfolds=10, alpha=1, family='cox')
> print(fit2)
$lambda
 [1] 0.0371729307 0.0338705900 0.0308616201 0.0281199589 0.0256218592 0.0233456839 0.0212717177 0.0193819969 0.0176601537 0.0160912743
[11] 0.0146617699 0.0133592586 0.0121724589 0.0110910912 0.0101057892 0.0092080188 0.0083900038 0.0076446590 0.0069655285 0.0063467302
[21] 0.0057829042 0.0052691669 0.0048010686 0.0043745549 0.0039859315 0.0036318323 0.0033091904 0.0030152110 0.0027473480 0.0025032812
[31] 0.0022808966 0.0020782680 0.0018936404 0.0017254146 0.0015721335 0.0014324695 0.0013052129 0.0011892614 0.0010836107 0.0009873457
[41] 0.0008996326 0.0008197117 0.0007468908 0.0006805391

$cvm
 [1] 13.68406 13.68443 13.68454 13.68426 13.68327 13.68182 13.68043 13.67914 13.67810 13.67741 13.67687 13.67644 13.67622 13.67613
[15] 13.67609 13.67607 13.67608 13.67612 13.67620 13.67629 13.67635 13.67643 13.67651 13.67661 13.67668 13.67676 13.67683 13.67691
[29] 13.67695 13.67702 13.67707 13.67713 13.67717 13.67721 13.67724 13.67728 13.67730 13.67733 13.67735 13.67737 13.67739 13.67740
[43] 13.67742 13.67744

$cvsd
 [1] 0.01337609 0.01347114 0.01355429 0.01357230 0.01348802 0.01335352 0.01317168 0.01294416 0.01275761 0.01254795 0.01237861
[12] 0.01224355 0.01215895 0.01210839 0.01207702 0.01205933 0.01205210 0.01205665 0.01207058 0.01207081 0.01208059 0.01209502
[23] 0.01211153 0.01212944 0.01215048 0.01217188 0.01219305 0.01221303 0.01223293 0.01225301 0.01227166 0.01228760 0.01229744
[34] 0.01230827 0.01231860 0.01232846 0.01233857 0.01234955 0.01235965 0.01236900 0.01237765 0.01238566 0.01239305 0.01239986

$cvup
 [1] 13.69744 13.69790 13.69809 13.69784 13.69676 13.69517 13.69361 13.69209 13.69086 13.68996 13.68924 13.68868 13.68838 13.68824
[15] 13.68816 13.68812 13.68813 13.68818 13.68827 13.68836 13.68843 13.68852 13.68863 13.68874 13.68883 13.68893 13.68903 13.68912
[29] 13.68918 13.68927 13.68935 13.68942 13.68947 13.68952 13.68956 13.68960 13.68964 13.68968 13.68971 13.68974 13.68976 13.68979
[43] 13.68981 13.68984

$cvlo
 [1] 13.67068 13.67096 13.67098 13.67069 13.66978 13.66846 13.66726 13.66620 13.66534 13.66486 13.66449 13.66419 13.66407 13.66402
[15] 13.66401 13.66401 13.66403 13.66407 13.66413 13.66422 13.66427 13.66433 13.66440 13.66448 13.66453 13.66459 13.66464 13.66469
[29] 13.66472 13.66476 13.66480 13.66485 13.66488 13.66490 13.66492 13.66495 13.66497 13.66498 13.66499 13.66500 13.66501 13.66502
[43] 13.66503 13.66504

$nzero
 s0  s1  s2  s3  s4  s5  s6  s7  s8  s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 
  0   1   3   4   4   4   4   4   4   4   4   4   4   5   5   5   5   6   6   6   7   7   7   7   7   7   7   8   8   8   8   8   8 
s33 s34 s35 s36 s37 s38 s39 s40 s41 s42 s43 
  8   8   8   8   8   8   8   8   8   8   8 

$name
                     deviance 
"Partial Likelihood Deviance" 

$glmnet.fit

Call:  glmnet(x = x, y = y, alpha = 1, family = "cox") 

      Df      %Dev    Lambda
 [1,]  0 0.0000000 0.0371700
 [2,]  1 0.0001045 0.0338700
 [3,]  3 0.0002842 0.0308600
 [4,]  4 0.0005483 0.0281200
 [5,]  4 0.0007920 0.0256200
 [6,]  4 0.0009946 0.0233500
 [7,]  4 0.0011630 0.0212700
 [8,]  4 0.0013030 0.0193800
 [9,]  4 0.0014190 0.0176600
[10,]  4 0.0015150 0.0160900
[11,]  4 0.0015960 0.0146600
[12,]  4 0.0016620 0.0133600
[13,]  4 0.0017170 0.0121700
[14,]  5 0.0017660 0.0110900
[15,]  5 0.0018180 0.0101100
[16,]  5 0.0018610 0.0092080
[17,]  5 0.0018970 0.0083900
[18,]  6 0.0019300 0.0076450
[19,]  6 0.0019580 0.0069660
[20,]  6 0.0019820 0.0063470
[21,]  7 0.0020030 0.0057830
[22,]  7 0.0020230 0.0052690
[23,]  7 0.0020390 0.0048010
[24,]  7 0.0020530 0.0043750
[25,]  7 0.0020640 0.0039860
[26,]  7 0.0020730 0.0036320
[27,]  7 0.0020810 0.0033090
[28,]  8 0.0020870 0.0030150
[29,]  8 0.0020930 0.0027470
[30,]  8 0.0020980 0.0025030
[31,]  8 0.0021020 0.0022810
[32,]  8 0.0021050 0.0020780
[33,]  8 0.0021080 0.0018940
[34,]  8 0.0021100 0.0017250
[35,]  8 0.0021120 0.0015720
[36,]  8 0.0021140 0.0014320
[37,]  8 0.0021150 0.0013050
[38,]  8 0.0021160 0.0011890
[39,]  8 0.0021170 0.0010840
[40,]  8 0.0021170 0.0009873
[41,]  8 0.0021180 0.0008996
[42,]  8 0.0021190 0.0008197
[43,]  8 0.0021190 0.0007469
[44,]  8 0.0021190 0.0006805

$lambda.min
[1] 0.009208019

$lambda.1se
[1] 0.03717293

attr(,"class")
[1] "cv.glmnet"
> plot(fit2, main="plot of fit2")

Now does it work!

plot1

plot2

Best Answer

Try replacing

x <- as.matrix(subset(d, select=Cs(rx, sex, age, obstruct, perfor, adhere, surg)))

with

x <- model.matrix( ~ rx + sex + age + obstruct + perfor + adhere + surg - 1, d)

The latter will convert the convert the factor rx to numeric dummy variables in the matrix x.