R : Copyright 2002, The R Development Core Team Version 1.5.1 (2002-06-17) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > invisible(options(echo = TRUE)) > library(nnet) Loading required package: MASS > > target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric") > target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric") > target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric") > > y.lrn <- target.lrn[,1] > y.val <- target.val[,1] > y.tst <- target.tst[,1] > y <- c(y.lrn,y.val,y.tst) > > n.lrn <- length(y.lrn) > n.val <- length(y.val) > n.tst <- length(y.tst) > n <- length(y) > > rm(target.lrn,target.val,target.tst) > > mod <- read.table("../cty_mod.txt", + header=F,colClasses="character",col.names=c("file","feature","type")) > > n.mod <- length(mod$file) > > first.time.1 <- TRUE > first.time.2 <- TRUE > > for (i in 1:n.mod) { + + fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="") + print(mod$feature[i]) + + if (mod$type[i]=="chr") { + + f.lrn <- read.table(fn.lrn, + header=T,colClasses="character",blank.lines.skip=F) + f.val <- read.table(fn.val, + header=T,colClasses="character",blank.lines.skip=F) + f.tst <- read.table(fn.tst, + header=T,colClasses="character",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + if (mod$feature[i]=="STATE") { + f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1" + f[f=="OH"|f=="RI"|f=="VI"|f=="WV"] <- "S1" + f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2" + f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2" + f[f=="WY"] <- "S2" + f[f=="AK"|f=="UT"|f=="MS"] <- "S3" + f[f=="NE"|f=="ND"] <- "S4" + f[f=="SD"|f=="SC"] <- "S5" + } + + f <- as.factor(f) + + n.lev <- nlevels(f) + print(paste(" nlevels = ",n.lev)) + + f <- model.matrix(y ~ f - 1) # Note: Intercept removed. + f <- f[,2:ncol(f)] # Note: First dummy deleted. + + if (first.time.2) { + X.2 <- f + first.time.2 <- FALSE + } else { + X.2 <- cbind(prev.X.2,f) + } + prev.X.2 <- X.2 + + } else { + + f.lrn<-read.table(fn.lrn, + header=T,colClasses="numeric",blank.lines.skip=F) + f.val<-read.table(fn.val, + header=T,colClasses="numeric",blank.lines.skip=F) + f.tst<-read.table(fn.tst, + header=T,colClasses="numeric",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + f[is.na(f)] <- 0 + + # if (mod$feature[i]=="DOB") { + # d <- f ; d[d>0] <- 1 # Note: Dummy for missing DOB + # f <- cbind(d,f,f^2) # Note: Quadratic term added to DOB. + # rm(d) + # } + + if (first.time.1) { + X.1 <- f + first.time.1 <- FALSE + } else { + X.1 <- cbind(prev.X.1,f) + } + prev.X.1 <- X.1 + + } + } [1] "LASTGIFT" [1] "PEPSTRFL" [1] " nlevels = 2" [1] "STATE" [1] " nlevels = 33" [1] "RECP3" [1] " nlevels = 2" [1] "DOB" [1] "MAILCODE" [1] " nlevels = 2" [1] "MHUC2" [1] "LASTDATE" [1] "MINRAMNT" > > rm(prev.X.1,prev.X.2) > rm(f.lrn,f.val,f.tst,f) > > wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1 > idx.lrn <- 1:n.lrn > idx.val <- (n.lrn+1):(n.lrn+n.val) > idx.tst <- (n.lrn+n.val+1):n > > yhat <- mat.or.vec(n,1) > ehat <- y - yhat > > best.mse.val <- sum(ehat[idx.val]^2)/n.val > > for (iter in 1:10) { + + y.1 <- y - yhat + + set.seed(100) + + sz <- 10 + n.parm <- 1 + sz * (1 + 1 + 5) # bias + sz * (wt + bias + features) + + best.parm <- mat.or.vec(n.parm,1) + best.sse <- sum(y.1[idx.lrn]^2) + + for (r in seq(from=.1,to=35.1,by=1.0)) { + for (i in 1:50) { + parm <- runif(n=n.parm,min=-r,max=r) + fit.1<-nnet.formula(y.1~X.1,size=sz,maxit=10,weights=wts,Wts=parm,trace=F) + sse <- sum(fit.1$residuals[idx.lrn]^2) + if (sse < best.sse) { + best.sse <- sse + best.parm <- fit.1$wts + } + } + } + + fit.1 <- nnet.formula(y.1~X.1,size=sz,maxit=500,weights=wts,Wts=best.parm) + + print(fit.1$wts) + + yhat <- yhat + fit.1$fitted.values + ehat <- y - yhat + + print(" ") + print(paste(" iter = ",iter + 0.1 )) + mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn + mse.val <- sum(ehat[idx.val]^2)/n.val + mse.tst <- sum(ehat[idx.tst]^2)/n.tst + print(paste(" mse.lrn = ",mse.lrn)) + print(paste(" mse.val = ",mse.val)) + print(paste(" mse.tst = ",mse.tst)) + + if (mse.val > best.mse.val) { + break + } else { + best.mse.val = mse.val + } + + y.2 <- y - yhat + + fit.2 <- lm(y.2~X.2,weights=wts) + + print(summary.lm(fit.2)) + + yhat <- yhat + fit.2$fitted.values + ehat <- y - yhat + + print(" ") + print(paste(" iter = ",iter + 0.2 )) + mse.lrn <- sum(ehat[idx.lrn]^2)/n.lrn + mse.val <- sum(ehat[idx.val]^2)/n.val + mse.tst <- sum(ehat[idx.tst]^2)/n.tst + print(paste(" mse.lrn = ",mse.lrn)) + print(paste(" mse.val = ",mse.val)) + print(paste(" mse.tst = ",mse.tst)) + + if (mse.val > best.mse.val) { + break + } else { + best.mse.val = mse.val + } + } # weights: 71 initial value 1344273.332336 iter 10 value 1344189.647443 iter 20 value 1344119.040262 iter 30 value 1344114.689545 iter 40 value 1344110.432769 final value 1344109.511425 converged [1] -0.56798070 -2.03293954 -1.02205882 0.60249122 1.13104276 [6] -1.51991439 -1.58449668 -1.46290045 1.29753349 -0.42774824 [11] 0.31210641 1.68918940 1.53275080 -1.77874135 1.12567841 [16] -0.27839574 1.71900361 0.66038548 0.09122276 0.37017603 [21] -1.89646064 -0.60407961 -0.83893785 -1.93461421 0.22255481 [26] -1.74317319 0.70871856 -0.46305072 0.73474228 0.36610367 [31] 33.43868797 570.30794410 -30.50115643 -347.44161483 14.67168706 [36] -15.42298553 -1.69156195 8.70754123 -34.58359569 18.79343633 [41] 5.48829453 37.19074104 0.93595551 1.34203943 -1.22950905 [46] -1.68835270 -0.65026529 0.46426107 -1.88646234 1.81800111 [51] -0.35693325 2.00391891 -1.76044469 -1.40904188 0.55076812 [56] -0.05516605 -1.41097256 1.83585687 1.52617629 0.24257837 [61] -1.91614776 0.53069911 1.24671416 1.42399850 -0.79032292 [66] 1.55699742 1.24591820 -1.06742406 -0.26078317 0.81317268 [71] -2.07733082 [1] " " [1] " iter = 1.1" [1] " mse.lrn = 20.0913230213288" [1] " mse.val = 18.8177833651441" [1] " mse.tst = 17.8689766251972" Call: lm(formula = y.2 ~ X.2, weights = wts) Residuals: Min 1Q Median 3Q Max -2.0537 -0.8663 -0.6222 0.0000 199.3929 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.277563 0.131522 -2.110 0.034828 * X.2prev.X.2 0.119149 0.034822 3.422 0.000623 *** X.2fAR 0.083215 0.213175 0.390 0.696273 X.2fAZ 0.180002 0.170242 1.057 0.290364 X.2fCA 0.472434 0.136703 3.456 0.000549 *** X.2fCO 0.309463 0.177257 1.746 0.080843 . X.2fFL 0.183917 0.143028 1.286 0.198487 X.2fGA 0.200918 0.159054 1.263 0.206517 X.2fHI 0.671534 0.282450 2.378 0.017432 * X.2fIA 0.006985 0.197091 0.035 0.971728 X.2fID 0.583795 0.263707 2.214 0.026846 * X.2fIL 0.162677 0.146557 1.110 0.267008 X.2fIN 0.036938 0.163183 0.226 0.820921 X.2fKS 0.106315 0.198297 0.536 0.591863 X.2fKY 0.080478 0.186680 0.431 0.666395 X.2fLA 0.038176 0.186792 0.204 0.838061 X.2fMI 0.158161 0.148506 1.065 0.286873 X.2fMN -0.046473 0.173141 -0.268 0.788384 X.2fMO 0.149187 0.165661 0.901 0.367828 X.2fMT 0.196745 0.268599 0.732 0.463874 X.2fNC 0.215946 0.154669 1.396 0.162665 X.2fNM 0.310254 0.223537 1.388 0.165161 X.2fNV 0.117593 0.218316 0.539 0.590140 X.2fOK -0.046135 0.185835 -0.248 0.803936 X.2fOR 0.440662 0.172487 2.555 0.010628 * X.2fS1 -0.534971 0.492369 -1.087 0.277251 X.2fS2 0.382283 0.244170 1.566 0.117436 X.2fS3 -0.067800 0.181287 -0.374 0.708410 X.2fS4 0.142801 0.213035 0.670 0.502658 X.2fS5 0.291385 0.175172 1.663 0.096233 . X.2fTN 0.043654 0.168620 0.259 0.795723 X.2fTX 0.164862 0.144421 1.142 0.253650 X.2fWA 0.307051 0.158515 1.937 0.052744 . X.2fWI -0.014984 0.165059 -0.091 0.927668 X.2f 0.658615 0.121778 5.408 6.38e-08 *** X.2f -0.406709 0.145310 -2.799 0.005129 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 4.479 on 66864 degrees of freedom Multiple R-Squared: 0.002111, Adjusted R-squared: 0.001589 F-statistic: 4.042 on 35 and 66864 DF, p-value: 9.981e-15 [1] " " [1] " iter = 1.2" [1] " mse.lrn = 20.0489046592394" [1] " mse.val = 18.7917958442691" [1] " mse.tst = 17.8511842252178" # weights: 71 initial value 1340277.857549 iter 10 value 1340270.826358 final value 1340270.587028 converged [1] 2.92151852 -13.81021431 -12.84901519 -2.81980933 12.31629866 [6] 0.87773033 -6.07231924 13.40145865 -8.43801853 14.91700519 [11] -0.09375445 -17.93335098 7.21137834 12.93813571 13.01806033 [16] -11.59949950 12.28219220 -3.14572855 -3.20127064 -10.97433814 [21] -12.66268781 2.48794473 -7.20695934 -3.05261709 13.64900161 [26] -13.21063157 -4.95692049 -6.39651777 12.21934773 -11.34781709 [31] -16.02113109 11.64593540 -0.05441672 -13.84984687 -0.60332099 [36] -18.00759519 -0.80697035 -5.49649065 -4.02701090 11.85408491 [41] -5.36042004 -5.24221488 11.03387389 5.11355437 12.59038289 [46] -2.07946656 -13.26485418 12.16207548 17.26704708 -9.41711305 [51] -14.32747865 16.45636221 7.95598572 2.65956631 -14.07903417 [56] -2.75462789 -4.15855346 4.12231437 -16.37450541 -16.74679777 [61] -1.27109863 -11.46157116 24.41177910 10.94178182 9.27713607 [66] -17.10389324 3.33269315 -5.07116506 12.06286346 5.96184957 [71] 13.24078328 [1] " " [1] " iter = 2.1" [1] " mse.lrn = 20.0339398165671" [1] " mse.val = 18.7940176936220" [1] " mse.tst = 17.8532431245309" > > > x0 <- c(0,n.lrn) > y0 <- c(0,sum(y[idx.lrn]-0.68)) > > x1 <- (1:n.lrn) > y1 <- yhat[idx.lrn] > y1 <- y1-0.68 > y1 <- sort(y1) > y1 <- y1[n.lrn:1] > y1 <- cumsum(y1) > > idx <- 1:n.lrn > > print(paste("maximum profit in learning sample is ", max(y1))) [1] "maximum profit in learning sample is 10086.3770615908" > print(paste("maximum occurs at ", idx[y1==max(y1)])) [1] "maximum occurs at 45355" > > idx <- seq(1,n.lrn,length=200) > > x1 <- x1[idx] > y1 <- y1[idx] > > source("psopts.r"); > if (sz < 10) { + plt.name <- paste("cty_lif_0",sz,".eps",sep="") + } else { + plt.name <- paste("cty_lif_",sz,".eps",sep="") + } > postscript(file=plt.name) > > plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n") > lines(x=x0,y=y0,col="green") > lines(x=x1,y=y1,col="red") > > dev.off() null device 1 > > > proc.time() [1] 28420.46 2745.31 31205.05 0.00 0.00 >