
R : Copyright 2005, The R Foundation for Statistical Computing
Version 2.1.1  (2005-06-20), ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

> invisible(options(echo = TRUE))
> library(rpart);
> 
> train <- read.table("loans.dat",header=F,col.names=c("x1","x2","y"));
> 
> fit <- rpart(y ~ x1 + x2, data=train, method="class");
> 
> print(fit)
n= 250 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 250 91 0 (0.63600000 0.36400000)  
  2) x1>=0.3440913 170 16 0 (0.90588235 0.09411765)  
    4) x2>=0.2215745 160  8 0 (0.95000000 0.05000000) *
    5) x2< 0.2215745 10  2 1 (0.20000000 0.80000000) *
  3) x1< 0.3440913 80  5 1 (0.06250000 0.93750000) *
> 
> print(fit$control)
$minsplit
[1] 20

$minbucket
[1] 7

$cp
[1] 0.01

$maxcompete
[1] 4

$maxsurrogate
[1] 5

$usesurrogate
[1] 2

$surrogatestyle
[1] 0

$maxdepth
[1] 30

$xval
[1] 10

> 
> source("psopts.r");
> postscript(file="cart01.eps");
> 
> par(mar=c(0,0,0,0));
> plot(fit);
> text(fit);
> par();
$xlog
[1] FALSE

$ylog
[1] FALSE

$adj
[1] 0.5

$ann
[1] TRUE

$ask
[1] FALSE

$bg
[1] "transparent"

$bty
[1] "o"

$cex
[1] 1

$cex.axis
[1] 1

$cex.lab
[1] 1

$cex.main
[1] 1.2

$cex.sub
[1] 1

$cin
[1] 0.15 0.20

$col
[1] "black"

$col.axis
[1] "black"

$col.lab
[1] "black"

$col.main
[1] "black"

$col.sub
[1] "black"

$cra
[1] 10.8 14.4

$crt
[1] 0

$csi
[1] 0.2

$cxy
[1] 0.04628571 0.03607912

$din
[1] 7 5

$err
[1] 0

$family
[1] ""

$fg
[1] "black"

$fig
[1] 0 1 0 1

$fin
[1] 7 5

$font
[1] 1

$font.axis
[1] 1

$font.lab
[1] 1

$font.main
[1] 2

$font.sub
[1] 1

$gamma
[1] 1

$lab
[1] 5 5 7

$las
[1] 0

$lend
[1] "round"

$lheight
[1] 1

$ljoin
[1] "round"

$lmitre
[1] 10

$lty
[1] "solid"

$lwd
[1] 1

$mai
[1] 0 0 0 0

$mar
[1] 0 0 0 0

$mex
[1] 1

$mfcol
[1] 1 1

$mfg
[1] 1 1 1 1

$mfrow
[1] 1 1

$mgp
[1] 3 1 0

$mkh
[1] 0.001

$new
[1] FALSE

$oma
[1] 0 0 0 0

$omd
[1] 0 1 0 1

$omi
[1] 0 0 0 0

$pch
[1] 1

$pin
[1] 7 5

$plt
[1] 0 1 0 1

$ps
[1] 12

$pty
[1] "m"

$smo
[1] 1

$srt
[1] 0

$tck
[1] NA

$tcl
[1] -0.5

$tmag
[1] 1.2

$type
[1] "p"

$usr
[1] 0.9200000 3.0800000 0.1314286 1.0334066

$xaxp
[1] 1 3 4

$xaxs
[1] "r"

$xaxt
[1] "s"

$xpd
[1] FALSE

$yaxp
[1] 0.2 1.0 4.0

$yaxs
[1] "r"

$yaxt
[1] "s"

> 
> dev.off();
null device 
          1 
> 
> size <- 100;
> 
> grid <- mat.or.vec((size+1)*(size+1),2);
> for (i in 0:size) {
+ for (j in 0:size) {
+   ij <- (size+1)*j+i+1
+   grid[ij,1] <- i;
+   grid[ij,2] <- j;
+ }
+ }
> grid <- grid/(size+1);
> 
> gx1 <- grid[,1];
> gx2 <- grid[,2];
> 
> dflt <- (gx1 < 0.3440913) | ( (gx1 >= 0.3440913) & (gx2 < 0.2215745) );
> 
> edge <- mat.or.vec((size+1)*(size+1),1);
> edge <- as.logical(edge);
> for (i in 1:size) {
+ for (j in 1:size) {
+   left <- (size+1)*j+i+1;
+   rite <- (size+1)*(j-1)+i+1;
+   top  <- left;
+   bot  <- left - 1;
+   edge[left] <- 
+     ( dflt[left] && !dflt[rite] ) ||
+     ( !dflt[left] && dflt[rite] ) ||
+     ( dflt[top]  && !dflt[bot]  ) || 
+     ( !dflt[top]  && dflt[bot]  ) ;
+ }
+ }
> 
> X <- cbind(train$x1,train$x2);
> y <- (train$y==1);
> 
> source("psopts.r");
> postscript(file="cart02.eps");
> 
> plot(X,type='n',xlab="FICO Score",ylab="P-Index");
> points(X[y,],pch='o',col="red");
> points(X[!y,],pch='o',col="green");
> points(grid[dflt,],pch='.',col="red");
> points(grid[!dflt,],pch='.',col="green");
> points(grid[edge,],pch='+',col="black",cex=0.4);
> 
> dev.off();
null device 
          1 
> 
> test <- read.table("eval.dat",header=F,col.names=c("x1","x2","y"));
> 
> tx1 <- test$x1;
> tx2 <- test$x2;
> ty <- test$y;
> 
> dflt <- (tx1 < 0.3440913) | ( (tx1 >= 0.3440913) & (tx2 < 0.2215745) );
> 
> dflt <- as.numeric(dflt);
> 
> err <- abs(ty-dflt);
> print(mean(err));
[1] 0.08875
> 
> proc.time()
[1]  5.20  0.37 11.25  0.00  0.00
> 
