R version 2.6.2 (2008-02-08)
Copyright (C) 2008 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for HTML help.
Type 'q()' to quit R.

CWD = /Users/chl/Notes/022_RMB/chap_3/chapter
> # Regression Methods in Biostatistics, Vittinghoff, Glidden, Shiboski
> # and McCulloch (Springer, 2005)
> #
> # Chapter 3. Basic Statistical Methods
> #
> # Time-stamp: <2008-02-10 13:41:06 chl>
> 
> postscript(file="RMB_c3.ps")
> 
> ##############################################################################
> ### t-test
> 
> # first, we load the data
> HERS <- read.table("hersdata.txt",header=TRUE,sep="\t")
> HERS.women.wht_diab <- subset(HERS,diabetes==0)
> HERS.women.wht_diab$exercise <- factor(HERS.women.wht_diab$exercise,labels=c("No","Yes"))
> 
> # before carrying out the t-test, we take a look at the data with the help
> # of some graphics
> boxplot(glucose~exercise,data=HERS.women.wht_diab)
> title(xlab="Exercise",ylab="Glucose Level (mg/dL)")
> 
> op <- par(mfrow=c(2,2))
> with(HERS.women.wht_diab, hist(glucose[exercise=="No"],
+ main="Exercise: No",xlab="Glucose Level (mg/dL)"))
> with(HERS.women.wht_diab, hist(glucose[exercise=="Yes"],
+ main="Exercise: Yes",xlab="Glucose Level (mg/dL)"))
> with(HERS.women.wht_diab, qqnorm(glucose[exercise=="No"]))
> with(HERS.women.wht_diab, qqline(glucose[exercise=="No"]))
> with(HERS.women.wht_diab, qqnorm(glucose[exercise=="Yes"]))
> with(HERS.women.wht_diab, qqline(glucose[exercise=="Yes"]))
> par(op)
> 
> # Note:
> # the Q-Q plots are not very useful here since the sample size is large
> # enough for the t-test to be valid
> 
> # compute overall and group means
> mean(HERS.women.wht_diab$glucose,na.rm=TRUE)
[1] 96.66043
> group.means <- with(HERS.women.wht_diab, tapply(glucose,exercise,mean,na.rm=TRUE))
> (dobs <- diff(group.means))
      Yes 
-1.692789 
> 
> # now compute the t statistic, assuming equal variances (two-sided test)
> (glucose.ttest <- t.test(glucose~exercise,data=HERS.women.wht_diab,var.equal=TRUE))

	Two Sample t-test

data:  glucose by exercise 
t = 3.8685, df = 2030, p-value = 0.0001130
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 0.8346242 2.5509539 
sample estimates:
 mean in group No mean in group Yes 
         97.36104          95.66825 

> 
> # compare with the Welch-corrected t-test (the default in R)
> t.test(glucose~exercise,data=HERS.women.wht_diab,var.equal=FALSE)

	Welch Two Sample t-test

data:  glucose by exercise 
t = 3.8995, df = 1858.333, p-value = 9.984e-05
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 0.8413953 2.5441828 
sample estimates:
 mean in group No mean in group Yes 
         97.36104          95.66825 

> 
> # given the sample size, one may reasonably rely on the asymptotic
> # convergence of the t distribution to the standard normal
> 1-pnorm(as.numeric(glucose.ttest$statistic))
[1] 5.476017e-05
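> # Note (sketch, not run in this session):
> # since the test is two-sided, the normal approximation to the p-value
> # should use both tails, i.e.
> # 2*(1-pnorm(abs(as.numeric(glucose.ttest$statistic))))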
> 
> ##############################################################################
> ### one-way ANOVA
> HERS$raceth <- factor(HERS$raceth,labels=c("White","Afr Amer","Other"))
> summary(HERS$raceth)
   White Afr Amer    Other 
    2451      218       94 
> 
> # get a numerical summary tabulated by the factor of interest (ethnicity)
> my.summary <- function(x,digits=3) {
+   tmp <- summary(x)
+   tmp <- c(tmp,sd(x),length(x),sum(is.na(x)))
+   names(tmp)[7:9] <- c("Sd","N","NAs")
+   return(round(tmp,digits))
+ }
> with(HERS, by(SBP,raceth,my.summary,digits=2))
INDICES: White
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.      Sd       N     NAs 
  83.00  121.00  133.00  134.80  146.00  224.00   18.83 2451.00    0.00 
------------------------------------------------------------ 
INDICES: Afr Amer
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.      Sd       N     NAs 
  98.00  123.00  137.00  138.20  150.00  194.00   19.99  218.00    0.00 
------------------------------------------------------------ 
INDICES: Other
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.      Sd       N     NAs 
  95.00  119.50  134.00  135.20  148.80  187.00   21.26   94.00    0.00 
> 
> # plot the between-group dispersion around the overall mean
> plot.design(SBP~raceth,data=HERS)
> 
> # perform the ANOVA and display the ANOVA table
> hers.aov <- aov(SBP~raceth,data=HERS)
> summary(hers.aov)
              Df Sum Sq Mean Sq F value Pr(>F)  
raceth         2   2384    1192  3.2981 0.0371 *
Residuals   2760 997618     361                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
> 
> model.tables(hers.aov)
Tables of effects

 raceth 
         White Afr Amer   Other
       -0.2857    3.164  0.1114
rep  2451.0000  218.000 94.0000
> 
> # we can check that there is no clear departure from the ANOVA assumptions
> op <- par(mfrow=c(2,2))
> plot(hers.aov)
> par(op)
> 
> # Notes:
> # 1. some individuals have large residuals, so we should check whether
> #    anything else goes wrong with these observations (916, 2161 and 175)
> #    among the other covariates...
> # 2. pairwise comparisons should follow (see the sketch below)
> 
> # As suggested by the authors, we could perform an ANOVA which does not
> # assume equal variances among groups, as we did for the t-test.
> # B. L. Welch proposed to correct the df in a way that is close to what is
> # done in the two-sample comparison.
> #
> # Welch, B.L. On the comparison of several mean values: An alternative
> # approach, Biometrika 38: 330--336, 1951.
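> # both follow-ups can be sketched as below (not run in this session):
> # pairwise comparisons with Tukey's HSD, and Welch's unequal-variance
> # one-way ANOVA as implemented in oneway.test()
> # TukeyHSD(hers.aov)
> # oneway.test(SBP~raceth,data=HERS)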
> 
> # though it is not really recommended here, we could also perform a
> # non-parametric ANOVA (based on ranks)
> kruskal.test(SBP~raceth,data=HERS)

	Kruskal-Wallis rank sum test

data:  SBP by raceth 
Kruskal-Wallis chi-squared = 4.2182, df = 2, p-value = 0.1213

> 
> plot.design(SBP~raceth,data=HERS,fun=median)
> 
> # Note:
> # the test is now non-significant! (it operates on ranks, hence is less
> # sensitive to the few very large SBP values)
> 
> ##############################################################################
> ### simple linear regression
> 
> # first, we plot the data
> plot(SBP~age,data=HERS,cex=.5,xlab="Age (years)",ylab="Systolic Blood Pressure (mmHg)")
> 
> # and we add the average SBP for each decile of age
> age.decile <- quantile(HERS$age,probs=seq(0,1,.1))
> 
> tmp <- as.numeric(age.decile)
> xx <- numeric(length(tmp)-1)  # mid-interval points
> for (i in 1:(length(tmp)-1)) xx[i] <- (tmp[i]+tmp[i+1])/2
> HERS$ageq <- cut(HERS$age,breaks=age.decile,labels=FALSE)
> points(xx,as.numeric(tapply(HERS$SBP,HERS$ageq,mean)),pch=15,cex=1.2)
> 
> # compute the linear regression
> HERS.lm <- lm(SBP~age,data=HERS)
> summary(HERS.lm)

Call:
lm(formula = SBP ~ age, data = HERS)

Residuals:
    Min      1Q  Median      3Q     Max 
-49.405 -13.150  -1.405  11.378  85.934 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 103.62949    3.59573  28.820   <2e-16 ***
age           0.47173    0.05368   8.787   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 18.77 on 2761 degrees of freedom
Multiple R-squared: 0.02721,	Adjusted R-squared: 0.02685 
F-statistic: 77.21 on 1 and 2761 DF,  p-value: < 2.2e-16 

> 
> # add the regression line to the previous plot
> abline(HERS.lm)
> 
> # some diagnostic plots
> op <- par(mfrow=c(2,2))
> plot(HERS.lm)
> par(op)
> 
> # Note:
> # results differ from those presented by the authors because they consider
> # only a 10% random sample of the total sample (cf. chap3.do on the website)
> 
> # doing the analysis with a random sample (10% of the total size)
> set.seed(12386)
> idx <- sample(HERS$id,size=nrow(HERS)/10)
> HERS.sample10 <- HERS[idx,]
> HERS.lm2 <- lm(SBP~age,data=HERS.sample10)
> summary(HERS.lm2)

Call:
lm(formula = SBP ~ age, data = HERS.sample10)

Residuals:
    Min      1Q  Median      3Q     Max 
-41.238 -12.928  -1.258  11.052  63.242 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 109.8778    11.1384   9.865   <2e-16 ***
age           0.3600     0.1666   2.161   0.0315 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 18.94 on 274 degrees of freedom
Multiple R-squared: 0.01676,	Adjusted R-squared: 0.01318 
F-statistic: 4.672 on 1 and 274 DF,  p-value: 0.03153 

> 
> # we can now predict SBP for a woman of average age
> as.numeric(HERS.lm$coefficients[1] + HERS.lm$coefficients[2]*mean(HERS$age))
[1] 135.0695
> 
> # but we could also create a new data frame containing fitted values for
> # each age, along with the prediction interval (wider than the confidence
> # interval for the mean response)
> HERS.lm.fit <- data.frame(age=seq(min(HERS$age),max(HERS$age),by=1))
> fitted.values <- predict(HERS.lm,HERS.lm.fit,interval="prediction")
> fitted.values2 <- predict(HERS.lm,HERS.lm.fit,interval="confidence")
> HERS.lm.fit <- cbind(HERS.lm.fit,fitted.values,fitted.values2[,2:3])
> colnames(HERS.lm.fit)[5:6] <- c("lwr2","upr2")
> 
> with(HERS.lm.fit, {
+   matplot(age,cbind(lwr,upr),type="l",xlab="Age",ylab="Predicted SBP",col=2,lty=2)
+   matlines(age,cbind(lwr2,upr2),type="l",col=3,lty=2)
+   lines(age,fit)
+   points(mean(HERS$age),mean(HERS$SBP))
+ })
> legend("topleft",c("Prediction","Confidence"),col=2:3,lty=2,title="CI",bty="n")
> 
> # computing a confidence interval for the regression slope using the
> # bootstrap
> library(boot)
> boot.fun <- function(data,x) return(as.numeric(lm(data[x,2]~data[x,1])$coefficients[2]))
> 
> sbp.age <- cbind(HERS$age,HERS$SBP)
> res <- boot(sbp.age,boot.fun,1000)
> res

ORDINARY NONPARAMETRIC BOOTSTRAP

Call:
boot(data = sbp.age, statistic = boot.fun, R = 1000)

Bootstrap Statistics :
     original       bias    std. error
t1* 0.4717281  0.003394972  0.05617798
> 
> # two-sided bootstrap percentile confidence interval for the slope
> quantile(res$t,c(0.025,0.975))
     2.5%     97.5% 
0.3661607 0.5884125 
> 
> # standard error
> sqrt(diag(cov(res$t)))
[1] 0.05617798
> 
> # adjusted bootstrap percentile (BCa) confidence interval (better)
> # Note:
> # the boot.ci() function takes a boot object and generates five different
> # types of two-sided nonparametric confidence intervals: the first-order
> # normal approximation, the basic bootstrap interval, the studentized
> # bootstrap interval, the bootstrap percentile interval, and the adjusted
> # bootstrap percentile (BCa) interval
> boot.ci(res,type=c("norm","perc"))  # normal approx. + percentile method
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates

CALL : 
boot.ci(boot.out = res, type = c("norm", "perc"))

Intervals : 
Level      Normal             Percentile     
95%   ( 0.3582,  0.5784 )   ( 0.3644,  0.5899 )  
Calculations and Intervals on Original Scale
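> # the BCa interval itself can be requested as follows (sketch, not run in
> # this session):
> # boot.ci(res,type="bca")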
> 
> # the following displays a graphical summary of the bootstrap procedure
> plot(res)
> 
> # Note:
> # there is a great tutorial on bootstrapping techniques written by John Fox,
> # which is available at the following address:
> # http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-bootstrapping.pdf
> 
> 
> ##############################################################################
> ### two-way contingency table
> 
> # loading the data
> WCGS <- read.table("wcgs.txt",header=TRUE,sep="\t")
> WCGS$chd69 <- factor(WCGS$chd69,labels=c("Noncases","Cases"))
> WCGS$arcus <- factor(WCGS$arcus,labels=c("Unexposed","Exposed"))
> 
> # display a basic two-way table
> chd69.tab <- with(WCGS, table(chd69,arcus))
> chd69.tab
          arcus
chd69      Unexposed Exposed
  Noncases      2058     839
  Cases          153     102
> 
> # interesting summaries are provided by the `vcd` package;
> # however, we are more interested in association measures
> library(vcd)
> assocstats(chd69.tab)
                    X^2 df   P(> X^2)
Likelihood Ratio 12.984  1 0.00031411
Pearson          13.638  1 0.00022163

Phi-Coefficient   : 0.066 
Contingency Coeff.: 0.066 
Cramer's V        : 0.066 
> 
> # here are the functions we need (but see the `epi` and `epitools` packages)
> odds.ratio <- function(x,alpha=.05) {
+   if (!is.table(x)) x <- as.table(x)
+   pow <- function(x,a=-1) x^a
+   or <- (x[1,1]*x[2,2]) / (x[1,2]*x[2,1])
+   or.se <- sqrt(sum(pow(x)))
+   or.ci <- exp(log(or) + c(-1,1)*qnorm(1-alpha/2)*or.se)
+   return(list(OR=or,SE=or.se,CI=or.ci))
+ }
> 
> # Note:
> # we don't use the Cornfield approximation when computing the confidence
> # interval (the Cornfield approximation in fact relates to the SE
> # calculation).
> #
> # See http://www.stata.com/help.cgi?cci for the Stata on-line help,
> #
> # and the 'official' reference for CI computation with matched-case
> # designs:
> # J J Schlesselman. Case-Control Studies. Design, Conduct, Analysis.
> # Oxford University Press, 1982.
> #
> # Also see
> # C-Y Lin & M-C Yang, Improved Exact Confidence Intervals for the
> # Odds Ratio in Two Independent Binomial Samples, Biometrical Journal,
> # 48(6): 1008-1019, 2006.
> #
> # C C Brown, The Validity of Approximation Methods for Interval
> # Estimation of the Odds Ratio, American Journal of Epidemiology,
> # 113(4): 474-480, 1981.
> #
> 
> relative.risk <- function(x,srow=1,scol=1) {
+   # we assume that Cases/Exposed is contained in cell [srow,scol]
+   out <- c(as.numeric(x[srow,scol]/apply(x,2,sum)[scol]),
+            as.numeric(x[srow,scol+1]/apply(x,2,sum)[scol+1]))
+   names(out) <- c("Cases+","Cases-")
+   return(out)
+ }
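> # Note (sketch, not run in this session):
> # with chd69.tab as built above, the cases sit in row 2 and the columns
> # are ordered (Unexposed, Exposed), so the defaults srow=1, scol=1 do not
> # match the assumption stated inside the function; reordering the columns
> # and pointing at the Cases row would give the intended risks:
> # relative.risk(chd69.tab[,2:1],srow=2)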
> 
> # The following functions are adapted from
> # S. BOLBOACA and A. ACHIMAS CADARIU. Binomial Distribution Sample
> # Confidence Intervals Estimation. 6. Excess Risk
> # http://lejpt.academicdirect.org/A04/01_21.htm
> #
> # Here, we express the excess risk as
> #   ER = a/(a+b) - c/(c+d) = y/n - x/m
> # for the classical 2x2 table
> #
> #         |  +  |  -  |
> #   -------------------
> #   |  +  |  a  |  b  |
> #   -------------------
> #   |  -  |  c  |  d  |
> #   -------------------
> #
> 
> dwald <- function(x,m,y,n,z=1.96) {
+   lwr <- max(c(y/n-x/m-z*sqrt((x*(m-x))/m^3+(y*(n-y))/n^3),-1))
+   upr <- min(c(y/n-x/m+z*sqrt((x*(m-x))/m^3+(y*(n-y))/n^3),1))
+   return(c(lwr,upr))
+ }
> 
> dac <- function(x,m,y,n,z=1.96) {
+   return(dwald(x+z^2/(4*sqrt(2)),m+z^2/(2*sqrt(2)),y+z^2/(4*sqrt(2)),n+z^2/(4*sqrt(2)),z))
+ }
> 
> # odds ratio
> odds.ratio(chd69.tab)
$OR
[1] 1.63528

$SE
[1] 0.1342299

$CI
[1] 1.257000 2.127399

> 
> # relative risk
> chd69.rr <- relative.risk(chd69.tab)
> excess.risk <- as.numeric(abs(diff(chd69.rr)))
> dac(chd69.tab[1,2],chd69.tab[1,1]+chd69.tab[1,2],chd69.tab[2,2],chd69.tab[2,1]+chd69.tab[2,2])
[1] 0.04956725 0.17420299
> # Note:
> # this interval looks off because the arguments passed above do not match
> # the x/m and y/n of the formula: the risks should be computed within the
> # exposure groups (x=153, m=2211, y=102, n=941), not within the outcome
> # rows, which would give a risk difference of about 0.039
> risk.ratio <- as.numeric(chd69.rr[1]/chd69.rr[2])
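> # a quick cross-check (sketch, not run in this session): prop.test() gives
> # a confidence interval for the risk difference directly
> # prop.test(c(102,153),c(102+839,153+2058))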
> 
> # global association test
> # we don't need the continuity correction when using the chi-squared test
> chisq.test(chd69.tab,correct=F)

	Pearson's Chi-squared test

data:  chd69.tab 
X-squared = 13.6382, df = 1, p-value = 0.0002216

> 
> # the `epitools` package offers facilities for computing the above
> # estimates, together with a test of association
> # Note:
> # CIs for the odds ratio are estimated using the Wald approximation;
> # we can add the option verbose=TRUE for a more detailed output
> library(epitools)
> epitab(chd69.tab,pvalue="chi2")
$tab
          arcus
chd69      Unexposed         p0 Exposed        p1 oddsratio    lower    upper
  Noncases      2058 0.93080054     839 0.8916047   1.00000       NA       NA
  Cases          153 0.06919946     102 0.1083953   1.63528 1.257000 2.127399
          arcus
chd69           p.value
  Noncases           NA
  Cases      0.0002216321

$measure
[1] "wald"

$conf.level
[1] 0.95

$pvalue
[1] "chi2"

> 
> # the HIV Status by AIDS Diagnosis of Male Partner data
> HIV <- matrix(c(3,2,4,22),nrow=2)
> dimnames(HIV) <- list(c("Cases","Noncases"),c("Exposed","Unexposed"))
> HIV
         Exposed Unexposed
Cases          3         4
Noncases       2        22
> 
> epitab(HIV)
$tab
         Exposed  p0 Unexposed        p1 oddsratio    lower    upper    p.value
Cases          3 0.6         4 0.1538462      1.00       NA       NA         NA
Noncases       2 0.4        22 0.8461538      8.25 1.028252 66.19241 0.06192065

$measure
[1] "wald"

$conf.level
[1] 0.95

$pvalue
[1] "fisher.exact"

> 
> # compare the OR estimate with that obtained using the mid-p method
> # Kenneth J. Rothman and Sander Greenland (1998), Modern Epidemiology,
> # Lippincott-Raven Publishers (p. 251)
> or.midp(c(3,4,2,22))
$x
     [,1] [,2]
[1,]    3    4
[2,]    2   22

$estimate
[1] 7.332308

$conf.int
[1]  0.868172 80.674513

$conf.level
[1] 0.95

attr(,"method")
[1] "median-unbiased estimate & mid-p exact CI"

> 
> # alternatively, we can directly make use of Fisher's exact test
> fisher.test(HIV)

	Fisher's Exact Test for Count Data

data:  HIV 
p-value = 0.06192
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
   0.6484488 117.9447821 
sample estimates:
odds ratio 
   7.48285 

> fisher.test(HIV,alternative="greater")

	Fisher's Exact Test for Count Data

data:  HIV 
p-value = 0.06192
alternative hypothesis: true odds ratio is greater than 1 
95 percent confidence interval:
 0.8981707       Inf 
sample estimates:
odds ratio 
   7.48285 

> 
> # now, if we use the `epicalc` package, we get all of this in one go
> library(epicalc)
> HIV.tab <- make2x2(3,2,4,22)
> # csi(3,2,4,22)
> cc(cctable=HIV.tab)
              Exposure
Outcome        Non-exposed Exposed Total
  Non-diseased          22       2    24
  Diseased               4       3     7
  Total                 26       5    31

OR =  7.48 
95% CI =  0.65  117.94  
Chi-squared = 4.77 , 1 d.f. , P value = 0.029 
Fisher's exact test (2-sided) P value = 0.062 
One of more cells is/are less than 5, not appropriate for graphing
> cs(cctable=HIV.tab)
              Exposure
Outcome        Non-exposed Exposed Total
  Non-diseased          22       2    24
  Diseased               4       3     7
  Total                 26       5    31

          Rne  Re   Rt
Risk     0.15 0.6 0.23

                                      Estimate Lower95ci Upper95ci
Risk difference (attributable risk)       0.45      0.04      0.85
Risk ratio                                3.90      0.74     20.63
Attr. frac. exp. -- (Re-Rne)/Re           0.74
Attr. frac. pop. -- (Rt-Rne)/Rt*100 %    31.87

> 
> # is the OR constant across subgroups of age? (Mantel-Haenszel test)
> # WCGS$agec is a 5-level factor
> 
> # first we look at a simple two-way classification
> (chd69.tab2 <- with(WCGS, table(chd69,agec)))
          agec
chd69         0    1    2    3    4
  Noncases  512 1036  680  463  206
  Cases      31   55   70   65   36
> # column profiles (outcome distribution within each age class)
> apply(chd69.tab2,1,function(x) round(x/apply(chd69.tab2,2,sum)*100,2))
   chd69
    Noncases Cases
  0    94.29  5.71
  1    94.96  5.04
  2    90.67  9.33
  3    87.69 12.31
  4    85.12 14.88
> 
> # global association test with the chi-squared statistic
> chisq.test(chd69.tab2)

	Pearson's Chi-squared test

data:  chd69.tab2 
X-squared = 46.6534, df = 4, p-value = 1.801e-09

> 
> # a more detailed inspection of the association estimates can be obtained
> # using
> # cc(cctable=chd69.tab2)
> 
> # Note:
> # the score test for trend in odds has not been computed yet (see the
> # sketch below)
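> # a related check (sketch, not run in this session): a chi-squared test
> # for trend in proportions, with the number of cases and the column totals
> # per age class
> # prop.trend.test(chd69.tab2[2,],apply(chd69.tab2,2,sum))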
> 
> # Twenty-year Vital Status by Smoking Behavior, Stratified by Age, is not
> # available as raw data, so we will use the WCGS study instead
> chd69.tab3 <- xtabs(~chd69+arcus+agec,data=WCGS)
> 
> # we can get the OR by stratum as follows
> apply(chd69.tab3,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1]))
        0         1         2         3         4 
1.5482625 1.8703704 2.0288066 0.8463815 0.9252336 
> 
> # run the Mantel-Haenszel test and a test of homogeneity of the OR
> mhor(mhtable=chd69.tab3)

Stratified analysis by agec 
                 OR lower lim. upper lim. P value
agec 0        1.547      0.606       3.64  0.2672
agec 1        1.869      0.993       3.42  0.0454
agec 2        2.027      1.195       3.43  0.0073
agec 3        0.847      0.465       1.51  0.5849
agec 4        0.926      0.427       2.00  0.8582
M-H combined  1.380      1.056       1.80  0.0164

M-H Chi2(1) = 5.76 , P value = 0.016 
Homogeneity test, chi-squared 4 d.f. = 7.72 , P value = 0.102 

> 
> # alternatively, one can use the following (without the continuity
> # correction, to get a similar result)
> with(WCGS, mantelhaen.test(chd69,arcus,agec,correct=FALSE))

	Mantel-Haenszel chi-squared test without continuity correction

data:  chd69 and arcus and agec 
Mantel-Haenszel X-squared = 5.756, df = 1, p-value = 0.01643
alternative hypothesis: true common odds ratio is not equal to 1 
95 percent confidence interval:
 1.056304 1.802479 
sample estimates:
common odds ratio 
         1.379842 

> 
> 
> ##############################################################################
> ### survival data
> 
> LEUK <- read.table("leuk.txt",header=TRUE)
> 
> library(survival)
> # create a survival object
> LEUK.surv <- Surv(LEUK$time,LEUK$cens)
> 
> # Kaplan-Meier estimates
> LEUK.km <- survfit(LEUK.surv~group,data=LEUK)
> plot(LEUK.km[1])
> lines(LEUK.km[2],col="gray60")
> legend("topright",c("6-MP","Placebo"),lty=1,col=c("black","gray60"),bty="n")
> title(xlab="Weeks since randomization",ylab="Proportion in remission")
> 
> # numerical summary
> LEUK.km
Call: survfit(formula = LEUK.surv ~ group, data = LEUK)

         n events median 0.95LCL 0.95UCL
group=1 21      9     23      16     Inf
group=2 21     21      8       4      12
> summary(LEUK.km)
Call: survfit(formula = LEUK.surv ~ group, data = LEUK)

                group=1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    6     21       3    0.857  0.0764        0.720        1.000
    7     17       1    0.807  0.0869        0.653        0.996
   10     15       1    0.753  0.0963        0.586        0.968
   13     12       1    0.690  0.1068        0.510        0.935
   16     11       1    0.627  0.1141        0.439        0.896
   22      7       1    0.538  0.1282        0.337        0.858
   23      6       1    0.448  0.1346        0.249        0.807

                group=2 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    1     21       2   0.9048  0.0641      0.78754        1.000
    2     19       2   0.8095  0.0857      0.65785        0.996
    3     17       1   0.7619  0.0929      0.59988        0.968
    4     16       2   0.6667  0.1029      0.49268        0.902
    5     14       2   0.5714  0.1080      0.39455        0.828
    8     12       4   0.3810  0.1060      0.22085        0.657
   11      8       2   0.2857  0.0986      0.14529        0.562
   12      6       2   0.1905  0.0857      0.07887        0.460
   15      4       1   0.1429  0.0764      0.05011        0.407
   17      3       1   0.0952  0.0641      0.02549        0.356
   22      2       1   0.0476  0.0465      0.00703        0.322
   23      1       1   0.0000      NA           NA           NA

> 
> # comparing the two groups with the log-rank test using survdiff();
> # rho=0 (the default) gives the log-rank test, while rho=1 gives the
> # Wilcoxon test (see the sketch at the end)
> survdiff(LEUK.surv~group,data=LEUK)
Call:
survdiff(formula = LEUK.surv ~ group, data = LEUK)

         N Observed Expected (O-E)^2/E (O-E)^2/V
group=1 21        9     19.3      5.46      16.8
group=2 21       21     10.7      9.77      16.8

 Chisq= 16.8  on 1 degrees of freedom, p= 4.17e-05 
> 
> dev.off()  # release the postscript device
null device 
          1 
> 
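> # a sketch of the Wilcoxon variant of survdiff() mentioned above (not run
> # in this session):
> # survdiff(LEUK.surv~group,data=LEUK,rho=1)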