##################################################
# helper functions for the simulation
##################################################

# Summarise a batch of simulation results and (optionally) draw ROC curves.
#
# Arguments:
#   roc    list with one element per simulated layout; each element is read
#          for the fields L (segment lengths), S (segment states),
#          A (list of per-simulation data frames of fitted values, one column
#          per method), ncp (number of change points) and t (run times).
#   er, J  used only to set the upper end of the threshold grid.
#   nstep  number of thresholds on the grid.
#   CorS   'S' uses er^(J-1)+2 as the grid upper bound; any other value uses
#          er*(J-1)+2. 'C' additionally evaluates the loss state (state 1).
#          (Original note: "cgh or sequencing".)
#   toPlot draw the ROC plot when TRUE.
#   main   plot title.
#   pcex   character expansion used for the plot and the 'S' legend.
#
# Prints run time, change-point summaries, MSE/AME of the segment count and
# the AUC per method. The value is that of the final `if (toPlot)` expression.
reportROC <- function(roc, er = 1, J = 3, nstep = 100,
                      CorS = 'S', toPlot = TRUE, main = 'SNR', pcex = 2) {
  # threshold grid; the upper bound depends on the data type
  if (CorS == 'S') {
    srs <- seq(-1, er^(J - 1) + 2, length.out = nstep)
  } else {
    srs <- seq(-1, er * (J - 1) + 2, length.out = nstep)
  }
  nsim <- nrow(roc[[1]][['t']])
  ngrid <- length(roc)
  methods <- names(roc[[1]][['t']])

  # get run time
  cat('algorithm names:', methods, '\n', sep = '\t')
  avgt <- colMeans(do.call(rbind, do.call(rbind, roc)[, 't']))
  cat('average run time:', avgt, '\n', sep = '\t')

  # get summary of breakpoint no.
  print(apply(do.call(rbind, do.call(rbind, roc)[, 'ncp']), 2, summary))

  # get error estimate: estimated minus true number of segments
  segdiff <- do.call(rbind, lapply(roc, function(ll) ll$ncp - length(ll$L)))
  segmse <- apply(segdiff, 2, function(x) sum(x^2) / nsim / ngrid)
  cat('MSE for no. segs:', segmse, '\n', sep = '\t')
  segame <- apply(segdiff, 2, function(x) sum(abs(x)) / nsim / ngrid)
  cat('AME for no. segs:', segame, '\n', sep = '\t')

  # calc pos and true pos
  rmethods <- names(roc[[1]][['A']][[1]])

  # Count, per method (column) and threshold (row), the positions whose fitted
  # value satisfies cmp(value, threshold); when `state` is given, only
  # positions whose true state equals `state` are counted.
  countCalls <- function(cmp, state = NULL) {
    Reduce('+', lapply(roc, function(ll) {
      truth <- if (is.null(state)) TRUE else rep(ll$S, times = ll$L) == state
      sapply(rmethods, function(m)
        sapply(seq_len(nstep), function(i)
          sum(sapply(seq_len(nsim), function(n)
            cmp(ll$A[[n]][, m], srs[i]) & truth))))
    }))
  }
  # total length of all segments whose state satisfies `keep`
  stateLength <- function(keep) {
    sum(sapply(roc, function(ll) sum(ll$L[keep(ll$S)])))
  }

  tp <- countCalls(`>`, 3)
  p <- countCalls(`>`)
  nGain <- stateLength(function(s) s == '3')
  nNotGain <- stateLength(function(s) s != '3')
  if (CorS == 'C') {
    ltp <- countCalls(`<`, 1)
    lp <- countCalls(`<`)
    nLoss <- stateLength(function(s) s == '1')
    nNotLoss <- stateLength(function(s) s != '1')
  }

  # get AUC stat
  cat('\nalgorithm runned:', rmethods, '\n', sep = '\t')
  aucg <- sapply(rmethods, function(m) callAUC(
    tpr = tp[, m] / nGain / nsim,
    fpr = (p[, m] - tp[, m]) / nNotGain / nsim))
  cat('AUC for gain/3:', aucg, '\n', sep = '\t')
  if (CorS == 'C') {
    aucl <- sapply(rmethods, function(m) callAUC(
      tpr = ltp[, m] / nLoss / nsim,
      fpr = (lp[, m] - ltp[, m]) / nNotLoss / nsim))
    cat('AUC for loss/1:', aucl, '\n', sep = '\t')
  }

  # order names for plotting
  rmethods <- sort(rmethods)
  if (CorS == 'C') {
    # Move 'biohmm' to the end. For the full set of 8 methods this equals the
    # previous positional reorder c(1, 3:8, 2); unlike that index vector it
    # does not inject NA when 'biohmm' was not run and only 7 methods exist.
    rmethods <- c(setdiff(rmethods, 'biohmm'), intersect(rmethods, 'biohmm'))
  }

  if (toPlot) {
    # separate gain and loss
    colors <- palette()[1:8]
    colors[7] <- 'orange'
    # plot background
    plot(c(0, 0), c(1, 1), col = 'white', xlab = 'FPR', ylab = 'TPR',
         xlim = c(0, 1), ylim = c(0, 1), main = main,
         cex.lab = pcex, cex.axis = pcex, cex.main = pcex, cex.sub = pcex)
    for (i in seq_along(rmethods)) {
      m <- rmethods[i]
      points((p[, m] - tp[, m]) / nNotGain / nsim,
             tp[, m] / nGain / nsim,
             col = colors[i], cex = pcex, pch = 17, type = 'b')
      if (CorS == 'C') {
        points((lp[, m] - ltp[, m]) / nNotLoss / nsim,
               ltp[, m] / nLoss / nsim,
               col = colors[i], cex = pcex, pch = 6, type = 'b')
      }
    }
    # legend label only; the columns of tp/p are still keyed by 'mine'
    rmethods[rmethods == 'mine'] <- 'hsmm'
    gandl <- c('gain', 'loss')
    if (CorS == 'C') {
      legend('bottomright', cex = 3, ncol = 2,
             legend = paste0(rep(rmethods, times = 2), '.',
                             rep(gandl, each = length(rmethods))),
             col = colors[rep(seq_along(rmethods), times = 2)],
             pch = rep(c(17, 6), each = length(rmethods)))
    } else {
      legend('bottomright', legend = rmethods,
             col = colors[seq_along(rmethods)], pch = 17, cex = pcex)
    }
  }
}

# create auc obj for test
# Wraps the given rates in a 'rocc' object (sens = tpr, spec = 1 - fpr) and
# returns the value of AUC() on it.
callAUC <- function(tpr, fpr) {
  rocobj <- new('rocc', sens = tpr, spec = 1 - fpr,
                caseLabel = "case", markerLabel = "marker")
  return(AUC(rocobj))
}
# sim univariate series
# Draw `n` random values for state `j` from the distribution named by
# `param$type`, using the j-th entry of each parameter vector in `param`:
#   norm   -> rnorm(mean, sd)
#   t      -> rt(ncp, df)
#   pois   -> rpois(lambda)
#   nbinom -> rnbinom(mu, size)
# If `seed` is not NULL the RNG is seeded with it first.
# Stops with an error for any other (or missing) `param$type`, instead of
# silently returning NULL as an unmatched switch() would.
simUvSegDat <- function(n, j, param, seed = NULL) {
  supported <- c('norm', 't', 'pois', 'nbinom')
  if (!isTRUE(param$type %in% supported)) {
    stop("simUvSegDat: unsupported param$type; expected one of ",
         paste(supported, collapse = ", "))
  }
  if (!is.null(seed)) set.seed(seed)
  x <- switch(param$type,
    norm   = rnorm(n, mean = param$mean[j], sd = param$sd[j]),
    t      = rt(n, ncp = param$ncp[j], df = param$df[j]),
    pois   = rpois(n, lambda = param$lambda[j]),
    nbinom = rnbinom(n, mu = param$mu[j], size = param$size[j]))
  return(x)
}
# batch sim data and segment
# For each of `ngrid` random segment layouts, simulate `nsim` series and run
# every segmentation method on each of them.
#
# Arguments:
#   J       number of states.
#   nsim    simulations per layout.
#   ngrid   number of layouts.
#   soj     sojourn (segment length) distribution, passed to simUvSegDat();
#           soj$type must be 'gamma', 'pois' or 'nbinom'.
#   emis    emission distribution; emis$type must be 'norm', 't', 'pois' or
#           'nbinom', and every other element must have length J. Only
#           'pois', 'norm' and 't' get their parameters derived from `er`.
#   er      effect-size value used to derive the emission parameters.
#   seed    seed used when drawing the pool of segment lengths.
#   pool    number of candidate segment lengths drawn per state.
#   toPlot  write a PDF scatter of each simulated series when TRUE.
#   bioHMM  also run runBioHMM when TRUE.
#
# Returns a list of length `ngrid`; each element is a list with L, S, E, pdf,
# A, ncp and t.
simROCDataE <- function(J = 3, nsim = 4, ngrid = 2, soj, emis, er, seed = 832314,
                        pool = 20, toPlot = FALSE, bioHMM = FALSE) {
  # check J to see if it conforms with soj and emis
  if (!is.null(soj)) {
    if (!soj$type %in% c('gamma', 'pois', 'nbinom')) {
      stop("soj type not supported")
    }
  }
  if (!is.null(emis)) {
    if (!emis$type %in% c('norm', 't', 'pois', 'nbinom')) {
      stop("emis type not supported")
    }
    paraLen <- sapply(emis, length)
    if (!all(paraLen[names(paraLen) != 'type'] == J)) {
      stop("incorrect length for the emis parameter")
    }
  }

  # create pool of segment length for J states (one column per state)
  segLen <- sapply(seq_len(J), function(j) simUvSegDat(pool, j, soj, seed = seed))

  nnres <- lapply(seq_len(ngrid), function(g) {
    # draw segments, and pile up
    sel <- sample(seq_len(J), size = pool, replace = TRUE)
    S <- runValue(Rle(sel))          # collapse consecutive repeats of a state
    nsel <- length(S)
    # i-th segment takes the i-th pooled length of its state
    L <- segLen[(S - 1) * pool + 1:nsel]

    roc <- list(L = L, S = S, E = numeric(), pdf = character(), A = list(),
                ncp = data.frame(matrix(0, ncol = 8, nrow = nsim)),
                t = data.frame(matrix(0, ncol = 8, nrow = nsim)))
    colnames(roc[['t']]) <- colnames(roc[['ncp']]) <-
      c('bcp', 'biohmm', 'cbs', 'cghseg', 'glad', 'haarseg', 'hmm', 'mine')

    # derive emission parameters from er
    erf <- seq_len(J) - 1
    if (emis$type == 'pois') {
      emis$lambda <- er^erf
    } else if (emis$type == 'norm') {
      emis$mean <- er * (erf + 1)
      emis$sd <- rep(ifelse(er >= 1, 1, er), J)
    } else if (emis$type == 't') {
      emis$df <- rep(ifelse(er >= 1, 1, er), J)
    } else {
      stop('emis$type not supported yet!')
    }

    for (n in seq_len(nsim)) {
      E <- unlist(sapply(seq_along(S),
                         function(i) simUvSegDat(L[i], S[i], emis)))
      roc$E <- cbind(roc$E, E)
      if (toPlot) {
        filename <- paste0('sim.', gsub(':', '-', date()), '.pdf')
        pdf(filename)
        ts.plot(E, type = 'p')
        dev.off()
        roc$pdf <- c(roc$pdf, filename)
      }
      xx <- as.matrix(E, ncol = 1)
      # Position index 1..nrow(xx). The source had the self-assignment
      # `ssxx <- ssxx`, which fails unless a global `ssxx` exists.
      # NOTE(review): inferred from its use as maploc/Position/PosOrder below;
      # confirm against the original script.
      ssxx <- seq_len(nrow(xx))

      ## biomvRhsmm
      mine.t <- system.time(
        mine.res <- biomvRhsmm(x = xx, maxk = min(500, nrow(xx) - 1),
                               emis.type = emis$type, soj.type = 'gamma',
                               prior.m = 'quantile', q.alpha = 0.05,
                               r.var = 0.75, avg.m = 'mean'))
      mine.res@res <- sort(mine.res@res)
      mine.cp <- end(mine.res@res)
      roc$t[n, 'mine'] <- mine.t[3]
      roc$ncp[n, 'mine'] <- length(mine.cp)
      rm(mine.t)

      ## bcp
      bcp.t <- system.time(bcp.res <- bcp(E))
      bcp.cp <- cumsum(runLength(Rle(bcp.res$posterior.mean)))
      roc$t[n, 'bcp'] <- bcp.t[3]
      roc$ncp[n, 'bcp'] <- length(bcp.cp)
      rm(bcp.t)

      ### CBS - DNAcopy
      cbs.obj <- CNA(xx, maploc = ssxx, chrom = 'sseq')
      cbs.t <- system.time(cbs.res <- DNAcopy::segment(cbs.obj))
      cbs.cp <- cbs.res$output$loc.end
      roc$t[n, 'cbs'] <- cbs.t[3]
      roc$ncp[n, 'cbs'] <- length(cbs.cp)
      rm(cbs.obj, cbs.t)

      ### MAList obj - snapCGH
      ma.obj <- list()
      ma.obj$design <- 1
      ma.obj$M <- xx
      ma.obj$genes <- data.frame(Chr = 'sseq', Position = ssxx,
                                 Start = ssxx, End = ssxx)
      class(ma.obj) <- 'MAList'

      ### bioHMM - snapCGH, when not using distance, revert to HMM
      if (bioHMM) {
        biohmm.t <- system.time(
          biohmm.res <- runBioHMM(ma.obj, useCloneDists = TRUE))
        biohmm.cp <- cumsum(runLength(Rle(biohmm.res$state)))
        roc$t[n, 'biohmm'] <- biohmm.t[3]
        roc$ncp[n, 'biohmm'] <- length(biohmm.cp)
        rm(biohmm.t)
      }

      ### HMM - snapCGH wrapper for aCGH
      hmm.t <- system.time(hmm.res <- runHomHMM(ma.obj))
      hmm.cp <- cumsum(runLength(Rle(hmm.res$state)))
      roc$t[n, 'hmm'] <- hmm.t[3]
      roc$ncp[n, 'hmm'] <- length(hmm.cp)
      rm(hmm.t, ma.obj)

      ### GLAD - original
      profV <- data.frame(PosOrder = ssxx, LogRatio = xx,
                          PosBase = ssxx, Chromosome = '999')
      profileCGH <- list(profileValues = profV)
      class(profileCGH) <- "profileCGH"
      glad.t <- system.time(glad.res <- glad(profileCGH))
      # the series end is appended as the final change point
      glad.cp <- c(glad.res$BkpInfo$PosBase, sum(L))
      roc$t[n, 'glad'] <- glad.t[3]
      roc$ncp[n, 'glad'] <- length(glad.cp)
      # was rm(profileValues, ...): no such object, the local is profV
      rm(profV, glad.t, profileCGH)

      ### multiseg - cghseg
      cgh.obj <- new("CGHdata", Y = as.data.frame(xx))
      CGHo <- new("CGHoptions")
      cghseg.t <- system.time(cghseg.res <- multiseg(cgh.obj, CGHo))
      cghseg.cp <- cghseg.res@mu[[1]][, 'end']
      roc$t[n, 'cghseg'] <- cghseg.t[3]
      roc$ncp[n, 'cghseg'] <- length(cghseg.cp)
      rm(cgh.obj, cghseg.t, CGHo)

      ### haarseg
      haarseg.t <- system.time(haarseg.res <- haarSeg(E))
      roc$t[n, 'haarseg'] <- haarseg.t[3]
      roc$ncp[n, 'haarseg'] <- nrow(haarseg.res$SegmentsTable)
      rm(haarseg.t)

      # per-position fitted values, one column per method; the biohmm column
      # is present only when bioHMM was run (column order as in the original)
      fitted <- list(bcp = bcp.res$posterior.mean)
      if (bioHMM) {
        fitted$biohmm <- biohmm.res$M.predicted
      }
      fitted <- c(fitted, list(
        cbs = rep(cbs.res$output$seg.mean, times = cbs.res$output$num.mark),
        cghseg = rep(cghseg.res@mu[[1]][, 'mean'],
                     times = (cghseg.res@mu[[1]][, 'end'] -
                              cghseg.res@mu[[1]][, 'begin'] + 1)),
        haarseg = haarseg.res$Segmented,
        glad = glad.res$profileValues$Smoothing,
        hmm = hmm.res$M.predicted,
        mine = rep(as.numeric(mcols(mine.res@res)[, 'AVG']),
                   times = width(mine.res@res))))
      roc$A <- c(roc$A, list(do.call(data.frame, fitted)))

      cat('layout', g, 'simulation', n, 'finished\n')
    }
    return(roc)
  })
  return(nnres)
}

##################################################
# models comparison with simulated data
##################################################
library(DNAcopy)
library(bcp)
library(cghseg)
library(biomvRCNS)
library(aCGH)
library(HaarSeg)
library(GLAD)
library(snapCGH)
library(ROC)
seed <- 832314
nsim <- 100; ngrid = 100;
# p o i s c o u n t s e q , s t a t e 3 o f i n t e r e s t e r s<−c( 1 . 5 , 1 . 7 5 , 2 )
s o j<−l i s t( type= ’ p o i s ’ , lambda=c ( 2 8 5 , 5 , 1 0 ) , s h i f t =c ( 0 , 0 , 0 ) ) emis<−l i s t( type= ’ p o i s ’ )
f o r( e r i n e r s ) {
r o c<−simROCDataE ( nsim=nsim , ngrid=ngrid , s o j = s o j , emis=emis , seed=seed , e r = e r )
recName<−p a s t e 0 ( ’ emis . ’ , emis $ type , ’ . r o c . nsim . ’ , nsim , ’ . ngrid . ’ , ngrid , ’ . e r . ’ , er , ’ . RData ’ )
save( roc , seed , nsim , ngrid , er , s o j , emis , f i l e =recName ) dirName<−p a s t e 0 ( ’ emis . ’ , emis $ type , ’ . r o c . nsim . ’ ,
nsim , ’ . ngrid . ’ , ngrid , ’ . e r . ’ , e r )