• No results found

R code of segmentation data simulation and benchmarking

##################################################
# helper functions for the simulation
##################################################

# Summarise a batch of simulated segmentation runs and optionally draw ROC
# curves.
#
# Arguments:
#   roc    list with one element per segment layout, as returned by
#          simROCDataE(): each element carries L (segment lengths), S (segment
#          states), A (list of per-simulation data frames of fitted values, one
#          column per method), ncp (segment counts) and t (run times).
#   er     effect size used in the simulation; only used to size the
#          threshold grid.
#   J      number of states; only used to size the threshold grid.
#   nstep  number of thresholds on the grid.
#   CorS   'S' for the sequencing/count setting (gain only) or 'C' for the
#          CGH setting (gain and loss are both evaluated).
#   toPlot draw the ROC curves on the current graphics device?
#   main   plot title.
#   pcex   character expansion used for the plot.
#
# Side effects: prints run time, segment-count and AUC summaries with cat() /
# print(); draws a plot when toPlot is TRUE.
# Value: the value of the final `if (toPlot)` expression (not meant to be used).
reportROC <- function(roc, er = 1, J = 3, nstep = 100,
                      CorS = 'S', toPlot = TRUE, main = 'SNR', pcex = 2) {
    # CorS, cgh or sequencing: the two settings scale the state means
    # differently, so the threshold grid has a different upper end.
    upper <- if (CorS == 'S') er^(J - 1) + 2 else er * (J - 1) + 2
    srs <- seq(-1, upper, length.out = nstep)

    nsim <- nrow(roc[[1]][['t']])
    ngrid <- length(roc)
    methods <- names(roc[[1]][['t']])

    # get run time
    cat('algorithm names:', methods, '\n', sep = '\t')
    avgt <- colMeans(do.call(rbind, do.call(rbind, roc)[, 't']))
    cat('average run time:', avgt, '\n', sep = '\t')

    # get summary of breakpoint no.
    print(apply(do.call(rbind, do.call(rbind, roc)[, 'ncp']), 2, summary))

    # get error estimate: estimated minus true number of segments, per method
    segErr <- do.call(rbind, lapply(roc, function(ll) ll$ncp - length(ll$L)))
    segmse <- apply(segErr, 2, function(x) sum(x^2) / nsim / ngrid)
    cat('MSE for no. segs:', segmse, '\n', sep = '\t')
    segame <- apply(segErr, 2, function(x) sum(abs(x)) / nsim / ngrid)
    cat('AME for no. segs:', segame, '\n', sep = '\t')

    # calc pos and true pos
    rmethods <- names(roc[[1]][['A']][[1]])

    # Count, per threshold (rows) and method (columns), the positions whose
    # fitted value is beyond the threshold, summed over simulations and
    # layouts. With `state` given, only positions truly in that state count.
    countCalls <- function(above, state = NULL) {
        Reduce('+', lapply(roc, function(ll) {
            # the truth mask depends on the layout only, so build it once
            truth <- if (is.null(state)) TRUE else rep(ll$S, times = ll$L) == state
            sapply(rmethods, function(m)
                sapply(seq_len(nstep), function(i)
                    sum(sapply(seq_len(nsim), function(n) {
                        fitted <- ll$A[[n]][, m]
                        called <- if (above) fitted > srs[i] else fitted < srs[i]
                        called & truth
                    }))))
        }))
    }
    # Total number of positions whose state is (or is not) `state`.
    nPositions <- function(state, equal = TRUE) {
        sum(sapply(roc, function(ll) {
            keep <- if (equal) ll$S == state else ll$S != state
            sum(ll$L[keep])
        }))
    }

    tp <- countCalls(above = TRUE, state = 3)
    p <- countCalls(above = TRUE)
    gainTPR <- function(m) tp[, m] / nPositions(3) / nsim
    gainFPR <- function(m) (p[, m] - tp[, m]) / nPositions(3, equal = FALSE) / nsim
    if (CorS == 'C') {
        ltp <- countCalls(above = FALSE, state = 1)
        lp <- countCalls(above = FALSE)
        lossTPR <- function(m) ltp[, m] / nPositions(1) / nsim
        lossFPR <- function(m) (lp[, m] - ltp[, m]) / nPositions(1, equal = FALSE) / nsim
    }

    # get AUC stat
    cat('\nalgorithm runned:', rmethods, '\n', sep = '\t')
    aucg <- sapply(rmethods, function(m) callAUC(tpr = gainTPR(m), fpr = gainFPR(m)))
    cat('AUC for gain/3:', aucg, '\n', sep = '\t')
    if (CorS == 'C') {
        aucl <- sapply(rmethods, function(m) callAUC(tpr = lossTPR(m), fpr = lossFPR(m)))
        cat('AUC for loss/1:', aucl, '\n', sep = '\t')
    }

    # order names for plotting
    rmethods <- sort(rmethods)
    if (CorS == 'C') {
        # Move 'biohmm' to the end. The former fixed index c(1, 3:8, 2) did the
        # same for exactly 8 methods but produced an NA name when biohmm had
        # not been run (7 methods), which broke the plotting loop below.
        rmethods <- c(setdiff(rmethods, 'biohmm'), intersect(rmethods, 'biohmm'))
    }

    if (toPlot) {
        # separate gain and loss
        colors <- palette()[1:8]
        colors[7] <- 'orange'
        # plot background: an invisible point that only sets up the axes
        plot(c(0, 0), c(1, 1), col = 'white', xlab = 'FPR', ylab = 'TPR',
             xlim = c(0, 1), ylim = c(0, 1), main = main,
             cex.lab = pcex, cex.axis = pcex, cex.main = pcex, cex.sub = pcex)
        for (i in seq_along(rmethods)) {
            m <- rmethods[i]
            points(gainFPR(m), gainTPR(m),
                   col = colors[i], cex = pcex, pch = 17, type = 'b')
            if (CorS == 'C') {
                points(lossFPR(m), lossTPR(m),
                       col = colors[i], cex = pcex, pch = 6, type = 'b')
            }
        }
        # legend label only; the data columns are still called 'mine'
        rmethods[rmethods == 'mine'] <- 'hsmm'
        gandl <- c('gain', 'loss')
        if (CorS == 'C') {
            legend('bottomright', cex = 3, ncol = 2,
                   legend = paste0(rep(rmethods, times = 2), '.',
                                   rep(gandl, each = length(rmethods))),
                   col = colors[rep(seq_along(rmethods), times = 2)],
                   pch = rep(c(17, 6), each = length(rmethods)))
        } else {
            legend('bottomright', legend = rmethods,
                   col = colors[seq_along(rmethods)], pch = 17, cex = pcex)
        }
    }
}

# create auc obj for test
# Wraps a TPR/FPR curve in a 'rocc' object and returns AUC() of it. Both the
# 'rocc' class and AUC() are not defined in this file; presumably they come
# from the ROC package loaded by the driver script -- confirm.
callAUC <- function(tpr, fpr) {
    rocobj <- new('rocc', sens = tpr, spec = 1 - fpr,
                  caseLabel = "case", markerLabel = "marker")
    return(AUC(rocobj))
}

# sim univariate series
# Draw `n` values from the distribution described by `param` for state `j`.
#
# Arguments:
#   n     number of values to draw.
#   j     index of the state whose parameters are used.
#   param list with a `type` element ('norm', 't', 'pois' or 'nbinom') and one
#         parameter vector per distribution parameter, indexed by state:
#         mean/sd, df (and optionally ncp), lambda, or mu/size.
#   seed  optional; when not NULL the RNG is re-seeded before drawing.
#
# Value: the vector of simulated values.
# Stops when `param$type` is not one of the supported types (previously the
# function silently returned NULL in that case).
simUvSegDat <- function(n, j, param, seed = NULL) {
    if (!is.null(seed)) set.seed(seed)
    x <- switch(param$type,
        norm = rnorm(n, mean = param$mean[j], sd = param$sd[j]),
        # rt() cannot take ncp = NULL, so fall back to the central t when no
        # non-centrality parameter was supplied
        t = if (is.null(param$ncp)) {
            rt(n, df = param$df[j])
        } else {
            rt(n, ncp = param$ncp[j], df = param$df[j])
        },
        pois = rpois(n, lambda = param$lambda[j]),
        nbinom = rnbinom(n, mu = param$mu[j], size = param$size[j]),
        stop("simUvSegDat: type '", param$type, "' not supported"))
    x
}

# batch sim data and segment
# Simulate `ngrid` segment layouts with `nsim` data series each, run every
# segmentation method on each series and collect run time, number of segments
# and fitted values.
#
# Arguments:
#   J      number of states.
#   nsim   number of series simulated per layout.
#   ngrid  number of layouts.
#   soj    sojourn (segment length) settings passed to simUvSegDat(); `type`
#          must be 'gamma', 'pois' or 'nbinom'.
#   emis   emission settings; `type` must be 'norm', 't', 'pois' or 'nbinom',
#          and any other element must have length J. Only 'pois', 'norm' and
#          't' are actually simulated; anything else stops further down.
#   er     effect size used to derive the per-state emission parameters.
#   seed   RNG seed used when drawing the pool of segment lengths.
#   pool   number of segment lengths drawn per state, and number of state
#          draws per layout.
#   toPlot write a pdf of each simulated series to the working directory?
#   bioHMM also run runBioHMM()?
#
# Value: a list of length `ngrid`; each element is a list with L, S, E, pdf,
# A, ncp and t (the structure consumed by reportROC()).
#
# NOTE(review): relies on simUvSegDat() from this file and on functions that
# are not defined here (runValue, Rle, biomvRhsmm, bcp, CNA, segment,
# runBioHMM, runHomHMM, glad, multiseg, haarSeg, ...); presumably they come
# from the packages attached by the driver script below.
simROCDataE <- function(J = 3, nsim = 4, ngrid = 2, soj, emis, er, seed = 832314,
                        pool = 20, toPlot = FALSE, bioHMM = FALSE) {
    # check J to see if it conforms with soj and emis
    if (!is.null(soj)) {
        if (!soj$type %in% c('gamma', 'pois', 'nbinom')) {
            stop("soj type not supported")
        }
    }
    if (!is.null(emis)) {
        if (!emis$type %in% c('norm', 't', 'pois', 'nbinom')) {
            stop("emis type not supported")
        }
        paraLen <- sapply(emis, length)
        if (!all(paraLen[names(paraLen) != 'type'] == J)) {
            stop("incorrect length for the emis parameter")
        }
    }

    # create pool of segment length for J states (pool x J matrix)
    segLen <- sapply(seq_len(J), function(j) simUvSegDat(pool, j, soj, seed = seed))

    nnres <- lapply(seq_len(ngrid), function(g) {
        # draw segments, and pile up: consecutive draws of the same state are
        # merged into a single segment
        sel <- sample(seq_len(J), size = pool, replace = TRUE)
        S <- runValue(Rle(sel))
        nsel <- length(S)
        # segment i of state S[i] takes row i, column S[i] of segLen
        L <- segLen[(S - 1) * pool + 1:nsel]

        methodNames <- c('bcp', 'biohmm', 'cbs', 'cghseg', 'glad', 'haarseg', 'hmm', 'mine')
        roc <- list(L = L, S = S, E = numeric(), pdf = character(), A = list(),
                    ncp = data.frame(matrix(0, ncol = 8, nrow = nsim)),
                    t = data.frame(matrix(0, ncol = 8, nrow = nsim)))
        colnames(roc[['t']]) <- colnames(roc[['ncp']]) <- methodNames

        # derive the per-state emission parameters from the effect size
        erf <- seq_len(J) - 1
        if (emis$type == 'pois') {
            emis$lambda <- er^erf
        } else if (emis$type == 'norm') {
            emis$mean <- er * (erf + 1)
            emis$sd <- rep(ifelse(er >= 1, 1, er), J)
        } else if (emis$type == 't') {
            emis$df <- rep(ifelse(er >= 1, 1, er), J)
        } else {
            stop('emis$type not supported yet!')
        }

        for (n in seq_len(nsim)) {
            E <- unlist(sapply(seq_along(S),
                               function(i) simUvSegDat(L[i], S[i], emis)))
            roc$E <- cbind(roc$E, E)
            if (toPlot) {
                filename <- paste0('sim.', gsub(':', '', date()), '.pdf')
                pdf(filename)
                ts.plot(E, type = 'p')
                dev.off()
                roc$pdf <- c(roc$pdf, filename)
            }
            xx <- as.matrix(E, ncol = 1)
            # Position index 1..n of the series. The original statement was the
            # self-assignment `ssxx <- ssxx`, which fails unless a global of
            # that name happens to exist.
            ssxx <- seq_len(nrow(xx))

            ## biomvRhsmm
            mine.t <- system.time(
                mine.res <- biomvRhsmm(x = xx, maxk = min(500, nrow(xx) - 1),
                                       emis.type = emis$type, soj.type = 'gamma',
                                       prior.m = 'quantile', q.alpha = 0.05,
                                       r.var = 0.75, avg.m = 'mean'))
            mine.res@res <- sort(mine.res@res)
            mine.cp <- end(mine.res@res)
            roc$t[n, 'mine'] <- mine.t[3]
            roc$ncp[n, 'mine'] <- length(mine.cp)
            rm(mine.t)

            ## bcp
            bcp.t <- system.time(bcp.res <- bcp(E))
            bcp.cp <- cumsum(runLength(Rle(bcp.res$posterior.mean)))
            roc$t[n, 'bcp'] <- bcp.t[3]
            roc$ncp[n, 'bcp'] <- length(bcp.cp)
            rm(bcp.t)

            ### CBS - DNAcopy
            cbs.obj <- CNA(xx, maploc = ssxx, chrom = 'sseq')
            cbs.t <- system.time(cbs.res <- DNAcopy::segment(cbs.obj))
            cbs.cp <- cbs.res$output$loc.end
            roc$t[n, 'cbs'] <- cbs.t[3]
            roc$ncp[n, 'cbs'] <- length(cbs.cp)
            rm(cbs.obj, cbs.t)

            ### MAlist obj - snapCGH
            ma.obj <- list()
            ma.obj$design <- 1
            ma.obj$M <- xx
            ma.obj$genes <- data.frame(Chr = 'sseq', Position = ssxx,
                                       Start = ssxx, End = ssxx)
            class(ma.obj) <- 'MAList'

            ### bioHMM - snapCGH, when not using distance, revert to HMM
            if (bioHMM) {
                biohmm.t <- system.time(
                    biohmm.res <- runBioHMM(ma.obj, useCloneDists = TRUE))
                biohmm.cp <- cumsum(runLength(Rle(biohmm.res$state)))
                roc$t[n, 'biohmm'] <- biohmm.t[3]
                roc$ncp[n, 'biohmm'] <- length(biohmm.cp)
                rm(biohmm.t)
            }

            ### HMM - snapCGH wrapper for aCGH
            hmm.t <- system.time(hmm.res <- runHomHMM(ma.obj))
            hmm.cp <- cumsum(runLength(Rle(hmm.res$state)))
            roc$t[n, 'hmm'] <- hmm.t[3]
            roc$ncp[n, 'hmm'] <- length(hmm.cp)
            rm(hmm.t, ma.obj)

            ### GLAD - original
            profV <- data.frame(PosOrder = ssxx, LogRatio = xx,
                                PosBase = ssxx, Chromosome = '999')
            profileCGH <- list(profileValues = profV)
            class(profileCGH) <- "profileCGH"
            glad.t <- system.time(glad.res <- glad(profileCGH))
            # append the end of the series as the last change point
            glad.cp <- c(glad.res$BkpInfo$PosBase, sum(L))
            roc$t[n, 'glad'] <- glad.t[3]
            roc$ncp[n, 'glad'] <- length(glad.cp)
            # was rm(profileValues, ...): no such object, the data frame is profV
            rm(profV, glad.t, profileCGH)

            ### multiseg - cghseg
            cgh.obj <- new("CGHdata", Y = as.data.frame(xx))
            CGHo <- new("CGHoptions")
            cghseg.t <- system.time(cghseg.res <- multiseg(cgh.obj, CGHo))
            cghseg.cp <- cghseg.res@mu[[1]][, 'end']
            roc$t[n, 'cghseg'] <- cghseg.t[3]
            roc$ncp[n, 'cghseg'] <- length(cghseg.cp)
            rm(cgh.obj, cghseg.t, CGHo)

            ### haarseg
            haarseg.t <- system.time(haarseg.res <- haarSeg(E))
            roc$t[n, 'haarseg'] <- haarseg.t[3]
            roc$ncp[n, 'haarseg'] <- nrow(haarseg.res$SegmentsTable)
            rm(haarseg.t)

            # fitted value per position and method, one column per method
            cghMu <- cghseg.res@mu[[1]]
            fitted <- list(
                bcp = bcp.res$posterior.mean,
                cbs = rep(cbs.res$output$seg.mean,
                          times = cbs.res$output$num.mark),
                cghseg = rep(cghMu[, 'mean'],
                             times = (cghMu[, 'end'] - cghMu[, 'begin'] + 1)),
                haarseg = haarseg.res$Segmented,
                glad = glad.res$profileValues$Smoothing,
                hmm = hmm.res$M.predicted,
                mine = rep(as.numeric(mcols(mine.res@res)[, 'AVG']),
                           times = width(mine.res@res)))
            if (bioHMM) {
                # keep the original column order: biohmm right after bcp
                fitted <- append(fitted,
                                 list(biohmm = biohmm.res$M.predicted), after = 1)
            }
            roc$A <- c(roc$A, list(do.call(data.frame, fitted)))

            cat('layout', g, 'simulation', n, 'finished\n')
        }
        return(roc)
    })
    return(nnres)
}

##################################################
# models comparison with simulated data
##################################################
library(DNAcopy)
library(bcp)
library(cghseg)
library(biomvRCNS)
library(aCGH)
library(HaarSeg)
library(GLAD)
library(snapCGH)
library(ROC)
seed <- 832314
nsim <- 100; ngrid = 100;

# pois count seq, state 3 of interest
# Effect sizes to sweep over, and the sojourn / emission settings handed to
# simROCDataE(); the emission parameters themselves are derived from `er`
# inside that function.
ers <- c(1.5, 1.75, 2)
soj <- list(type = 'pois', lambda = c(285, 5, 10), shift = c(0, 0, 0))
emis <- list(type = 'pois')

f o r( e r i n e r s ) {

r o c<−simROCDataE ( nsim=nsim , ngrid=ngrid , s o j = s o j , emis=emis , seed=seed , e r = e r )

recName<p a s t e 0 ( ’ emis . ’ , emis $ type , ’ . r o c . nsim . ’ , nsim , ’ . ngrid . ’ , ngrid , ’ . e r . ’ , er , ’ . RData ’ )

save( roc , seed , nsim , ngrid , er , s o j , emis , f i l e =recName ) dirName<p a s t e 0 ( ’ emis . ’ , emis $ type , ’ . r o c . nsim . ’ ,

nsim , ’ . ngrid . ’ , ngrid , ’ . e r . ’ , e r )

Related documents