2009-07-02 22:54:01 +08:00
/ *
* Copyright ( c ) 2009 The Broad Institute
* Permission is hereby granted , free of charge , to any person
* obtaining a copy of this software and associated documentation
* files ( the "Software" ) , to deal in the Software without
* restriction , including without limitation the rights to use ,
* copy , modify , merge , publish , distribute , sublicense , and / or sell
* copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following
* conditions :
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED "AS IS" , WITHOUT WARRANTY OF ANY KIND ,
* EXPRESS OR IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT . IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY ,
* WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING
* FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE .
* /
2009-03-16 06:42:24 +08:00
package org.broadinstitute.sting.gatk.walkers ;
2009-08-05 05:01:37 +08:00
import org.broadinstitute.sting.gatk.contexts.AlignmentContext ;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext ;
2009-03-16 06:42:24 +08:00
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum ;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP ;
2009-04-04 03:54:54 +08:00
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker ;
2009-03-29 04:37:27 +08:00
import org.broadinstitute.sting.utils.cmdLine.Argument ;
2009-11-25 11:51:41 +08:00
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup ;
2009-05-22 06:25:37 +08:00
import org.broadinstitute.sting.utils.Utils ;
2009-03-16 06:42:24 +08:00
2009-07-22 05:58:27 +08:00
import java.util.ArrayList ;
2009-03-16 06:42:24 +08:00
/ * *
2009-05-22 06:25:37 +08:00
* samtools pileup [ - f in . ref . fasta ] [ - t in . ref_list ] [ - l in . site_list ] [ - iscg ] [ - T theta ] [ - N nHap ] [ - r pairDiffRate ] < in . alignment >
*
* Print the alignment in the pileup format . In the pileup format , each line represents a genomic position ,
* consisting of chromosome name , coordinate , reference base , read bases , read qualities and alignment mapping
* qualities . Information on match , mismatch , indel , strand , mapping quality and start and end of a read are all
* encoded at the read base column . At this column , a dot stands for a match to the reference base on the forward strand ,
2009-05-22 22:07:07 +08:00
* a comma for a match on the reverse strand , ' ACGTN ' for a mismatch on the forward strand and ' acgtn ' for a mismatch on the
2009-05-22 06:25:37 +08:00
* reverse strand .
*
2009-05-22 22:07:07 +08:00
* A pattern ' \ + [ 0 - 9 ] + [ ACGTNacgtn ] + ' indicates there is an insertion between this reference position and the next
2009-05-22 06:25:37 +08:00
* reference position . The length of the insertion is given by the integer in the pattern , followed by the inserted sequence .
2009-05-22 22:07:07 +08:00
* Similarly , a pattern ' - [ 0 - 9 ] + [ ACGTNacgtn ] + ' represents a deletion from the reference .
* Also at the read base column , a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
* separated by ' N / S / H ' CIGAR operations . The ASCII of the character following '^' minus 33 gives the mapping quality .
* A symbol '$' marks the end of a read segment .
2009-03-16 06:42:24 +08:00
* /
2009-05-02 05:40:46 +08:00
public class PileupWalker extends LocusWalker < Integer , Integer > implements TreeReducible < Integer > {
2009-05-22 06:25:37 +08:00
@Argument ( fullName = "alwaysShowSecondBase" , doc = "If true, prints dummy bases for the second bases in the BAM file where they are missing" , required = false )
public boolean alwaysShowSecondBase = false ;
2009-06-05 02:37:39 +08:00
@Argument ( fullName = "qualsAsInts" , doc = "If true, prints out qualities in the pileup as comma-separated integers" , required = false )
public boolean qualsAsInts = false ;
2009-04-22 06:27:26 +08:00
2009-07-02 22:54:01 +08:00
@Argument ( fullName = "ignore_uncovered_bases" , shortName = "skip_uncov" , doc = "Output nothing when a base is uncovered" )
public boolean IGNORE_UNCOVERED_BASES = false ;
2009-03-29 04:37:27 +08:00
2009-03-16 06:42:24 +08:00
public void initialize ( ) {
}
2009-08-05 05:01:37 +08:00
public Integer map ( RefMetaDataTracker tracker , ReferenceContext ref , AlignmentContext context ) {
ReadBackedPileup pileup = new ReadBackedPileup ( ref . getBase ( ) , context ) ;
2009-04-15 06:13:10 +08:00
2009-07-02 22:54:01 +08:00
String secondBasePileup = "" ;
if ( shouldShowSecondaryBasePileup ( pileup ) )
secondBasePileup = getSecondBasePileup ( pileup ) ;
String rods = getReferenceOrderedData ( tracker ) ;
2009-04-04 03:54:54 +08:00
2009-07-02 22:54:01 +08:00
out . printf ( "%s%s %s%n" , pileup . getPileupString ( qualsAsInts ) , secondBasePileup , rods ) ;
2009-03-16 06:42:24 +08:00
return 1 ;
}
// Given result of map function
public Integer reduceInit ( ) { return 0 ; }
public Integer reduce ( Integer value , Integer sum ) {
2009-05-07 07:26:21 +08:00
return treeReduce ( sum , value ) ;
2009-05-02 05:40:46 +08:00
}
public Integer treeReduce ( Integer lhs , Integer rhs ) {
return lhs + rhs ;
2009-03-16 06:42:24 +08:00
}
2009-07-02 22:54:01 +08:00
/ * *
* Should the secondary base be shown under all circumstances ?
* @param pileup The ReadBackedPileup at the current locus .
* @return True , if a secondary base pileup should always be shown .
* /
private boolean shouldShowSecondaryBasePileup ( ReadBackedPileup pileup ) {
2009-11-25 11:51:41 +08:00
return ( pileup . hasSecondaryBases ( ) | | alwaysShowSecondBase ) ;
2009-07-02 22:54:01 +08:00
}
/ * *
* Gets second base information for the pileup , if requested .
* @param pileup Pileup from which to extract secondary base info .
* @return String representation of the secondary base .
* /
private String getSecondBasePileup ( ReadBackedPileup pileup ) {
2009-11-25 11:51:41 +08:00
if ( pileup . hasSecondaryBases ( ) )
return " " + new String ( pileup . getSecondaryBases ( ) ) ;
2009-07-02 22:54:01 +08:00
else
2009-11-25 11:51:41 +08:00
return " " + Utils . dupString ( 'N' , pileup . size ( ) ) ;
2009-07-02 22:54:01 +08:00
}
/ * *
* Get a string representation the reference - ordered data .
* @param tracker Container for the reference - ordered data .
* @return String representation of the reference - ordered data .
* /
private String getReferenceOrderedData ( RefMetaDataTracker tracker ) {
2009-07-22 05:58:27 +08:00
ArrayList < String > rodStrings = new ArrayList < String > ( ) ;
2009-07-02 22:54:01 +08:00
for ( ReferenceOrderedDatum datum : tracker . getAllRods ( ) ) {
if ( datum ! = null & & ! ( datum instanceof rodDbSNP ) ) {
2009-07-22 05:58:27 +08:00
rodStrings . add ( datum . toSimpleString ( ) ) ;
2009-07-02 22:54:01 +08:00
}
}
2009-07-22 05:58:27 +08:00
String rodString = Utils . join ( ", " , rodStrings ) ;
2009-07-02 22:54:01 +08:00
rodDbSNP dbsnp = ( rodDbSNP ) tracker . lookup ( "dbSNP" , null ) ;
if ( dbsnp ! = null )
rodString + = dbsnp . toMediumString ( ) ;
if ( ! rodString . equals ( "" ) )
rodString = "[ROD: " + rodString + "]" ;
return rodString ;
}
2009-03-16 06:42:24 +08:00
}