135 lines
6.1 KiB
Java
135 lines
6.1 KiB
Java
/*
|
|
* Copyright (c) 2009 The Broad Institute
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use,
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following
|
|
* conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
package org.broadinstitute.sting.gatk.walkers;
|
|
|
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|
import org.broadinstitute.sting.utils.Utils;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
/**
|
|
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
|
|
*
|
|
* Print the alignment in the pileup format. In the pileup format, each line represents a genomic position,
|
|
* consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping
|
|
* qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all
|
|
* encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand,
|
|
* a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the
|
|
* reverse strand.
|
|
*
|
|
* A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next
|
|
* reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence.
|
|
* Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
|
|
* Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
|
|
* separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality.
|
|
* A symbol '$' marks the end of a read segment.
|
|
*/
|
|
public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
|
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
|
public boolean alwaysShowSecondBase = false;
|
|
|
|
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
|
public boolean qualsAsInts = false;
|
|
|
|
@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
|
|
public boolean IGNORE_UNCOVERED_BASES = false;
|
|
|
|
public void initialize() {
|
|
}
|
|
|
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
|
|
|
String secondBasePileup = "";
|
|
if(shouldShowSecondaryBasePileup(pileup))
|
|
secondBasePileup = getSecondBasePileup(pileup);
|
|
String rods = getReferenceOrderedData( tracker );
|
|
|
|
out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods);
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Given result of map function
|
|
public Integer reduceInit() { return 0; }
|
|
public Integer reduce(Integer value, Integer sum) {
|
|
return treeReduce(sum,value);
|
|
}
|
|
public Integer treeReduce(Integer lhs, Integer rhs) {
|
|
return lhs + rhs;
|
|
}
|
|
|
|
/**
|
|
* Should the secondary base be shown under all circumstances?
|
|
* @param pileup The ReadBackedPileup at the current locus.
|
|
* @return True, if a secondary base pileup should always be shown.
|
|
*/
|
|
private boolean shouldShowSecondaryBasePileup( ReadBackedPileup pileup ) {
|
|
return ( pileup.hasSecondaryBases() || alwaysShowSecondBase );
|
|
}
|
|
|
|
/**
|
|
* Gets second base information for the pileup, if requested.
|
|
* @param pileup Pileup from which to extract secondary base info.
|
|
* @return String representation of the secondary base.
|
|
*/
|
|
private String getSecondBasePileup( ReadBackedPileup pileup ) {
|
|
if( pileup.hasSecondaryBases() )
|
|
return " " + new String(pileup.getSecondaryBases());
|
|
else
|
|
return " " + Utils.dupString('N', pileup.size());
|
|
}
|
|
|
|
/**
|
|
* Get a string representation the reference-ordered data.
|
|
* @param tracker Container for the reference-ordered data.
|
|
* @return String representation of the reference-ordered data.
|
|
*/
|
|
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
|
ArrayList<String> rodStrings = new ArrayList<String>();
|
|
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
|
if ( datum != null && ! (datum instanceof rodDbSNP)) {
|
|
rodStrings.add(datum.toSimpleString());
|
|
}
|
|
}
|
|
String rodString = Utils.join(", ", rodStrings);
|
|
|
|
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null);
|
|
if ( dbsnp != null )
|
|
rodString += dbsnp.toMediumString();
|
|
|
|
if ( !rodString.equals("") )
|
|
rodString = "[ROD: " + rodString + "]";
|
|
|
|
return rodString;
|
|
}
|
|
}
|