// gatk-3.8/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java

package org.broadinstitute.sting.alignment;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import org.broadinstitute.sting.alignment.bwa.c.BWAPath;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.TreeMap;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
/**
* Validates alignments against existing reads.
*
* @author mhanna
* @version 0.1
*/
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.
*/
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
//SortedMap<Integer,Integer> alignmentCounts = new TreeMap<Integer,Integer>();
/**
* The instance used to generate alignments.
*/
private BWACAligner aligner = null;
/**
* Number of reads processed.
*/
private int count = 0;
/**
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
*/
@Override
public void initialize() {
BWACConfiguration configuration = new BWACConfiguration(prefix);
aligner = new BWACAligner(configuration);
}
/** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker.
*/
@Override
public boolean filter(char[] ref, SAMRecord read) {
return true;
//return read.getReadName().contains("SL-XBC:1:13:1296:1410#0");
}
/**
* Aligns a read to the given reference.
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of reads aligned by this map (aka 1).
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
//logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases();
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
boolean matches = true;
Iterator<Alignment[]> alignments = aligner.getAlignments(bases);
/*
BWAPath[] paths = aligner.getPaths(bases);
int bestCount = 0;
if(paths.length > 0) bestCount = paths[0].bestCount;
if(alignmentCounts.containsKey(bestCount))
alignmentCounts.put(bestCount,alignmentCounts.get(bestCount)+1);
else
alignmentCounts.put(bestCount,1);
*/
if(!alignments.hasNext()) {
matches = read.getReadUnmappedFlag();
}
else {
Alignment[] alignmentsOfBestQuality = alignments.next();
for(Alignment alignment: alignmentsOfBestQuality) {
matches = (alignment.getContigIndex() == read.getReferenceIndex());
matches &= (alignment.getAlignmentStart() == read.getAlignmentStart());
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
matches &= (alignment.getCigar().equals(read.getCigar()));
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
if(matches) break;
}
}
if(!matches) {
logger.error("Found mismatch!");
logger.error(String.format("Read %s:",read.getReadName()));
logger.error(String.format(" Contig index: %d",read.getReferenceIndex()));
logger.error(String.format(" Alignment start: %d", read.getAlignmentStart()));
logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
Alignment[] allAlignments = aligner.getAllAlignments(bases);
for(int i = 0; i < allAlignments.length; i++) {
logger.error(String.format("Alignment %d:",i));
logger.error(String.format(" Contig index: %d",allAlignments[i].getContigIndex()));
logger.error(String.format(" Alignment start: %d", allAlignments[i].getAlignmentStart()));
logger.error(String.format(" Negative strand: %b", allAlignments[i].isNegativeStrand()));
logger.error(String.format(" Cigar: %s", allAlignments[i].getCigarString()));
logger.error(String.format(" Mapping quality: %s%n", allAlignments[i].getMappingQuality()));
}
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
}
if(++count % 10000 == 0) {
logger.info(String.format("Processed %d reads", count));
}
//logger.info(String.format("validated read %s", read.getReadName()));
return 1;
}
/**
* Initial value for reduce. In this case, validated reads will be counted.
* @return 0, indicating no reads yet validated.
*/
@Override
public Integer reduceInit() { return 0; }
/**
* Calculates the number of reads processed.
* @param value Number of reads processed by this map.
* @param sum Number of reads processed before this map.
* @return Number of reads processed up to and including this map.
*/
@Override
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
/**
* Cleanup.
* @param result Number of reads processed.
*/
@Override
public void onTraversalDone(Integer result) {
aligner.close();
super.onTraversalDone(result);
/*
try {
PrintWriter output = new PrintWriter("GATK_distribution.out");
for(SortedMap.Entry<Integer,Integer> results: alignmentCounts.entrySet())
output.printf("%d\t%d%n", results.getKey(), results.getValue());
output.close();
}
catch(FileNotFoundException ex) {
throw new StingException("Unable to open output file");
}
*/
}
}