A downsampling validator. Compares the generated pileup passed in from the alignment context to the reads,
passed in as a Tribble SAM text feature. If the generated pileup contains a valid set of reads according to the downsampling rules, the test passes. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3421 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e9ee55d7dd
commit
a40e64e47b
|
|
@ -4,6 +4,7 @@ import net.sf.picard.filter.FilteringIterator;
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||||
|
|
@ -127,7 +128,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
|
|
||||||
// Find the next.
|
// Find the next.
|
||||||
seedNextLocus();
|
seedNextLocus();
|
||||||
if( sourceInfo.getDownsamplingMethod().toCoverage != null )
|
if( sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null )
|
||||||
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
|
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
|
||||||
|
|
||||||
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileup
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
|
public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
|
||||||
// the number of tokens we expect to parse from a dbSNP line
|
// the number of tokens we expect to parse from a pileup line
|
||||||
private static final int expectedTokenCount = 10;
|
private static final int expectedTokenCount = 10;
|
||||||
private static final char fldDelim = '\t';
|
private static final char fldDelim = '\t';
|
||||||
|
|
||||||
|
|
@ -59,8 +59,7 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
|
||||||
* Return the # of header lines for this file.
|
* Return the # of header lines for this file.
|
||||||
*
|
*
|
||||||
* @param reader the line reader
|
* @param reader the line reader
|
||||||
* @return 0 in this case, we assume no header lines. The DbSNP file may have a
|
* @return 0 in this case, we assume no header lines.
|
||||||
* header line beginning with '#', but we can ignore that in the decode function.
|
|
||||||
*/
|
*/
|
||||||
public int readHeader(LineReader reader) {
|
public int readHeader(LineReader reader) {
|
||||||
// we don't require a header line, but it may exist. We'll deal with that above.
|
// we don't require a header line, but it may exist. We'll deal with that above.
|
||||||
|
|
@ -82,10 +81,9 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
|
||||||
"(expected = " + expectedTokenCount + ", saw = " + count + " on " +
|
"(expected = " + expectedTokenCount + ", saw = " + count + " on " +
|
||||||
"line = " + line + ")");
|
"line = " + line + ")");
|
||||||
|
|
||||||
SAMPileupFeature feature = new SAMPileupFeature(tokens[0],
|
SAMPileupFeature feature = new SAMPileupFeature();
|
||||||
0,
|
|
||||||
0);
|
|
||||||
|
|
||||||
|
feature.setChr(tokens[0]);
|
||||||
feature.setStart(Integer.parseInt(tokens[1]));
|
feature.setStart(Integer.parseInt(tokens[1]));
|
||||||
|
|
||||||
if(tokens[2].length() != 1)
|
if(tokens[2].length() != 1)
|
||||||
|
|
|
||||||
|
|
@ -59,19 +59,9 @@ public class SAMPileupFeature implements Feature {
|
||||||
private double variantScore = 0;
|
private double variantScore = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create the dbSNP feature, given the following information:
|
* create the pileup feature. Default protection so that only other classes in this package can create it.
|
||||||
*
|
|
||||||
* @param contig the contig rsID
|
|
||||||
* @param start the start position, one based
|
|
||||||
* @param stop the stop position, one based
|
|
||||||
*/
|
*/
|
||||||
SAMPileupFeature(String contig,
|
SAMPileupFeature() {}
|
||||||
int start,
|
|
||||||
int stop) {
|
|
||||||
this.contig = contig;
|
|
||||||
this.start = start;
|
|
||||||
this.stop = stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getChr() {
|
public String getChr() {
|
||||||
return contig;
|
return contig;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata.features.samread;
|
||||||
|
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
|
import org.broad.tribble.exception.CodecLineParsingException;
|
||||||
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
import org.broad.tribble.util.LineReader;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import net.sf.samtools.TextCigarCodec;
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes a simple SAM text string.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
|
||||||
|
/* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
|
||||||
|
|
||||||
|
// the number of tokens we expect to parse from a read line
|
||||||
|
private static final int expectedTokenCount = 11;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the # of header lines for this file.
|
||||||
|
*
|
||||||
|
* @param reader the line reader
|
||||||
|
* @return 0 in this case, we assume no header lines. The reads file may have a
|
||||||
|
* header line beginning with '@', but we can ignore that in the decode function.
|
||||||
|
*/
|
||||||
|
public int readHeader(LineReader reader) {
|
||||||
|
// we don't require a header line, but it may exist. We'll deal with that above.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a single line in a SAM text file.
|
||||||
|
* @param line line to decode.
|
||||||
|
* @return A SAMReadFeature modeling that line.
|
||||||
|
*/
|
||||||
|
public SAMReadFeature decode(String line) {
|
||||||
|
// we may be asked to process a header line; ignore it
|
||||||
|
if (line.startsWith("@")) return null;
|
||||||
|
|
||||||
|
String[] tokens = new String[expectedTokenCount];
|
||||||
|
|
||||||
|
// split the line
|
||||||
|
int count = ParsingUtils.splitWhitespace(line,tokens);
|
||||||
|
|
||||||
|
// check to see if we've parsed the string into the right number of tokens (expectedTokenCount)
|
||||||
|
if (count != expectedTokenCount)
|
||||||
|
throw new CodecLineParsingException("the SAM read line didn't have the expected number of tokens " +
|
||||||
|
"(expected = " + expectedTokenCount + ", saw = " + count + " on " +
|
||||||
|
"line = " + line + ")");
|
||||||
|
|
||||||
|
final String readName = tokens[0];
|
||||||
|
final int flags = Integer.parseInt(tokens[1]);
|
||||||
|
final String contigName = tokens[2];
|
||||||
|
final int alignmentStart = Integer.parseInt(tokens[3]);
|
||||||
|
final int mapQ = Integer.parseInt(tokens[4]);
|
||||||
|
final String cigarString = tokens[5];
|
||||||
|
final String mateContigName = tokens[6];
|
||||||
|
final int mateAlignmentStart = Integer.parseInt(tokens[7]);
|
||||||
|
final int inferredInsertSize = Integer.parseInt(tokens[8]);
|
||||||
|
final byte[] bases = StringUtil.stringToBytes(tokens[9]);
|
||||||
|
final byte[] qualities = StringUtil.stringToBytes(tokens[10]);
|
||||||
|
|
||||||
|
// Infer the alignment end.
|
||||||
|
Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
|
||||||
|
int alignmentEnd = alignmentStart + cigar.getReferenceLength() - 1;
|
||||||
|
|
||||||
|
// Remove printable character conversion from the qualities.
|
||||||
|
for(byte quality: qualities) quality -= 33;
|
||||||
|
|
||||||
|
return new SAMReadFeature(readName,
|
||||||
|
flags,
|
||||||
|
contigName,
|
||||||
|
alignmentStart,
|
||||||
|
alignmentEnd,
|
||||||
|
mapQ,
|
||||||
|
cigarString,
|
||||||
|
mateContigName,
|
||||||
|
mateAlignmentStart,
|
||||||
|
inferredInsertSize,
|
||||||
|
bases,
|
||||||
|
qualities);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,198 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata.features.samread;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a SAM record read from a SAM text format file.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class SAMReadFeature implements Feature {
|
||||||
|
/**
|
||||||
|
* Name of this read.
|
||||||
|
*/
|
||||||
|
private final String readName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flags associated with this read.
|
||||||
|
*/
|
||||||
|
private final int flags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contig to which this read is aligned.
|
||||||
|
*/
|
||||||
|
private final String contig;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Position on contig to which this read is aligned.
|
||||||
|
*/
|
||||||
|
private final int alignmentStart;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Position on contig at which this alignment ends.
|
||||||
|
*/
|
||||||
|
private final int alignmentEnd;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mapping quality for the read.
|
||||||
|
*/
|
||||||
|
private final int mapQ;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cigar string matching read to reference.
|
||||||
|
*/
|
||||||
|
private final String cigarString;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contig to which this read's pair is aligned.
|
||||||
|
*/
|
||||||
|
private final String mateContig;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Position in contig to which this read's pair is aligned.
|
||||||
|
*/
|
||||||
|
private final int mateAlignmentStart;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Size between pairs.
|
||||||
|
*/
|
||||||
|
private final int insertSize;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bases in this read.
|
||||||
|
*/
|
||||||
|
private final byte[] bases;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Qualities constituting this read.
|
||||||
|
*/
|
||||||
|
private final byte[] qualities;
|
||||||
|
|
||||||
|
// Tags are not currently supported.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create the read feature. Default protection so that only other classes in this package can create it.
|
||||||
|
*/
|
||||||
|
SAMReadFeature(final String readName,
|
||||||
|
final int flags,
|
||||||
|
final String contig,
|
||||||
|
final int alignmentStart,
|
||||||
|
final int alignmentEnd,
|
||||||
|
final int mapQ,
|
||||||
|
final String cigarString,
|
||||||
|
final String mateContig,
|
||||||
|
final int mateAlignmentStart,
|
||||||
|
final int insertSize,
|
||||||
|
final byte[] bases,
|
||||||
|
final byte[] qualities) {
|
||||||
|
this.readName = readName;
|
||||||
|
this.flags = flags;
|
||||||
|
this.contig = contig;
|
||||||
|
this.alignmentStart = alignmentStart;
|
||||||
|
this.alignmentEnd = alignmentEnd;
|
||||||
|
this.mapQ = mapQ;
|
||||||
|
this.cigarString = cigarString;
|
||||||
|
this.mateContig = mateContig;
|
||||||
|
this.mateAlignmentStart = mateAlignmentStart;
|
||||||
|
this.insertSize = insertSize;
|
||||||
|
this.bases = bases;
|
||||||
|
this.qualities = qualities;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReadName() {
|
||||||
|
return readName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFlags() {
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReferenceName() {
|
||||||
|
return contig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAlignmentStart() {
|
||||||
|
return alignmentStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAlignmentEnd() {
|
||||||
|
return alignmentEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An alias for getReferenceName, required by Feature interface.
|
||||||
|
* @return Aligned contig name.
|
||||||
|
*/
|
||||||
|
public String getChr() {
|
||||||
|
return getReferenceName();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An alias for getAlignmentEnd(), required by Feature interface.
|
||||||
|
* @return End of alignment, inclusive.
|
||||||
|
*/
|
||||||
|
public int getStart() {
|
||||||
|
return getAlignmentStart();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An alias for getAlignmentStart(), required by Feature interface.
|
||||||
|
* @return Aligned position. 1-based.
|
||||||
|
*/
|
||||||
|
public int getEnd() {
|
||||||
|
return getAlignmentEnd();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMappingQuality() {
|
||||||
|
return mapQ;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCigarString() {
|
||||||
|
return cigarString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMateReferenceName() {
|
||||||
|
return mateContig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMateAlignmentStart() {
|
||||||
|
return mateAlignmentStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getInferredInsertSize() {
|
||||||
|
return insertSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getReadBases() {
|
||||||
|
return bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getReadQualities() {
|
||||||
|
return qualities;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.features.samread.SAMReadFeature;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks a given downsampled pileup against the full pileup to ensure that the downsampled pileup could
|
||||||
|
* possibly be a valid version of the full pileup.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class DownsamplingValidationWalker extends LocusWalker<Integer,Long> {
|
||||||
|
@Argument(fullName="max_expected_number_of_reads",shortName="menr",doc="The expected number of reads chosed by the downsampler. Fewer than this number might be added to a given alignment start, but more than this should never be.",required=true)
|
||||||
|
private int maxExpectedNumberOfReads = 0;
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
|
Collection<Object> allFeatures = tracker.getReferenceMetaData("reads");
|
||||||
|
|
||||||
|
Collection<SAMReadFeature> unsampledReadsStartingAtThisLocus = new ArrayList<SAMReadFeature>();
|
||||||
|
for(Object featureCandidate: allFeatures) {
|
||||||
|
if(featureCandidate instanceof SAMReadFeature) {
|
||||||
|
SAMReadFeature feature = (SAMReadFeature)featureCandidate;
|
||||||
|
if(feature.getReferenceName().equals(ref.getLocus().getContig()) && feature.getAlignmentStart() == ref.getLocus().getStart())
|
||||||
|
unsampledReadsStartingAtThisLocus.add(feature);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collection<SAMRecord> sampledReadsStartingAtThisLocus = new ArrayList<SAMRecord>();
|
||||||
|
for(SAMRecord read: pileup.getReads()) {
|
||||||
|
if(read.getReferenceName().equals(ref.getLocus().getContig()) && read.getAlignmentStart() == ref.getLocus().getStart())
|
||||||
|
sampledReadsStartingAtThisLocus.add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
int matchingReadsFound = 0;
|
||||||
|
if(unsampledReadsStartingAtThisLocus.isEmpty()) {
|
||||||
|
if(!sampledReadsStartingAtThisLocus.isEmpty())
|
||||||
|
throw new StingException("Downsampler hallucinated a read starting at locus "+ref.getLocus());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
boolean foundMatch = false;
|
||||||
|
for(SAMReadFeature unsampledRead: unsampledReadsStartingAtThisLocus) {
|
||||||
|
for(SAMRecord sampledRead: sampledReadsStartingAtThisLocus) {
|
||||||
|
if(unsampledRead.getReadName().equals(sampledRead.getReadName()) &&
|
||||||
|
Arrays.equals(unsampledRead.getReadBases(),sampledRead.getReadBases())) {
|
||||||
|
foundMatch = true;
|
||||||
|
matchingReadsFound++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!foundMatch)
|
||||||
|
throw new StingException("Downsampler failed to include any read starting at locus "+ref.getLocus());
|
||||||
|
|
||||||
|
if(matchingReadsFound > maxExpectedNumberOfReads)
|
||||||
|
throw new StingException("Downsampler found too many reads starting at locus "+ref.getLocus());
|
||||||
|
}
|
||||||
|
|
||||||
|
return matchingReadsFound;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given result of map function
|
||||||
|
public Long reduceInit() { return 0L; }
|
||||||
|
public Long reduce(Integer value, Long sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long treeReduce(Long lhs, Long rhs ) {
|
||||||
|
return lhs+rhs;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue