diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java new file mode 100644 index 000000000..bddf27d84 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.SAMSequenceRecord; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RefWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.collections.ExpandingArrayList; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.StingException; + +import java.io.PrintStream; +import java.util.Collections; +import java.util.List; + +/** + * Prints out counts of the number of reference ordered data objects encountered. + * + * + *

Input

+ *

+ * One reference file only. And optionally -L intervals + *

+ * + *

Output

+ *

+ * If ok, nothing, else will throw an exception at the site where there's been a problem + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T QCRefWalker
+ * 
+ * + */ +public class QCRefWalker extends RefWalker { + @Output + public PrintStream out; + + String contigName = ""; + int contigStart, contigEnd; + IndexedFastaSequenceFile uncachedRef; + byte[] uncachedBases; + + @Override + public void initialize() { + super.initialize(); //To change body of overridden methods use File | Settings | File Templates. + uncachedRef = getToolkit().getReferenceDataSource().getReference(); + } + + private final void throwError(ReferenceContext ref, String message) { + throw new StingException(String.format("Site %s failed: %s", ref, message)); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + final String locusContigName = ref.getLocus().getContig(); + if ( ! locusContigName.equals(contigName) ) { + contigName = locusContigName; + ReferenceSequence refSeq = uncachedRef.getSequence(contigName); + contigStart = 1; + contigEnd = contigStart + refSeq.length(); + uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases(); + logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd)); + } + + final byte refBase = ref.getBase(); + if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) ) + throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase)); + + // check bases are equal + final int pos = (int)context.getPosition() - contigStart; + if ( pos > contigEnd ) + throwError(ref, String.format("off contig (len=%d)", contigEnd)); + final byte uncachedBase = uncachedBases[pos]; + + if ( uncachedBase != refBase ) + throwError(ref, String.format("Provided refBase (%d %c) not equal to uncached one (%d %c)", + refBase, (char)refBase, uncachedBase, (char)uncachedBase)); + + return 1; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer one, Integer sum) { + return one + sum; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 43ef4aa74..44b586bcd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -167,7 +167,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { if ( start < myCache.start || stop > myCache.stop || myCache.seq == null || myCache.seq.getContigIndex() != contigInfo.getSequenceIndex() ) { cacheMisses++; myCache.start = Math.max(start - cacheMissBackup, 0); - myCache.stop = Math.min(myCache.start + cacheSize, contigInfo.getSequenceLength()); + myCache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength()); myCache.seq = super.getSubsequenceAt(contig, myCache.start, myCache.stop); //System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop); } else {