RealignerTargetCreator is officially live
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2697 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c1f154c31f
commit
476d6f3076
|
|
@ -1,137 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.AlignmentBlock;
|
||||
import net.sf.samtools.CigarElement;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Emits intervals consisting of indels from the aligned reads.
|
||||
*/
|
||||
@WalkerName("IndelIntervals")
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||
public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval, IndelIntervalWalker.Interval> {
|
||||
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
|
||||
boolean allow454 = false;
|
||||
@Argument(fullName="minIndelsPerInterval", shortName="minIndels", doc="min indels per interval", required=false)
|
||||
int minIntervalIndelCount = 1;
|
||||
|
||||
public void initialize() {}
|
||||
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return ( !read.getReadUnmappedFlag() && // mapped
|
||||
read.getMappingQuality() != 0 && // positive mapping quality
|
||||
read.getCigarLength() > 1 && // indel
|
||||
(allow454 || !Utils.is454Read(read)) );
|
||||
}
|
||||
|
||||
public Interval map(char[] ref, SAMRecord read) {
|
||||
// List<AlignmentBlock> blocks = read.getAlignmentBlocks();
|
||||
// long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1;
|
||||
// long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1;
|
||||
|
||||
long indelLeftEdge = -1;
|
||||
long indelRightEdge = -1;
|
||||
int lengthOnRef = 0; // length of the event on the reference (this preset value is right for insertion)
|
||||
int pos = read.getAlignmentStart();
|
||||
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||
switch( ce.getOperator() ) {
|
||||
case S :
|
||||
case H : break; // ignore clipping completely: alignment start is set to the first NON-clipped base
|
||||
case P : // skip padding and matching bases, advance position on the ref:
|
||||
case M : pos += ce.getLength(); break;
|
||||
case D :
|
||||
lengthOnRef = ce.getLength(); // deletion occupies non-zero number of bases on the ref
|
||||
case I :
|
||||
// for insertion we do not have to set lengthOnRef as it was already initialized to 0
|
||||
if ( indelLeftEdge == -1 ) { // it's the first indel in the current read
|
||||
indelLeftEdge = pos - 1; // set to the last (matching) base before the indel
|
||||
}
|
||||
pos += lengthOnRef;
|
||||
indelRightEdge = pos; // whether it is the first indel in the read or not, we set right edge immediately after it
|
||||
break;
|
||||
default :
|
||||
throw new StingException("Unrecognized cigar element '"+ce.getOperator()+"' in read "+read.getReadName());
|
||||
}
|
||||
}
|
||||
|
||||
// at this point indelLeftEdge == -1 or [indelLeftEdge,indelRightEdge] is the interval encompassing ALL indels in the current read:
|
||||
if ( indelLeftEdge == -1 ) return null;
|
||||
|
||||
// we've seen some bam files coming from other centers that have reads (and indels!!) hanging over the contig ends;
|
||||
// we do not want to deal with this stuff (what is it, anyway??)
|
||||
if ( indelRightEdge > GenomeLocParser.getContigInfo(read.getReferenceName()).getSequenceLength() ) {
|
||||
System.out.println("WARNING: read " + read.getReadName()+" contains indel(s) spanning past the contig end (read ignored).");
|
||||
return null;
|
||||
}
|
||||
|
||||
// if ( indelLeftEdge == 49563377 ) System.out.println("read: " +read.getReadName());
|
||||
GenomeLoc indelLoc = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
|
||||
GenomeLoc refLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
// if ( indelLeftEdge == 10313124 || indelLeftEdge == 10313170 ) System.out.println("read: " +read.getReadName() + " ; " + refLoc + " ; " + indelLoc);
|
||||
|
||||
return new Interval(refLoc, indelLoc);
|
||||
}
|
||||
|
||||
public Interval reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Note that the reduce step assumes that reductions occur in order (i.e. no hierarchical reductions),
|
||||
// although this can easily be changed if necessary.
|
||||
public Interval reduce(Interval value, Interval sum) {
|
||||
// if there is no interval to the left, then this is the first one
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
if ( value == null ) return sum; // nothing to do, wait for the next
|
||||
//System.out.println(sum + " ==> " + value);
|
||||
|
||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||
if ( !sum.readLoc.overlapsP(value.readLoc) ) {
|
||||
if ( sum.indelCount >= minIntervalIndelCount )
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
// otherwise, merge them
|
||||
return sum.merge(value);
|
||||
}
|
||||
|
||||
public void onTraversalDone(Interval result) {
|
||||
if ( result != null && result.indelCount >= minIntervalIndelCount )
|
||||
out.println(result);
|
||||
}
|
||||
|
||||
public class Interval {
|
||||
public GenomeLoc readLoc = null;
|
||||
public GenomeLoc indelLoc = null;
|
||||
public int indelCount;
|
||||
|
||||
Interval(GenomeLoc readLoc, GenomeLoc indelLoc) {
|
||||
this.indelLoc = indelLoc;
|
||||
this.readLoc = readLoc;
|
||||
indelCount = 1;
|
||||
}
|
||||
|
||||
public Interval merge(Interval i) {
|
||||
long indelLeftEdge = Math.min(this.indelLoc.getStart(), i.indelLoc.getStart());
|
||||
long indelRightEdge = Math.max(this.indelLoc.getStop(), i.indelLoc.getStop());
|
||||
GenomeLoc mergedIndelLoc = GenomeLocParser.createGenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge);
|
||||
Interval mergedInterval = new Interval(this.readLoc.merge(i.readLoc), mergedIndelLoc);
|
||||
mergedInterval.indelCount = this.indelCount + i.indelCount;
|
||||
return mergedInterval;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return indelLoc.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* Merges sets of intervals based on reads which overlap them; no two intervals in the final list
|
||||
* will have a read that even partially spans them both. A maximum interval size is enforced though.
|
||||
*/
|
||||
@WalkerName("IntervalMerger")
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||
public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
|
||||
|
||||
@Argument(fullName="intervalsToMerge", shortName="intervals", doc="Intervals to merge", required=true)
|
||||
List<String> intervalsSource = null;
|
||||
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
|
||||
boolean allow454 = false;
|
||||
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="max interval size", required=false)
|
||||
int maxIntervalSize = 500;
|
||||
|
||||
private LinkedList<GenomeLoc> intervals;
|
||||
private GenomeLoc currentInterval = null;
|
||||
private boolean currentIntervalIsUsed = false;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
intervals = new LinkedList<GenomeLoc>(GenomeLocParser.parseIntervals(intervalsSource, GenomeAnalysisEngine.instance.getArguments().intervalMerging));
|
||||
currentInterval = (intervals.size() > 0 ? intervals.removeFirst() : null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
if ( currentInterval == null ||
|
||||
(!allow454 && Utils.is454Read(read)) )
|
||||
return 0;
|
||||
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
// ignore all intervals which we've passed
|
||||
while ( loc.isPast(currentInterval) ) {
|
||||
if ( currentIntervalIsUsed && (maxIntervalSize <= 0 || currentInterval.getStop() - currentInterval.getStart() <= maxIntervalSize) ) {
|
||||
out.println(currentInterval);
|
||||
currentIntervalIsUsed = false;
|
||||
}
|
||||
if ( intervals.size() > 0 ) {
|
||||
currentInterval = intervals.removeFirst();
|
||||
} else {
|
||||
currentInterval = null;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// if we're not yet in the current interval, we're done
|
||||
if ( !loc.overlapsP(currentInterval))
|
||||
return 0;
|
||||
|
||||
// at this point, we're in the current interval.
|
||||
// now we can merge any other intervals which we overlap
|
||||
currentIntervalIsUsed = true;
|
||||
while ( intervals.size() > 0 && loc.overlapsP(intervals.getFirst()) )
|
||||
currentInterval = GenomeLocParser.setStop(currentInterval, intervals.removeFirst().getStop());
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
@Override
|
||||
public Integer reduce( Integer value, Integer accum ) {
|
||||
return accum + value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone( Integer value ) {
|
||||
if ( currentInterval != null && currentIntervalIsUsed &&
|
||||
(maxIntervalSize <= 0 || currentInterval.getStop() - currentInterval.getStart() <= maxIntervalSize) )
|
||||
out.println(currentInterval);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Emits intervals consisting of nearby loci with high mismatch rates.
|
||||
*/
|
||||
@WalkerName("MismatchIntervals")
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||
public class MismatchIntervalWalker extends LocusWalker<Pair<GenomeLoc, Boolean>, Pair<LinkedList<Boolean>, GenomeLoc>> {
|
||||
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy", required=false)
|
||||
int windowSize = 10;
|
||||
@Argument(fullName="mismatchFraction", shortName="mismatch", doc="fraction of mismatching base qualities threshold", required=false)
|
||||
double mismatchThreshold = 0.15;
|
||||
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
|
||||
boolean allow454 = false;
|
||||
|
||||
private final int minReadsAtInterval = 4;
|
||||
|
||||
public void initialize() {
|
||||
if ( windowSize < 1)
|
||||
throw new RuntimeException("Window Size must be a positive integer");
|
||||
}
|
||||
|
||||
public Pair<GenomeLoc, Boolean> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
char upperRef = Character.toUpperCase(ref.getBase());
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
int goodReads = 0, mismatchQualities = 0, totalQualities = 0;
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
if ( read.getMappingQuality() == 0 ||
|
||||
read.getAlignmentBlocks().size() > 1 ||
|
||||
(!allow454 && Utils.is454Read(read)) )
|
||||
continue;
|
||||
|
||||
goodReads++;
|
||||
int offset = offsets.get(i);
|
||||
int quality = (int)read.getBaseQualityString().charAt(offset) - 33;
|
||||
totalQualities += quality;
|
||||
|
||||
char base = Character.toUpperCase((char)read.getReadBases()[offset]);
|
||||
if ( base != upperRef )
|
||||
mismatchQualities += quality;
|
||||
}
|
||||
|
||||
boolean flag = false;
|
||||
if ( goodReads >= minReadsAtInterval && (double)mismatchQualities / (double)totalQualities > mismatchThreshold )
|
||||
flag = true;
|
||||
return new Pair<GenomeLoc, Boolean>(context.getLocation(), flag);
|
||||
}
|
||||
|
||||
public void onTraversalDone(Pair<LinkedList<Boolean>, GenomeLoc> sum) {
|
||||
if (sum.second != null)
|
||||
out.println(sum.second);
|
||||
}
|
||||
|
||||
public Pair<LinkedList<Boolean>, GenomeLoc> reduceInit() {
|
||||
return new Pair<LinkedList<Boolean>, GenomeLoc>(new LinkedList<Boolean>(), null);
|
||||
}
|
||||
|
||||
public Pair<LinkedList<Boolean>, GenomeLoc> reduce(Pair<GenomeLoc, Boolean> value, Pair<LinkedList<Boolean>, GenomeLoc> sum) {
|
||||
// if we hit a new contig, clear the list
|
||||
if ( sum.second != null && sum.second.getContigIndex() != value.first.getContigIndex() ) {
|
||||
out.println(sum.second);
|
||||
sum.first.clear();
|
||||
sum.second = null;
|
||||
}
|
||||
|
||||
sum.first.addLast(value.second);
|
||||
if ( sum.first.size() <= windowSize )
|
||||
return sum;
|
||||
|
||||
sum.first.remove();
|
||||
if ( !value.second )
|
||||
return sum;
|
||||
|
||||
int mismatches = 0;
|
||||
int firstMismatch = -1;
|
||||
for (int i = 0; i < windowSize; i++) {
|
||||
if ( sum.first.get(i) ) {
|
||||
mismatches++;
|
||||
if ( firstMismatch == -1 )
|
||||
firstMismatch = i;
|
||||
}
|
||||
}
|
||||
|
||||
if ( mismatches > 1 ) {
|
||||
// if there is no interval to the left, then this is the first one
|
||||
if ( sum.second == null ) {
|
||||
sum.second = value.first;
|
||||
sum.second = GenomeLocParser.setStart(sum.second, sum.second.getStart() - windowSize + firstMismatch + 1);
|
||||
}
|
||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||
else if ( value.first.getStop() - sum.second.getStop() > windowSize ) {
|
||||
out.println(sum.second);
|
||||
sum.second = value.first;
|
||||
sum.second = GenomeLocParser.setStart(sum.second,sum.second.getStart() - windowSize + firstMismatch + 1);
|
||||
}
|
||||
// otherwise, merge them
|
||||
else {
|
||||
sum.second = GenomeLocParser.setStop(sum.second, value.first.getStop());
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -15,30 +15,24 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads.
|
||||
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 and MQ0 reads.
|
||||
*/
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||
public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
|
||||
|
||||
// mismatch/entropy arguments
|
||||
// mismatch/entropy/SNP arguments
|
||||
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false)
|
||||
protected int windowSize = 10;
|
||||
|
||||
@Argument(fullName="mismatchFraction", shortName="mismatch", doc="fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to <= 0 or > 1", required=false)
|
||||
protected double mismatchThreshold = 0.15;
|
||||
|
||||
|
||||
// observed indels arguments
|
||||
@Argument(fullName="minIndelsPerInterval", shortName="minIndels", doc="min indels per interval", required=false)
|
||||
int minIntervalIndelCount = 1;
|
||||
|
||||
@Argument(fullName="minReadsAtLocus", shortName="minReads", doc="minimum reads at a locus to enable using the entropy calculation", required=false)
|
||||
protected int minReadsAtLocus = 4;
|
||||
|
||||
// interval merging arguments
|
||||
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="max interval size", required=false)
|
||||
int maxIntervalSize = 500;
|
||||
|
||||
|
||||
private final int minReadsAtInterval = 4;
|
||||
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="maximum interval size", required=false)
|
||||
protected int maxIntervalSize = 500;
|
||||
|
||||
@Override
|
||||
public boolean generateExtendedEvents() { return true; }
|
||||
|
|
@ -115,7 +109,7 @@ public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.E
|
|||
// make sure we're supposed to look for high entropy
|
||||
if ( mismatchThreshold > 0.0 &&
|
||||
mismatchThreshold <= 1.0 &&
|
||||
pileup.size() >= minReadsAtInterval &&
|
||||
pileup.size() >= minReadsAtLocus &&
|
||||
(double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
|
||||
hasPointEvent = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,68 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariationRod;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
/**
|
||||
* Given a ROD track SNP calls called "snps", emits intervals consisting of clustered SNPs.
|
||||
*/
|
||||
@WalkerName("SNPClusters")
|
||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type= VariationRod.class)})
|
||||
public class SNPClusterWalker extends RefWalker<GenomeLoc, GenomeLoc> {
|
||||
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false)
|
||||
int windowSize = 10;
|
||||
|
||||
public void initialize() {
|
||||
if ( windowSize < 1)
|
||||
throw new RuntimeException("Window Size must be a positive integer");
|
||||
}
|
||||
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
Variation snp = (Variation)tracker.lookup("snps", null);
|
||||
return (snp != null && snp.isBiallelic() && snp.isSNP()) ? context.getLocation() : null;
|
||||
}
|
||||
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null && sum.getStart() != sum.getStop() )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
// ignore non-SNP variants
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
// if we have no previous SNPs start with the new location
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if we hit a new contig, emit and start with the new location
|
||||
if ( sum.getContigIndex() != value.getContigIndex() ) {
|
||||
if ( sum.getStart() != sum.getStop() )
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
|
||||
// if the last SNP location was within a window, merge them
|
||||
if ( value.getStart() - sum.getStop() <= windowSize ) {
|
||||
sum = GenomeLocParser.setStop(sum,value.getStart());
|
||||
return sum;
|
||||
}
|
||||
|
||||
// otherwise, emit and start with the new location
|
||||
if ( sum.getStart() != sum.getStop() )
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -10,22 +10,15 @@ public class IntervalsIntegrationTest extends WalkerTest {
|
|||
public void testIntervals() {
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T IndelIntervals -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -o %s",
|
||||
"-T RealignerTargetCreator -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("76f97b91921f427ab639b6b8228ac4dc"));
|
||||
executeTest("testIndelIntervals", spec1);
|
||||
Arrays.asList("d21e83a8b0d3f63acd9ca3b0b636e515"));
|
||||
executeTest("test standard", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-T MismatchIntervals -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -o %s",
|
||||
"-T RealignerTargetCreator -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("31e8b5d4c42f2c63c08b8f6b8e10ac99"));
|
||||
executeTest("testMismatchIntervals", spec2);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
"-T IntervalMerger -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -intervals " + validationDataLocation + "indelIntervals.test -intervals " + validationDataLocation + "mismatchIntervals.test -o %s",
|
||||
1,
|
||||
Arrays.asList("bf1f23667ef0065bbcb9754f50c2d664"));
|
||||
executeTest("testMergeIntervals", spec3);
|
||||
|
||||
Arrays.asList("bfccfa50f62d10ee2fe8cfa68fb70002"));
|
||||
executeTest("test dbsnp", spec2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue