Rev'ing Picard, which includes the update to get all the reads in the query region (GSA-173). With it come a bunch of fixes, including retiring the FourBaseRecaller code, and updated md5s for some walker tests.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1751 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-30 20:37:59 +00:00
parent 303972aa4b
commit 2e4949c4d6
17 changed files with 161 additions and 404 deletions

View File

@ -1,13 +1,12 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.Arrays;
import net.sf.picard.util.SequenceUtil;
import java.util.List;
/**
* ReferenceOrderedDatum class to hold HapMap AlleleFrequency Data

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.picard.util.SequenceUtil;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.ArrayList;
import java.util.Arrays;

View File

@ -163,9 +163,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
while ( iter.hasNext() )
writer.addAlignment(iter.next().getRecord());
}
if ( writer != null ) {
writer.close();
}
if ( OUT_INDELS != null ) {
try {
indelOutput.close();

View File

@ -25,21 +25,21 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
import java.io.FileNotFoundException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@WalkerName("TableRecalibration")
@Requires({DataSource.READS}) // , DataSource.REFERENCE})
@ -320,17 +320,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
return recalQuals;
}
// --------------------------------------------------------------------------------------------------------------
//
// Standard I/O routines
//
// --------------------------------------------------------------------------------------------------------------
public void onTraversalDone(SAMFileWriter output) {
if ( output != null ) {
output.close();
}
}
public SAMFileWriter reduceInit() {
return outputBam;
}

View File

@ -1,242 +0,0 @@
package org.broadinstitute.sting.secondarybase;
import edu.mit.broad.picard.illumina.AbstractBustardFileParser;
import edu.mit.broad.picard.illumina.BustardFileParser;
import edu.mit.broad.picard.illumina.BustardFileParser_1_1;
import edu.mit.broad.picard.illumina.BustardReadData;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
public class FourBaseRecaller extends CommandLineProgram {
public static FourBaseRecaller Instance = null;
@Argument(fullName="dir", shortName="D", doc="Illumina Bustard directory")
public File DIR;
@Argument(fullName="lane", shortName="L", doc="Illumina flowcell lane")
public int LANE;
@Argument(fullName="run_barcode", shortName="B", doc="Illumina Run Barcode (e.g. 305PJAAXX080716)")
public String RUN_BARCODE;
@Argument(fullName="out", shortName="O", doc="Output path for sam file")
public File OUT;
@Argument(fullName="end", shortName="E", doc="End of read to process (0 = whole read, i.e. unpaired; 1 = first end; 2 = second end)", required=false)
public int END = 0;
@Argument(fullName="tlim", shortName="T", doc="Number of reads to use for parameter initialization", required=false)
public int TRAINING_LIMIT = 1000000000;
@Argument(fullName="clim", shortName="C", doc="Number of reads to basecall", required=false)
public int CALLING_LIMIT = 1000000000;
@Argument(fullName="raw", shortName="R", doc="Use raw intensities?", required=false)
public Boolean RAW = false;
@Argument(fullName="old", shortName="1", doc="Old Bustard 1.1 mode?", required=false)
public Boolean OLD = false;
@Argument(fullName="parse_sig2", shortName="P", doc="Parse sig2 files for Bustard pre-1.1 files (pre-1.1 only)")
public Boolean PARSE_SIG2_FILES = false;
@Argument(fullName="barcode", shortName="BCODE", doc="Only process reads that match this barcode (pre-1.1 only)")
public String BARCODE = null;
@Argument(fullName="barcode_cycle", shortName="BCYCLE", doc="1-based cycle number where barcode starts (pre-1.1 only)")
public Integer BARCODE_CYCLE = null;
@Argument(fullName="first_read_length", shortName="F", doc="First read length (v1.1 only)")
public Integer FIRST_READ_LENGTH;
public static void main(String[] argv) {
Instance = new FourBaseRecaller();
start(Instance, argv);
}
protected int execute() {
boolean isPaired = (END > 0);
// Set up debugging paths
File debugdir = new File(OUT.getPath() + ".debug/");
debugdir.mkdir();
PrintWriter debugout = null;
try {
debugout = new PrintWriter(debugdir.getPath() + "/debug.out");
} catch (IOException e) {
}
AbstractBustardFileParser bfp;
BustardReadData bread;
FirecrestFileParser ffp;
FirecrestReadData fread;
bfp = OLD ? new BustardFileParser_1_1(DIR, LANE, isPaired, RUN_BARCODE, FIRST_READ_LENGTH) : new BustardFileParser(DIR, LANE, isPaired, RUN_BARCODE, PARSE_SIG2_FILES, BARCODE, BARCODE_CYCLE);
bread = bfp.next();
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
fread = ffp.next();
int cycle_offset = (END <= 1) ? 0 : bread.getIntensities().length/2;
BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length());
int queryid;
// learn mean parameters
System.err.println("Computing mean parameters...");
queryid = 0;
do {
assert(bread.getXCoordinate() == fread.getXCoordinate() && bread.getYCoordinate() == fread.getYCoordinate());
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
double[][] intensities = bread.getIntensities();
double[][] rawintensities = fread.getIntensities();
for (int cycle = 0; cycle < bases.length(); cycle++) {
char baseCur = bases.charAt(cycle);
byte qualCur = quals[cycle];
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
//model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
model.addMeanPoint(cycle,
model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
baseCur,
QualityUtils.qualToProb(qualCur)),
fourintensity);
}
queryid++;
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null && (fread = ffp.next()) != null);
// learn covariance parameters
System.err.println("Computing covariance parameters...");
bfp = OLD ? new BustardFileParser_1_1(DIR, LANE, isPaired, RUN_BARCODE, FIRST_READ_LENGTH) : new BustardFileParser(DIR, LANE, isPaired, RUN_BARCODE, PARSE_SIG2_FILES, BARCODE, BARCODE_CYCLE);
bread = bfp.next();
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
fread = ffp.next();
queryid = 0;
do {
assert(bread.getXCoordinate() == fread.getXCoordinate() && bread.getYCoordinate() == fread.getYCoordinate());
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
double[][] intensities = bread.getIntensities();
double[][] rawintensities = fread.getIntensities();
for (int cycle = 0; cycle < bases.length(); cycle++) {
char baseCur = bases.charAt(cycle);
byte qualCur = quals[cycle];
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
model.addCovariancePoint(cycle,
model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
baseCur,
QualityUtils.qualToProb(qualCur)),
fourintensity);
}
queryid++;
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null && (fread = ffp.next()) != null);
// write model parameters
model.write(debugdir);
// call bases
System.err.println("Calling bases...");
SAMFileHeader sfh = new SAMFileHeader();
SAMFileWriter sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, false, OUT);
bfp = OLD ? new BustardFileParser_1_1(DIR, LANE, isPaired, RUN_BARCODE, FIRST_READ_LENGTH) : new BustardFileParser(DIR, LANE, isPaired, RUN_BARCODE, PARSE_SIG2_FILES, BARCODE, BARCODE_CYCLE);
bread = bfp.next();
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
fread = ffp.next();
int[][] base_counts = new int[4][bread.getFirstReadSequence().length()];
int bases_incorrect = 0, bases_total = 0;
if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob kiran_base_prev"); }
queryid = 0;
do {
assert(bread.getXCoordinate() == fread.getXCoordinate() && bread.getYCoordinate() == fread.getYCoordinate());
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
double[][] intensities = bread.getIntensities();
double[][] rawintensities = fread.getIntensities();
byte[] asciiseq = new byte[bases.length()];
byte[] bestqual = new byte[bases.length()];
byte[] nextbestqual = new byte[bases.length()];
for (int cycle = 0; cycle < bases.length(); cycle++) {
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
char basePrev = (cycle == 0 ? '.' : (char) asciiseq[cycle - 1]);
byte qualPrev = (cycle == 0 ? 40 : bestqual[cycle - 1]);
FourProb fp = model.computeProbabilities(cycle, basePrev, qualPrev, fourintensity);
asciiseq[cycle] = (byte) fp.baseAtRank(0);
bestqual[cycle] = fp.qualAtRank(0);
nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1));
if (debugout != null && bases.charAt(cycle) != '.' && base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle] < 1000) {
debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle] + " " + basePrev);
base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle]++;
}
bases_incorrect += (bases.charAt(cycle) == (char) asciiseq[cycle]) ? 0 : 1;
bases_total++;
}
sfw.addAlignment(constructSAMRecord("KIR_", new String(asciiseq), bestqual, nextbestqual, bread, sfh));
sfw.addAlignment(constructSAMRecord("BUS_", bases, quals, null, bread, sfh));
queryid++;
} while (queryid < CALLING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null && (fread = ffp.next()) != null);
if (debugout != null) {
debugout.close();
}
sfw.close();
System.out.println("Disagreement rate: " + ((double) bases_incorrect)/((double) bases_total));
return 0;
}
private double[] getBaseProbabilityDistribution(char base, byte qual) {
double[] dist = new double[4];
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
dist[baseIndex] = QualityUtils.qualToProb(qual);
double residualprob = (1.0 - dist[baseIndex])/3.0;
for (int i = 0; i < 4; i++) {
if (i != baseIndex) {
dist[i] = residualprob;
}
}
return dist;
}
private SAMRecord constructSAMRecord(String readNamePrefix, String bases, byte[] bestqual, byte[] nextbestqual, BustardReadData bread, SAMFileHeader sfh) {
SAMRecord sr = new SAMRecord(sfh);
sr.setReadName(readNamePrefix + bread.getReadName());
sr.setReadUmappedFlag(true);
sr.setReadString(bases);
sr.setBaseQualities(bestqual);
if (nextbestqual != null) { sr.setAttribute("SQ", nextbestqual); }
sr.setReadFailsVendorQualityCheckFlag(!bread.isPf());
return sr;
}
}

View File

@ -1,28 +1,27 @@
package org.broadinstitute.sting.utils.fasta;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.PicardException;
import net.sf.picard.io.IoUtil;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMTextHeaderCodec;
import net.sf.samtools.util.AsciiLineReader;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.ByteBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.Charset;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.util.Scanner;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Iterator;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMTextHeaderCodec;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.AsciiLineReader;
import org.broadinstitute.sting.utils.StingException;
import java.util.Scanner;
/**
* Created by IntelliJ IDEA.
@ -255,6 +254,11 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
return getSequence( indexIterator.next().getContig() );
}
@Override
public void reset() {
// TODO: FOR MATT TO IMPL.
}
public String toString() {
return this.file.getAbsolutePath();
}

View File

@ -29,9 +29,7 @@ import java.util.*;
* functionality.
*/
/**
* Base support for testing variants of the LocusView family of classes.
*/
/** Base support for testing variants of the LocusView family of classes. */
public abstract class LocusViewTemplate extends BaseTest {
protected static ReferenceSequenceFile sequenceSourceFile = null;
@ -46,200 +44,203 @@ public abstract class LocusViewTemplate extends BaseTest {
public void emptyAlignmentContextTest() {
SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(shardBounds);
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView( dataProvider );
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList() );
testReadsInContext(view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList());
}
@Test
public void singleReadTest() {
SAMRecord read = buildSAMRecord("chr1",1,5);
SAMRecord read = buildSAMRecord("chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(shardBounds);
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView( dataProvider );
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readCoveringFirstPartTest() {
SAMRecord read = buildSAMRecord("chr1",1,5);
SAMRecord read = buildSAMRecord("chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readCoveringLastPartTest() {
SAMRecord read = buildSAMRecord("chr1",6,10);
SAMRecord read = buildSAMRecord("chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readCoveringMiddleTest() {
SAMRecord read = buildSAMRecord("chr1",3,7);
SAMRecord read = buildSAMRecord("chr1", 3, 7);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readOverlappingStartTest() {
SAMRecord read = buildSAMRecord("chr1",1,10);
SAMRecord read = buildSAMRecord("chr1", 1, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 6, 15));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readOverlappingEndTest() {
SAMRecord read = buildSAMRecord("chr1",6,15);
SAMRecord read = buildSAMRecord("chr1", 6, 15);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
}
@Test
public void readsSpanningTest() {
SAMRecord read1 = buildSAMRecord("chr1",1,5);
SAMRecord read2 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2);
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads,read1,read2);
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
Collections.addAll(expectedReads, read1, read2);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
}
@Test
public void duplicateReadsTest() {
SAMRecord read1 = buildSAMRecord("chr1",1,5);
SAMRecord read2 = buildSAMRecord("chr1",1,5);
SAMRecord read3 = buildSAMRecord("chr1",6,10);
SAMRecord read4 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 1, 5);
SAMRecord read3 = buildSAMRecord("chr1", 6, 10);
SAMRecord read4 = buildSAMRecord("chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads,read1,read2,read3,read4);
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
}
@Test
public void cascadingReadsWithinBoundsTest() {
SAMRecord read1 = buildSAMRecord("chr1",2,6);
SAMRecord read2 = buildSAMRecord("chr1",3,7);
SAMRecord read3 = buildSAMRecord("chr1",4,8);
SAMRecord read4 = buildSAMRecord("chr1",5,9);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
SAMRecord read1 = buildSAMRecord("chr1", 2, 6);
SAMRecord read2 = buildSAMRecord("chr1", 3, 7);
SAMRecord read3 = buildSAMRecord("chr1", 4, 8);
SAMRecord read4 = buildSAMRecord("chr1", 5, 9);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads,read1,read2,read3,read4);
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
}
@Test
public void cascadingReadsAtBoundsTest() {
SAMRecord read1 = buildSAMRecord("chr1",1,5);
SAMRecord read2 = buildSAMRecord("chr1",2,6);
SAMRecord read3 = buildSAMRecord("chr1",3,7);
SAMRecord read4 = buildSAMRecord("chr1",4,8);
SAMRecord read5 = buildSAMRecord("chr1",5,9);
SAMRecord read6 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4,read5,read6);
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 2, 6);
SAMRecord read3 = buildSAMRecord("chr1", 3, 7);
SAMRecord read4 = buildSAMRecord("chr1", 4, 8);
SAMRecord read5 = buildSAMRecord("chr1", 5, 9);
SAMRecord read6 = buildSAMRecord("chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 1, 10));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads,read1,read2,read3,read4,read5,read6);
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
}
@Test
public void cascadingReadsOverlappingBoundsTest() {
SAMRecord read01 = buildSAMRecord("chr1",1,5);
SAMRecord read02 = buildSAMRecord("chr1",2,6);
SAMRecord read03 = buildSAMRecord("chr1",3,7);
SAMRecord read04 = buildSAMRecord("chr1",4,8);
SAMRecord read05 = buildSAMRecord("chr1",5,9);
SAMRecord read06 = buildSAMRecord("chr1",6,10);
SAMRecord read07 = buildSAMRecord("chr1",7,11);
SAMRecord read08 = buildSAMRecord("chr1",8,12);
SAMRecord read09 = buildSAMRecord("chr1",9,13);
SAMRecord read10 = buildSAMRecord("chr1",10,14);
SAMRecord read11 = buildSAMRecord("chr1",11,15);
SAMRecord read12 = buildSAMRecord("chr1",12,16);
SAMRecordIterator iterator = new SAMRecordIterator(read01,read02,read03,read04,read05,read06,
read07,read08,read09,read10,read11,read12);
SAMRecord read01 = buildSAMRecord("chr1", 1, 5);
SAMRecord read02 = buildSAMRecord("chr1", 2, 6);
SAMRecord read03 = buildSAMRecord("chr1", 3, 7);
SAMRecord read04 = buildSAMRecord("chr1", 4, 8);
SAMRecord read05 = buildSAMRecord("chr1", 5, 9);
SAMRecord read06 = buildSAMRecord("chr1", 6, 10);
SAMRecord read07 = buildSAMRecord("chr1", 7, 11);
SAMRecord read08 = buildSAMRecord("chr1", 8, 12);
SAMRecord read09 = buildSAMRecord("chr1", 9, 13);
SAMRecord read10 = buildSAMRecord("chr1", 10, 14);
SAMRecord read11 = buildSAMRecord("chr1", 11, 15);
SAMRecord read12 = buildSAMRecord("chr1", 12, 16);
SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12);
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1", 6, 15));
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads,read01,read02,read03,read04,read05,read06,
read07,read08,read09,read10,read11,read12);
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
}
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
}
/**
* Creates a view of the type required for testing.
*
* @return The correct view to test.
*/
protected abstract LocusView createView( ShardDataProvider provider );
protected abstract LocusView createView(ShardDataProvider provider);
/**
* Test the reads according to an independently derived context.
*
* @param view
* @param bounds
* @param reads
*/
protected abstract void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads );
protected abstract void testReadsInContext(LocusView view, GenomeLoc bounds, List<SAMRecord> reads);
/**
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
*
* @return A 'fake' reference sequence file
*/
private static ReferenceSequenceFile fakeReferenceSequenceFile() {
@ -249,18 +250,27 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Collections.singletonList(sequenceRecord));
return dictionary;
}
public ReferenceSequence nextSequence() { throw new UnsupportedOperationException("Fake implementation doesn't support a getter"); }
public ReferenceSequence nextSequence() {
throw new UnsupportedOperationException("Fake implementation doesn't support a getter");
}
public void reset() {
return; // TODO MATT FIX ME
}
};
}
/**
* Build a SAM record featuring the absolute minimum required dataset.
* @param contig Contig to populate.
*
* @param contig Contig to populate.
* @param alignmentStart start of alignment
* @param alignmentEnd end of alignment
* @param alignmentEnd end of alignment
*
* @return New SAM Record
*/
protected SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) {
protected SAMRecord buildSAMRecord(String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary());
@ -269,18 +279,16 @@ public abstract class LocusViewTemplate extends BaseTest {
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar();
cigar.add(new CigarElement(alignmentEnd-alignmentStart+1, CigarOperator.M));
cigar.add(new CigarElement(alignmentEnd - alignmentStart + 1, CigarOperator.M));
record.setCigar(cigar);
return record;
}
/**
* A simple iterator which iterates over a list of reads.
*/
/** A simple iterator which iterates over a list of reads. */
protected class SAMRecordIterator implements StingSAMIterator {
private Iterator<SAMRecord> backingIterator = null;
public SAMRecordIterator( SAMRecord... reads ) {
public SAMRecordIterator(SAMRecord... reads) {
List<SAMRecord> backingList = new ArrayList<SAMRecord>();
backingList.addAll(Arrays.asList(reads));
backingIterator = backingList.iterator();

View File

@ -33,6 +33,12 @@ public class SSGenotypeCallTest extends BaseTest {
return null;
}
/** Method for clearing the cache (in case we change the priors) */
@Override
protected void clearCache() {
// do nothing
}
/**
* Must be overridden by concrete subclasses
*

View File

@ -9,21 +9,21 @@ public class IntervalCleanerIntegrationTest extends WalkerTest {
@Test
public void testIntervals() {
String[] md5lod5 = {"bfe1c76bf352b22f79c9b7242197a126", "4aa3637e86822c95af3e2c9b414530c3"};
String[] md5lod5 = {"54ba9ef0d90659475f7d694fb97b5f2b", "4aa3637e86822c95af3e2c9b414530c3"};
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T IntervalCleaner -LOD 5 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
2,
Arrays.asList(md5lod5));
executeTest("testLod5", spec1);
String[] md5lod200 = {"4481e4d24d61a3e438323b368ad0eee7", "e39aa718c9810364ebe30964d878d5ff"};
String[] md5lod200 = {"55097ed556f41c5d527d7381b8e858dd", "e39aa718c9810364ebe30964d878d5ff"};
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T IntervalCleaner -LOD 200 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
2,
Arrays.asList(md5lod200));
executeTest("testLod200", spec2);
String[] md5cleanedOnly = {"c8da42cc2298e05ab4f2bc9e5d7445e1", "4aa3637e86822c95af3e2c9b414530c3"};
String[] md5cleanedOnly = {"1f844be094cd30c351586ebf0f8ff38c", "4aa3637e86822c95af3e2c9b414530c3"};
WalkerTestSpec spec3 = new WalkerTestSpec(
"-T IntervalCleaner -LOD 5 -cleanedOnly -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
2,

View File

@ -40,7 +40,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "8d2194cf8134800def32d5268033de70" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "3ace4b9b8495429cc32e7008145f4996" );
//e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" );
//e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" );

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private" revision="875" status="integration" publication="20090630104500" />
<info organisation="edu.mit.broad" module="picard-private" revision="1060" status="integration" publication="20090930104500" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.02.63" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.01.63" status="release" />
</ivy-module>