From 7fc4472e6df41e442f4e2fd9eec7075e3363163e Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 2 Oct 2009 19:35:35 +0000 Subject: [PATCH] A big fix for MergingSamRecordIterator, where we weren't correctly handling the comparisons of SAMRecords correctly (we weren't applying the new reference index first, so sometimes the MT contig would be ID 23, sometimes 24 in different records). Also a fix to the GLF tests, and a correction to PrintReadsWalker to remove the close() on the output source, the source handles that itself (and you get a double close). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1758 348d0f76-0448-11de-a6fe-93d51630548a --- .../iterators/MergingSamRecordIterator2.java | 61 +++++++++++-------- .../sting/gatk/refdata/RodGLF.java | 2 +- .../sting/gatk/walkers/PrintReadsWalker.java | 21 +------ .../sting/gatk/refdata/RodGLFTest.java | 30 ++++----- .../gatk/walkers/PrintReadsWalkerTest.java | 11 +--- .../utils/genotype/glf/GLFWriterTest.java | 2 +- 6 files changed, 51 insertions(+), 76 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 931da02c1..bac7f1c52 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -64,7 +64,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, * @param headerMerger the header to merge * @param reads the reads pile */ - public MergingSamRecordIterator2( final SamFileHeaderMerger headerMerger, Reads reads ) { + public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) { this.samHeaderMerger = headerMerger; this.reads = reads; this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); @@ -85,7 +85,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, for (final SAMFileReader reader : samHeaderMerger.getReaders()) { checkSortOrder(reader); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, comparator); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, comparator); addIfNotEmpty(iterator); } setInitialized(); @@ -97,13 +97,13 @@ public class MergingSamRecordIterator2 implements CloseableIterator, * * @param reader the reader to check */ - private void checkSortOrder( SAMFileReader reader ) { + private void checkSortOrder(SAMFileReader reader) { if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) { if (reads.getSafetyChecking()) { throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() + " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals " + reader.getFileHeader().getAttribute("SO") + " in this case)."); - } else if(!warnedUserAboutSortOrder) { + } else if (!warnedUserAboutSortOrder) { warnedUserAboutSortOrder = true; Utils.warnUser("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() + " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals " @@ -117,7 +117,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, return true; } - public void queryOverlapping( final String contig, final int start, final int stop ) { + public void queryOverlapping(final String contig, final int start, final int stop) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -126,14 +126,14 @@ public class MergingSamRecordIterator2 implements CloseableIterator, for (final SAMFileReader reader : samHeaderMerger.getReaders()) { checkSortOrder(reader); Iterator recordIter = reader.queryOverlapping(contig, start, stop); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); addIfNotEmpty(iterator); } setInitialized(); } - public void query( final String contig, final int start, final int stop, final boolean contained ) { + public void query(final String contig, final int start, final int stop, final boolean contained) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -141,7 +141,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, for (final SAMFileReader reader : samHeaderMerger.getReaders()) { checkSortOrder(reader); Iterator recordIter = reader.query(contig, start, stop, contained); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); addIfNotEmpty(iterator); } setInitialized(); @@ -155,17 +155,16 @@ public class MergingSamRecordIterator2 implements CloseableIterator, final SAMRecordComparator comparator = getComparator(); for (final SAMFileReader reader : samHeaderMerger.getReaders()) { Iterator recordIter = null; - if( reader.hasIndex() ) { + if (reader.hasIndex()) { recordIter = reader.queryUnmapped(); - } - else { + } else { // HACK: Supporting completely unmapped BAM files is easy. Let's do a quick check to make sure // these BAMs aren't partially indexed. - if( reader.getFileHeader().getSequenceDictionary().size() > 0 ) + if (reader.getFileHeader().getSequenceDictionary().size() > 0) throw new StingException("Partially mapped BAM files without indices are not supported"); recordIter = reader.iterator(); } - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); addIfNotEmpty(iterator); } setInitialized(); @@ -173,7 +172,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } - public void queryContained( final String contig, final int start, final int stop ) { + public void queryContained(final String contig, final int start, final int stop) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -181,7 +180,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, for (final SAMFileReader reader : samHeaderMerger.getReaders()) { checkSortOrder(reader); Iterator recordIter = reader.queryContained(contig, start, stop); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); addIfNotEmpty(iterator); } setInitialized(); @@ -261,13 +260,9 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } record.setHeader(samHeaderMerger.getMergedHeader()); - if (this.samHeaderMerger.hasMergedSequenceDictionary() && record.getReferenceIndex() >= 0) { - record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader(), record.getReferenceIndex())); - } //System.out.printf("NEXT = %s %s %d%n", record.getReadName(), record.getReferenceName(), record.getAlignmentStart()); //System.out.printf("PEEK = %s %s %d%n", this.pq.peek().peek().getReadName(), this.pq.peek().peek().getReferenceName(), this.pq.peek().peek().getAlignmentStart()); - return record; } @@ -277,7 +272,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, * * @param iterator the iterator to add */ - protected void addIfNotEmpty( final ComparableSamRecordIterator iterator ) { + protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) { //System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart()); if (iterator.hasNext()) { pq.offer(iterator); @@ -302,11 +297,11 @@ public class MergingSamRecordIterator2 implements CloseableIterator, // For unsorted build a fake comparator that compares based on object ID if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { return new SAMRecordComparator() { - public int fileOrderCompare( final SAMRecord lhs, final SAMRecord rhs ) { + public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { return System.identityHashCode(lhs) - System.identityHashCode(rhs); } - public int compare( final SAMRecord lhs, final SAMRecord rhs ) { + public int compare(final SAMRecord lhs, final SAMRecord rhs) { return fileOrderCompare(lhs, rhs); } }; @@ -376,6 +371,7 @@ class ComparableSamRecordIterator extends PeekableIterator implements private Reads sourceInfo; private final Comparator comparator; private final SAMFileReader reader; + private final SamFileHeaderMerger mHeaderMerger; /** * Constructs an iterator for iteration over the supplied SAM file that will be @@ -385,16 +381,18 @@ class ComparableSamRecordIterator extends PeekableIterator implements * @param sam the SAM file to read records from * @param comparator the Comparator to use to provide ordering fo SAMRecords */ - public ComparableSamRecordIterator( final SAMFileReader sam, final Comparator comparator ) { + public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, final Comparator comparator) { super(sam.iterator()); this.reader = sam; this.comparator = comparator; + mHeaderMerger = samHeaderMerger; } - public ComparableSamRecordIterator( final SAMFileReader sam, Iterator iterator, final Comparator comparator ) { + public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, Iterator iterator, final Comparator comparator) { super(iterator); // use the provided iterator this.reader = sam; this.comparator = comparator; + mHeaderMerger = samHeaderMerger; } public Reads getSourceInfo() { @@ -421,7 +419,7 @@ class ComparableSamRecordIterator extends PeekableIterator implements * * @return a negative, 0 or positive number as described in the Comparator interface */ - public int compareTo( final ComparableSamRecordIterator that ) { + public int compareTo(final ComparableSamRecordIterator that) { if (this.comparator.getClass() != that.comparator.getClass()) { throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + "have different orderings internally"); @@ -429,7 +427,18 @@ class ComparableSamRecordIterator extends PeekableIterator implements final SAMRecord record = this.peek(); final SAMRecord record2 = that.peek(); - //System.out.printf("Comparing %s vs. %s => %d%n", record.getReadName(), record2.getReadName(), comparator.compare(record, record2)); + record.setHeader(mHeaderMerger.getMergedHeader()); + record2.setHeader(mHeaderMerger.getMergedHeader()); + int index, index2; + try { + index = mHeaderMerger.getMergedHeader().getSequenceIndex(record.getReferenceName()); + record.setReferenceIndex(index); + + index2 = mHeaderMerger.getMergedHeader().getSequenceIndex(record2.getReferenceName()); + record2.setReferenceIndex(index2); + } catch (Exception e) { + throw new StingException("MergingSamRecordIterator2: unable to correct the reference index for read " + record.getReadName() + " or record " + record2.getReadName(),e); + } return comparator.compare(record, record2); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index 86264d792..88b08253f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -260,7 +260,7 @@ public class RodGLF implements VariationRod, Iterator { */ @Override public String getAlternateBases() { - return this.getBestGenotype(0).toString(); + return this.getBestGenotype(1).toString(); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index e27fe6f45..a656fc175 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -1,16 +1,9 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.Utils; - -import java.io.PrintStream; -import java.io.FileNotFoundException; -import java.io.File; -import java.util.Random; /* * Copyright (c) 2009 The Broad Institute @@ -121,14 +114,4 @@ public class PrintReadsWalker extends ReadWalker { return output; } - - /** - * when we're done traversing, close the reads file - * @param output the SAMFileWriter we've used in the reduce phase - */ - public void onTraversalDone( SAMFileWriter output ) { - if (output != null) { - output.close(); - } - } } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java index 96beac7b3..61bdc09de 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java @@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFileFactory; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.junit.Assert; import static org.junit.Assert.assertEquals; import org.junit.Before; @@ -13,7 +11,6 @@ import org.junit.BeforeClass; import org.junit.Test; import java.io.File; -import java.util.ArrayList; /** * Created by IntelliJ IDEA. @@ -31,9 +28,9 @@ import java.util.ArrayList; */ public class RodGLFTest extends BaseTest { static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/glfTestFile.glf"); - static final int finalRecordCount = 3; // the number of records in the above file + static final int finalRecordCount = 100; // the number of records in the above file static final int contigCount = 1; - static final String ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; + static final String ref = "/broad/1KG/reference/human_b36_both.fasta"; static ReferenceSequenceFile r; private RodGLF iter = null; @@ -79,11 +76,11 @@ public class RodGLFTest extends BaseTest { glf = iter.next(); Assert.assertFalse(iter.isReference()); } - /* + @Test(expected = IllegalStateException.class) public void testGetAltSnpFWDIllegalException() { RodGLF glf = iter.next(); - iter.getAltSnpFWD(); + iter.getAlternativeBaseForSNP(); } @@ -105,23 +102,18 @@ public class RodGLFTest extends BaseTest { public void testGetAltSnpFWD() { RodGLF glf = iter.next(); glf = iter.next(); - Assert.assertEquals('T', iter.getAltSnpFWD()); + Assert.assertEquals('C', iter.getAlternativeBaseForSNP()); } @Test public void testGetRefSnpFWD() { RodGLF glf = iter.next(); glf = iter.next(); - Assert.assertEquals('A', iter.getRefSnpFWD()); + glf = iter.next(); + Assert.assertEquals('A', iter.getReferenceForSNP()); } - @Test - public void testGetRefBasesFWD() { - RodGLF glf = iter.next(); - Assert.assertTrue("A".equals(iter.getRefBasesFWD())); - glf = iter.next(); - Assert.assertTrue("A".equals(iter.getRefBasesFWD())); - } + /** * move to the second and third bases, and check that the @@ -136,7 +128,7 @@ public class RodGLFTest extends BaseTest { Assert.assertTrue("CT".equals(iter.getAltBasesFWD())); } -*/ + @Test public void testRodLocations() { GenomeLoc loc = null; @@ -172,7 +164,7 @@ public class RodGLFTest extends BaseTest { * @param ref the reference base * * @return the likelihood object - */ + * private LikelihoodObject createLikelihood(char ref) { ArrayList vals = new ArrayList(); for (LikelihoodObject.GENOTYPE type : LikelihoodObject.GENOTYPE.values()) { @@ -185,7 +177,7 @@ public class RodGLFTest extends BaseTest { ret[x] = vals.get(x); } return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); - } + }*/ /** diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java index 152fabb3f..44c6eaac3 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java @@ -104,14 +104,5 @@ public class PrintReadsWalkerTest extends BaseTest { assertTrue(writer.getRecords().size() == 1); } - /** test that we close the output source */ - @Test - public void testClosingOnTraversalDone() { - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - assertTrue(!writer.isClosed()); - PrintReadsWalker walker = new PrintReadsWalker(); - walker.onTraversalDone(writer); - assertTrue(writer.isClosed()); - - } + } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index 62d4c0200..cdee2b4d7 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -110,7 +110,7 @@ public class GLFWriterTest extends BaseTest { @Test public void basicWriteThenRead() { File writeTo = new File("testGLF2.glf"); - //writeTo.deleteOnExit(); + writeTo.deleteOnExit(); List types = new ArrayList(); rec = new GLFWriter(header, writeTo); for (int x = 0; x < 100; x++) {