Merge pull request #428 from broadinstitute/eb_various_HC_bug_fixes

Eb various hc bug fixes
This commit is contained in:
Ryan Poplin 2013-11-25 06:36:31 -08:00
commit abf70dc071
6 changed files with 105 additions and 53 deletions

View File

@ -296,9 +296,9 @@ public class ReferenceConfidenceModel {
if( hqSoftClips != null && p.isNextToSoftClip() ) { if( hqSoftClips != null && p.isNextToSoftClip() ) {
hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
} }
result.AD_Ref_Any[1]++; result.AD_Ref_Any[1] += p.getRepresentativeCount();
} else { } else {
result.AD_Ref_Any[0]++; result.AD_Ref_Any[0] += p.getRepresentativeCount();
} }
result.genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); result.genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
result.genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF ); result.genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF );
@ -483,7 +483,7 @@ public class ReferenceConfidenceModel {
// todo -- this code really should handle CIGARs directly instead of relying on the above tests // todo -- this code really should handle CIGARs directly instead of relying on the above tests
if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) { if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) {
nInformative++; nInformative += p.getRepresentativeCount();
if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) { if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) {
return MAX_N_INDEL_INFORMATIVE_READS; return MAX_N_INDEL_INFORMATIVE_READS;
} }

View File

@ -95,10 +95,13 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/ */
public boolean isReferenceNode( final V v ) { public boolean isReferenceNode( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); } if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final BaseEdge e : edgesOf(v) ) {
if( e.isRef() ) { return true; } for ( final BaseEdge e : edgesOf(v) ) {
if ( e.isRef() ) { return true; }
} }
return false;
// edge case: if the graph only has one node then it's a ref node, otherwise it's not
return (vertexSet().size() == 1);
} }
/** /**
@ -154,62 +157,46 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
return v.getAdditionalSequence(isSource(v)); return v.getAdditionalSequence(isSource(v));
} }
/**
* @param e the edge to test
* @return true if this edge is a reference source edge
*/
public boolean isRefSource( final E e ) {
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
for( final E edgeToTest : incomingEdgesOf(getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/** /**
* @param v the vertex to test * @param v the vertex to test
* @return true if this vertex is a reference source * @return true if this vertex is a reference source
*/ */
public boolean isRefSource( final V v ) { public boolean isRefSource( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); } if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final E edgeToTest : incomingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; } // confirm that no incoming edges are reference edges
for ( final E edgeToTest : incomingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return false; }
} }
return true;
// confirm that there is an outgoing reference edge
for ( final E edgeToTest : outgoingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return true; }
}
// edge case: if the graph only has one node then it's a ref sink, otherwise it's not
return (vertexSet().size() == 1);
} }
/** /**
* @param e the edge to test
* @return true if this edge is a reference sink edge
*/
public boolean isRefSink( final E e ) {
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
for( final E edgeToTest : outgoingEdgesOf(getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* // TODO -- the logic of this test is just wrong
* @param v the vertex to test * @param v the vertex to test
* @return true if this vertex is a reference sink * @return true if this vertex is a reference sink
*/ */
public boolean isRefSink( final V v ) { public boolean isRefSink( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); } if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final E edgeToTest : outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/** // confirm that no outgoing edges are reference edges
* Is this both a refsink node and a reference node for ( final E edgeToTest : outgoingEdgesOf(v) ) {
* @param v a non-null vertex if ( edgeToTest.isRef() ) { return false; }
* @return true if v is both a sink and a reference node }
*/
public boolean isRefNodeAndRefSink(final V v) { // confirm that there is an incoming reference edge
return isRefSink(v) && isReferenceNode(v); for ( final E edgeToTest : incomingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return true; }
}
// edge case: if the graph only has one node then it's a ref source, otherwise it's not
return (vertexSet().size() == 1);
} }
/** /**
@ -217,7 +204,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/ */
public V getReferenceSourceVertex( ) { public V getReferenceSourceVertex( ) {
for( final V v : vertexSet() ) { for( final V v : vertexSet() ) {
if( isReferenceNode(v) && isRefSource(v) ) { if( isRefSource(v) ) {
return v; return v;
} }
} }
@ -229,7 +216,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/ */
public V getReferenceSinkVertex( ) { public V getReferenceSinkVertex( ) {
for( final V v : vertexSet() ) { for( final V v : vertexSet() ) {
if( isReferenceNode(v) && isRefSink(v) ) { if( isRefSink(v) ) {
return v; return v;
} }
} }
@ -490,7 +477,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
// Run through the graph and clean up singular orphaned nodes // Run through the graph and clean up singular orphaned nodes
final List<V> verticesToRemove = new LinkedList<>(); final List<V> verticesToRemove = new LinkedList<>();
for( final V v : vertexSet() ) { for( final V v : vertexSet() ) {
if( inDegreeOf(v) == 0 && outDegreeOf(v) == 0 ) { if( inDegreeOf(v) == 0 && outDegreeOf(v) == 0 && !isRefSource(v) ) {
verticesToRemove.add(v); verticesToRemove.add(v);
} }
} }

View File

@ -487,7 +487,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
int attempted = 0; int attempted = 0;
int nRecovered = 0; int nRecovered = 0;
for ( final MultiDeBruijnVertex v : vertexSet() ) { for ( final MultiDeBruijnVertex v : vertexSet() ) {
if ( outDegreeOf(v) == 0 && ! isRefNodeAndRefSink(v) ) { if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
attempted++; attempted++;
nRecovered += recoverDanglingChain(v, pruneFactor); nRecovered += recoverDanglingChain(v, pruneFactor);
} }

View File

@ -94,6 +94,6 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"8e6a2002c59eafb78bdbf1db9660164b"); "f74d68cbc1ecb66a7128258e111cd030");
} }
} }

View File

@ -47,7 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller; package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -164,6 +163,26 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
} }
} }
@Test
public void testCalcNIndelInformativeReducedReads() {
final String bases = "ACGGGTTTGGAC";
final byte[] quals = Utils.dupBytes((byte)30, bases.length());
final int count = 10;
final int[] counts = new int[bases.length()];
for ( int i = 0; i < counts.length; i++ )
counts[i] = count;
final int position = 100;
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, position, counts.length, counts);
read.setReadString(bases);
read.setBaseQualities(quals);
read.setCigarString(bases.length() + "M");
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, position, position);
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, Collections.singletonList(read), 0);
final int actual = model.calcNIndelInformativeReads(pileup, 0, bases.getBytes(), 3);
Assert.assertEquals(actual, count);
}
@Test @Test
public void testClose() { public void testClose() {
model.close(); model.close();

View File

@ -95,7 +95,7 @@ public class BaseGraphUnitTest extends BaseTest {
} }
@Test @Test
public void testRemoveSingletonOrphanVertices() throws Exception { public void testRemoveSingletonOrphanVertices() throws Exception {
// all vertices in graph are connected // all vertices in graph are connected
final List<SeqVertex> kept = new LinkedList<SeqVertex>(graph.vertexSet()); final List<SeqVertex> kept = new LinkedList<SeqVertex>(graph.vertexSet());
final SeqVertex rm1 = new SeqVertex("CAGT"); final SeqVertex rm1 = new SeqVertex("CAGT");
@ -116,6 +116,52 @@ public class BaseGraphUnitTest extends BaseTest {
Assert.assertFalse(graph.containsVertex(rm2)); Assert.assertFalse(graph.containsVertex(rm2));
} }
@Test
public void testRemoveSingletonOrphanVerticesOnSingleRefNode() throws Exception {
final SeqGraph original = new SeqGraph();
original.addVertex(v1);
original.removeSingletonOrphanVertices();
Assert.assertTrue(original.containsVertex(v1));
Assert.assertEquals(original.vertexSet().size(), 1);
}
@Test
public void testIsRefSourceAndSink() throws Exception {
final SeqGraph g = new SeqGraph();
g.addVertex(v1);
Assert.assertTrue(g.isRefSource(v1));
Assert.assertTrue(g.isRefSink(v1));
Assert.assertTrue(g.isReferenceNode(v1));
g.addVertices(v2, v3, v4, v5);
g.addEdge(v1, v2);
g.addEdge(v2, v3);
final BaseEdge refEdge = new BaseEdge(true, 1);
g.addEdge(v3, v4, refEdge);
g.addEdge(v4, v5);
Assert.assertFalse(g.isRefSource(v1));
Assert.assertFalse(g.isRefSink(v1));
Assert.assertFalse(g.isReferenceNode(v1));
Assert.assertFalse(g.isRefSource(v2));
Assert.assertFalse(g.isRefSink(v2));
Assert.assertFalse(g.isReferenceNode(v2));
Assert.assertTrue(g.isRefSource(v3));
Assert.assertFalse(g.isRefSink(v3));
Assert.assertTrue(g.isReferenceNode(v3));
Assert.assertFalse(g.isRefSource(v4));
Assert.assertTrue(g.isRefSink(v4));
Assert.assertTrue(g.isReferenceNode(v4));
Assert.assertFalse(g.isRefSource(v5));
Assert.assertFalse(g.isRefSink(v5));
Assert.assertFalse(g.isReferenceNode(v5));
}
@Test @Test
public void testRemovePathsNotConnectedToRef() throws Exception { public void testRemovePathsNotConnectedToRef() throws Exception {
final SeqGraph graph = new SeqGraph(); final SeqGraph graph = new SeqGraph();