Merge pull request #428 from broadinstitute/eb_various_HC_bug_fixes

Eb various hc bug fixes
This commit is contained in:
Ryan Poplin 2013-11-25 06:36:31 -08:00
commit abf70dc071
6 changed files with 105 additions and 53 deletions

View File

@ -296,9 +296,9 @@ public class ReferenceConfidenceModel {
if( hqSoftClips != null && p.isNextToSoftClip() ) {
hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
result.AD_Ref_Any[1]++;
result.AD_Ref_Any[1] += p.getRepresentativeCount();
} else {
result.AD_Ref_Any[0]++;
result.AD_Ref_Any[0] += p.getRepresentativeCount();
}
result.genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
result.genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF );
@ -483,7 +483,7 @@ public class ReferenceConfidenceModel {
// todo -- this code really should handle CIGARs directly instead of relying on the above tests
if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) {
nInformative++;
nInformative += p.getRepresentativeCount();
if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) {
return MAX_N_INDEL_INFORMATIVE_READS;
}

View File

@ -95,10 +95,13 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/
public boolean isReferenceNode( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final BaseEdge e : edgesOf(v) ) {
if( e.isRef() ) { return true; }
for ( final BaseEdge e : edgesOf(v) ) {
if ( e.isRef() ) { return true; }
}
return false;
// edge case: if the graph only has one node then it's a ref node, otherwise it's not
return (vertexSet().size() == 1);
}
/**
@ -154,62 +157,46 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
return v.getAdditionalSequence(isSource(v));
}
/**
* @param e the edge to test
* @return true if this edge is a reference source edge
*/
public boolean isRefSource( final E e ) {
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
for( final E edgeToTest : incomingEdgesOf(getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* @param v the vertex to test
* @return true if this vertex is a reference source
*/
public boolean isRefSource( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final E edgeToTest : incomingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
// confirm that no incoming edges are reference edges
for ( final E edgeToTest : incomingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return false; }
}
return true;
// confirm that there is an outgoing reference edge
for ( final E edgeToTest : outgoingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return true; }
}
// edge case: if the graph only has one node then it's a ref sink, otherwise it's not
return (vertexSet().size() == 1);
}
/**
* @param e the edge to test
* @return true if this edge is a reference sink edge
*/
public boolean isRefSink( final E e ) {
if( e == null ) { throw new IllegalArgumentException("Attempting to test a null edge."); }
for( final E edgeToTest : outgoingEdgesOf(getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* // TODO -- the logic of this test is just wrong
* @param v the vertex to test
* @return true if this vertex is a reference sink
*/
public boolean isRefSink( final V v ) {
if( v == null ) { throw new IllegalArgumentException("Attempting to test a null vertex."); }
for( final E edgeToTest : outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this both a refsink node and a reference node
* @param v a non-null vertex
* @return true if v is both a sink and a reference node
*/
public boolean isRefNodeAndRefSink(final V v) {
return isRefSink(v) && isReferenceNode(v);
// confirm that no outgoing edges are reference edges
for ( final E edgeToTest : outgoingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return false; }
}
// confirm that there is an incoming reference edge
for ( final E edgeToTest : incomingEdgesOf(v) ) {
if ( edgeToTest.isRef() ) { return true; }
}
// edge case: if the graph only has one node then it's a ref source, otherwise it's not
return (vertexSet().size() == 1);
}
/**
@ -217,7 +204,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/
public V getReferenceSourceVertex( ) {
for( final V v : vertexSet() ) {
if( isReferenceNode(v) && isRefSource(v) ) {
if( isRefSource(v) ) {
return v;
}
}
@ -229,7 +216,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
*/
public V getReferenceSinkVertex( ) {
for( final V v : vertexSet() ) {
if( isReferenceNode(v) && isRefSink(v) ) {
if( isRefSink(v) ) {
return v;
}
}
@ -490,7 +477,7 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
// Run through the graph and clean up singular orphaned nodes
final List<V> verticesToRemove = new LinkedList<>();
for( final V v : vertexSet() ) {
if( inDegreeOf(v) == 0 && outDegreeOf(v) == 0 ) {
if( inDegreeOf(v) == 0 && outDegreeOf(v) == 0 && !isRefSource(v) ) {
verticesToRemove.add(v);
}
}

View File

@ -487,7 +487,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
int attempted = 0;
int nRecovered = 0;
for ( final MultiDeBruijnVertex v : vertexSet() ) {
if ( outDegreeOf(v) == 0 && ! isRefNodeAndRefSink(v) ) {
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
attempted++;
nRecovered += recoverDanglingChain(v, pruneFactor);
}

View File

@ -94,6 +94,6 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"8e6a2002c59eafb78bdbf1db9660164b");
"f74d68cbc1ecb66a7128258e111cd030");
}
}

View File

@ -47,7 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -164,6 +163,26 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
}
}
@Test
public void testCalcNIndelInformativeReducedReads() {
final String bases = "ACGGGTTTGGAC";
final byte[] quals = Utils.dupBytes((byte)30, bases.length());
final int count = 10;
final int[] counts = new int[bases.length()];
for ( int i = 0; i < counts.length; i++ )
counts[i] = count;
final int position = 100;
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, position, counts.length, counts);
read.setReadString(bases);
read.setBaseQualities(quals);
read.setCigarString(bases.length() + "M");
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, position, position);
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, Collections.singletonList(read), 0);
final int actual = model.calcNIndelInformativeReads(pileup, 0, bases.getBytes(), 3);
Assert.assertEquals(actual, count);
}
@Test
public void testClose() {
model.close();

View File

@ -95,7 +95,7 @@ public class BaseGraphUnitTest extends BaseTest {
}
@Test
public void testRemoveSingletonOrphanVertices() throws Exception {
public void testRemoveSingletonOrphanVertices() throws Exception {
// all vertices in graph are connected
final List<SeqVertex> kept = new LinkedList<SeqVertex>(graph.vertexSet());
final SeqVertex rm1 = new SeqVertex("CAGT");
@ -116,6 +116,52 @@ public class BaseGraphUnitTest extends BaseTest {
Assert.assertFalse(graph.containsVertex(rm2));
}
@Test
public void testRemoveSingletonOrphanVerticesOnSingleRefNode() throws Exception {
final SeqGraph original = new SeqGraph();
original.addVertex(v1);
original.removeSingletonOrphanVertices();
Assert.assertTrue(original.containsVertex(v1));
Assert.assertEquals(original.vertexSet().size(), 1);
}
@Test
public void testIsRefSourceAndSink() throws Exception {
final SeqGraph g = new SeqGraph();
g.addVertex(v1);
Assert.assertTrue(g.isRefSource(v1));
Assert.assertTrue(g.isRefSink(v1));
Assert.assertTrue(g.isReferenceNode(v1));
g.addVertices(v2, v3, v4, v5);
g.addEdge(v1, v2);
g.addEdge(v2, v3);
final BaseEdge refEdge = new BaseEdge(true, 1);
g.addEdge(v3, v4, refEdge);
g.addEdge(v4, v5);
Assert.assertFalse(g.isRefSource(v1));
Assert.assertFalse(g.isRefSink(v1));
Assert.assertFalse(g.isReferenceNode(v1));
Assert.assertFalse(g.isRefSource(v2));
Assert.assertFalse(g.isRefSink(v2));
Assert.assertFalse(g.isReferenceNode(v2));
Assert.assertTrue(g.isRefSource(v3));
Assert.assertFalse(g.isRefSink(v3));
Assert.assertTrue(g.isReferenceNode(v3));
Assert.assertFalse(g.isRefSource(v4));
Assert.assertTrue(g.isRefSink(v4));
Assert.assertTrue(g.isReferenceNode(v4));
Assert.assertFalse(g.isRefSource(v5));
Assert.assertFalse(g.isRefSink(v5));
Assert.assertFalse(g.isReferenceNode(v5));
}
@Test
public void testRemovePathsNotConnectedToRef() throws Exception {
final SeqGraph graph = new SeqGraph();