We no longer calculate the population-level TDT statistic if there are fewer than 5 trios with full genotype likelihood information. When there is a high degree of missingness, the results are skewed or, in the worst case, come out as NaN.

This commit is contained in:
Ryan Poplin 2012-01-18 09:42:41 -05:00
parent ff2fc514ae
commit 11982b5a34
2 changed files with 7 additions and 5 deletions

View File

@ -147,13 +147,13 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
ActiveRegion bestRegion = activeRegion;
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
bestRegion = otherRegionToTest;
}
}
bestRegion.add( (GATKSAMRecord) read, true );
// The read is also added to all other region in which it overlaps but marked as non-primary
// The read is also added to all other regions in which it overlaps but marked as non-primary
if( !bestRegion.equals(activeRegion) ) {
activeRegion.add( (GATKSAMRecord) read, false );
}

View File

@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -18,7 +17,7 @@ import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* User: rpoplin, lfran
* Date: 11/14/11
*/
@ -28,6 +27,7 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
private final static int REF = 0;
private final static int HET = 1;
private final static int HOM = 2;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( trios == null ) {
@ -50,7 +50,9 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
}
}
toRet.put("TDT", calculateTDT( vc, triosToTest ));
if( triosToTest.size() >= MIN_NUM_VALID_TRIOS ) {
toRet.put("TDT", calculateTDT( vc, triosToTest ));
}
return toRet;
}