From e5ef8388fc494d6553e167ec8aec3fecdfffa18f Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 15 Apr 2011 22:03:16 +0000 Subject: [PATCH] BatchMerge - AlleleVCF --> AllelesVCF, this (combined with Eric's fix) will solve James P.'s forum issue. After viewing results on real case/control data from RAW -- it's really working quite well. ReadIndels, however, needs to use a T-test rather than a U-test, especially in deep coverage (at indel sites, the reads with indels will have mostly the same number of CIGAR indel elements -- one -- which doesn't really play nicely with the UTest when sample sets are large). Modified ReadsLargeInsertSize to be a two-way test (e.g. ReadsLarge and ReadsSmall). BaseQualityScore also suffers from the same issue as read indels, so switching over to a T-test in that case as well. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5653 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/association/RAWSampleStats.java | 1 + .../modules/casecontrol/BaseQualityScore.java | 1 + .../modules/casecontrol/MismatchRate.java | 5 +-- .../modules/casecontrol/ReadIndels.java | 1 + ...Size.java => ReadsAberrantInsertSize.java} | 10 +++--- .../modules/casecontrol/ReadsWithIndels.java | 31 +++++++++++++++++++ scala/qscript/playground/BatchMerge.q | 2 +- 7 files changed, 44 insertions(+), 7 deletions(-) rename java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/{ReadsLargeInsertSize.java => ReadsAberrantInsertSize.java} (78%) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java index 82dff0e8b..3d16bd1dd 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java @@ -56,6 +56,7 @@ public class RAWSampleStats extends LocusWalker)stats).first; std = ((Pair)stats).second; @@ -62,10 +63,10 @@ public class MismatchRate extends ValueTest { if ( std <= 0.0 ) { std = 1.0; } - ra.add(pileup.size()); + ra.add(mmr); } - return Arrays.asList((Number) ((calcMMR(pileup) - mn) / std)); + return Arrays.asList((Number) ((mmr - mn) / std)); } public double calcMMR(ReadBackedPileup rbp) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java index db85acad0..15704ac23 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java @@ -32,4 +32,5 @@ public class ReadIndels extends ValueTest { } public boolean usePreviouslySeenReads() { return false; } + public boolean useTStatistic() { return true; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java similarity index 78% rename from java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java rename to java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java index bcd281d6b..9095e93d6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java @@ -13,9 +13,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 3:31 PM * To change this template use File | Settings | File Templates. */ -public class ReadsLargeInsertSize extends ProportionTest { +public class ReadsAberrantInsertSize extends ProportionTest { - private int THRESHOLD; + private int HI_THRESHOLD; + private int LO_THRESHOLD; @Override public Pair map(ReadBackedPileup rbp) { @@ -24,7 +25,7 @@ public class ReadsLargeInsertSize extends ProportionTest { for (PileupElement e : rbp ) { if ( e.getRead().getReadPairedFlag() ) { ++total; - if ( e.getRead().getInferredInsertSize() > THRESHOLD ) { + if ( e.getRead().getInferredInsertSize() > HI_THRESHOLD || e.getRead().getInferredInsertSize() < LO_THRESHOLD ) { ++wonky; } } @@ -35,7 +36,8 @@ public class ReadsLargeInsertSize extends ProportionTest { public void init(RegionalAssociationWalker parent) { super.init(parent); - THRESHOLD = 150; + HI_THRESHOLD = 200; + LO_THRESHOLD = 50; } public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java new file mode 100755 index 000000000..d207886a3 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; + +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 4/15/11 + * Time: 12:54 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadsWithIndels extends ProportionTest { + + public Pair map(ReadBackedPileup pileup) { + int numReads = 0; + int numWithIndels = 0; + for ( PileupElement e : pileup ) { + ++numReads; + if ( e.getRead().getCigar().getCigarElements().contains(CigarOperator.DELETION) || e.getRead().getCigar().getCigarElements().contains(CigarOperator.INSERTION)) { + ++numWithIndels; + } + } + + return new Pair(numWithIndels,numReads); + } + + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/scala/qscript/playground/BatchMerge.q b/scala/qscript/playground/BatchMerge.q index a2d6bcd08..fa4d9ee3f 100755 --- a/scala/qscript/playground/BatchMerge.q +++ b/scala/qscript/playground/BatchMerge.q @@ -53,7 +53,7 @@ class batchMergePipeline extends QScript { this.baq = BAQ.CalculationMode.CALCULATE_AS_NECESSARY } this.intervals :+= extractIntervals.listOut - this.alleleVCF = combineVCFs.outVCF + this.allelesVCF = combineVCFs.outVCF this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar") this.memoryLimit = 4 this.scatterCount = 60