BatchMerge - AlleleVCF --> AllelesVCF, this (combined with Eric's fix) will solve James P.'s forum issue.

After viewing results on real case/control data from RAW -- it's really working quite well. ReadIndels, however, needs to use a T-test rather than a U-test, especially in deep coverage (at indel sites, the reads with indels will have mostly the same number of CIGAR indel elements -- one -- which doesn't really play nicely with the UTest when sample sets are large). Modified ReadsLargeInsertSize to be a two-way test (e.g. ReadsLarge and ReadsSmall). BaseQualityScore also suffers from the same issue as read indels, so switching over to a T-test in that case as well.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5653 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-04-15 22:03:16 +00:00
parent 1c32deb108
commit e5ef8388fc
7 changed files with 44 additions and 7 deletions

View File

@ -56,6 +56,7 @@ public class RAWSampleStats extends LocusWalker<MapHolder,RegionalAssociationHan
if ( ! first ) {
contextBuf.append(";");
} else {
contextBuf.append("\t");
first = false;
}
contextBuf.append(entry.getKey().getId());

View File

@ -25,4 +25,5 @@ public class BaseQualityScore extends ValueTest {
}
public boolean usePreviouslySeenReads() { return true; }
public boolean useTStatistic() { return true; }
}

View File

@ -52,6 +52,7 @@ public class MismatchRate extends ValueTest {
Object stats = sampleStats.get(sample.getId());
double mn;
double std;
double mmr = calcMMR(pileup);
if ( stats instanceof Pair ) {
mn = ((Pair<Double,Double>)stats).first;
std = ((Pair<Double,Double>)stats).second;
@ -62,10 +63,10 @@ public class MismatchRate extends ValueTest {
if ( std <= 0.0 ) {
std = 1.0;
}
ra.add(pileup.size());
ra.add(mmr);
}
return Arrays.asList((Number) ((calcMMR(pileup) - mn) / std));
return Arrays.asList((Number) ((mmr - mn) / std));
}
public double calcMMR(ReadBackedPileup rbp) {

View File

@ -32,4 +32,5 @@ public class ReadIndels extends ValueTest {
}
public boolean usePreviouslySeenReads() { return false; }
public boolean useTStatistic() { return true; }
}

View File

@ -13,9 +13,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
* Time: 3:31 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadsLargeInsertSize extends ProportionTest {
public class ReadsAberrantInsertSize extends ProportionTest {
private int THRESHOLD;
private int HI_THRESHOLD;
private int LO_THRESHOLD;
@Override
public Pair<Number,Number> map(ReadBackedPileup rbp) {
@ -24,7 +25,7 @@ public class ReadsLargeInsertSize extends ProportionTest {
for (PileupElement e : rbp ) {
if ( e.getRead().getReadPairedFlag() ) {
++total;
if ( e.getRead().getInferredInsertSize() > THRESHOLD ) {
if ( e.getRead().getInferredInsertSize() > HI_THRESHOLD || e.getRead().getInferredInsertSize() < LO_THRESHOLD ) {
++wonky;
}
}
@ -35,7 +36,8 @@ public class ReadsLargeInsertSize extends ProportionTest {
public void init(RegionalAssociationWalker parent) {
super.init(parent);
THRESHOLD = 150;
HI_THRESHOLD = 200;
LO_THRESHOLD = 50;
}
public boolean usePreviouslySeenReads() { return false; }

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 4/15/11
* Time: 12:54 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadsWithIndels extends ProportionTest {
public Pair<Number,Number> map(ReadBackedPileup pileup) {
int numReads = 0;
int numWithIndels = 0;
for ( PileupElement e : pileup ) {
++numReads;
if ( e.getRead().getCigar().getCigarElements().contains(CigarOperator.DELETION) || e.getRead().getCigar().getCigarElements().contains(CigarOperator.INSERTION)) {
++numWithIndels;
}
}
return new Pair<Number,Number>(numWithIndels,numReads);
}
public boolean usePreviouslySeenReads() { return false; }
}

View File

@ -53,7 +53,7 @@ class batchMergePipeline extends QScript {
this.baq = BAQ.CalculationMode.CALCULATE_AS_NECESSARY
}
this.intervals :+= extractIntervals.listOut
this.alleleVCF = combineVCFs.outVCF
this.allelesVCF = combineVCFs.outVCF
this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar")
this.memoryLimit = 4
this.scatterCount = 60