-Added walker to dump strand test results to file

-Refactored strand filter to handle calls from the walker


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1229 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-14 01:56:50 +00:00
parent bfe90af5e2
commit a1d33f8791
2 changed files with 195 additions and 44 deletions

View File

@@ -0,0 +1,135 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.*;
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData={@RMD(name="hapmap",type=HapMapGenotypeROD.class),@RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="variant",type=rodVariants.class)})
public class StrandTestPrinter extends LocusWalker<Integer, Integer> {

    @Argument(fullName="concordance_out", shortName="CO", doc="File to which concordant variant stats should be written", required=true)
    File CONCORDANCE_OUT = null;

    @Argument(fullName="sample_name", shortName="sample", doc="Sample name (e.g. NA12878)", required=true)
    String SAMPLE_NAME = null;

    // Writer for the per-site strand-test table; opened in initialize(), closed in onTraversalDone().
    private PrintWriter writer = null;

    /**
     * Open the output file and emit the header line for the strand-test table.
     */
    public void initialize() {
        try {
            writer = new PrintWriter(CONCORDANCE_OUT);
        } catch (FileNotFoundException e) {
            // Include the offending path so the user can diagnose the failure
            // (the original message had a bare String.format with no arguments).
            throw new StingException(String.format("Could not open file %s for writing", CONCORDANCE_OUT));
        }
        writer.println("location\tvariant\tlodBestVsRef\tStrandScore\tForwardCount\tReverseCount\tInHapmap\tIndbSNP\tOurCall\tHapMapCall");
    }

    /**
     * Initialize the number of het sites processed to zero.
     *
     * @return 0
     */
    public Integer reduceInit() { return 0; }

    /**
     * For each locus that is a het site -- according to either the HapMap truth data
     * or our own variant calls -- write one line of strand-test statistics to the
     * output file.
     *
     * @param tracker the meta-data tracker
     * @param ref     the reference base
     * @param context the context for the given locus
     * @return 1 if the locus is a het site, 0 otherwise
     */
    public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
        HapMapGenotypeROD hapmap = (HapMapGenotypeROD)tracker.lookup("hapmap", null);
        rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbsnp", null);
        rodVariants variant = (rodVariants)tracker.lookup("variant", null);

        StringBuffer sb = new StringBuffer();
        char refUpper = Character.toUpperCase(ref);
        boolean realHetVariant = false, realVariant = false, calledHetVariant = false;

        // Truth side: does HapMap say this sample is non-ref (and het) here?
        // NOTE(review): assumes hapmap.get(SAMPLE_NAME) is non-null whenever a hapmap
        // record exists at this locus -- confirm against HapMapGenotypeROD semantics.
        if ( hapmap != null ) {
            String genotype = hapmap.get(SAMPLE_NAME).toUpperCase();
            if ( genotype.charAt(0) != refUpper || genotype.charAt(1) != refUpper ) {
                realVariant = true;
                if ( genotype.charAt(0) != genotype.charAt(1) ) {
                    realHetVariant = true;
                    sb.append(context.getLocation() + "\t" + genotype + "\t");
                }
            }
        }

        // Call side: did we call a het here?  Only append the location/genotype
        // prefix if the HapMap branch above has not already written it.
        if ( variant != null ) {
            String genotype = variant.getBestGenotype().toUpperCase();
            if ( (genotype.charAt(0) != refUpper || genotype.charAt(1) != refUpper) &&
                 (genotype.charAt(0) != genotype.charAt(1)) ) {
                calledHetVariant = true;
                if ( !realHetVariant )
                    sb.append(context.getLocation() + "\t" + genotype + "\t");
            }
        }

        // Only het sites (truth or called) are reported.
        if ( !realHetVariant && !calledHetVariant )
            return 0;

        if ( variant != null )
            sb.append(variant.getLodBtr() + "\t");
        else
            sb.append("-1\t");

        // Prefer the HapMap genotype's alleles for the strand test when available.
        int allele1, allele2;
        if ( hapmap != null ) {
            allele1 = BaseUtils.simpleBaseToBaseIndex(hapmap.get(SAMPLE_NAME).charAt(0));
            allele2 = BaseUtils.simpleBaseToBaseIndex(hapmap.get(SAMPLE_NAME).charAt(1));
        } else {
            allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0));
            allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
        }

        // Threshold of -1 means the boolean result is irrelevant here; we only want
        // the p-value and strand counts appended to sb.
        VECFisherStrand.strandTest(ref, context, allele1, allele2, -1, sb);

        sb.append((hapmap != null ? "1" : "0") + "\t" + (dbsnp != null ? "1" : "0") + "\t");

        // Our call.
        if ( variant == null)
            sb.append("REF\t");
        else if ( calledHetVariant )
            sb.append("HET\t");
        else
            sb.append("HOMNONREF\t");

        // HapMap truth call.
        if ( realHetVariant )
            sb.append("HET\n");
        else if ( realVariant )
            sb.append("HOMNONREF\n");
        else
            sb.append("REF\n");

        writer.print(sb.toString());

        // Bug fix: this previously returned 0 unconditionally, so reduce() never
        // accumulated anything and onTraversalDone() always printed
        // "Processed 0 hets."; a het site must count as 1, per the javadoc above.
        return 1;
    }

    /**
     * Increment the number of het sites processed.
     *
     * @param value result of the map.
     * @param sum   accumulator for the reduce.
     * @return the new number of het sites processed.
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /**
     * Tell the user the number of hets processed and close out the output file.
     *
     * @param result the number of het sites seen.
     */
    public void onTraversalDone(Integer result) {
        out.printf("Processed %d hets.\n", result);
        writer.close();
    }
}

View File

@@ -3,7 +3,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import net.sf.samtools.SAMRecord;
import java.util.List;
@@ -11,8 +10,13 @@ import java.util.List;
import cern.jet.math.Arithmetic;
public class VECFisherStrand implements VariantExclusionCriterion {
private double pvalueLimit = 0.0001;
/**
 * Parse the optional exclusion-criterion argument.
 *
 * @param arguments argument string for this criterion; when non-null and non-empty
 *                  it is parsed as a double and replaces the default p-value limit
 * @throws NumberFormatException if a non-empty argument is not a parseable double
 */
public void initialize(String arguments) {
    // Removed a stale commented-out copy of this condition that was left behind.
    if (arguments != null && !arguments.isEmpty()) {
        // parseDouble avoids the needless Double boxing of Double.valueOf;
        // behavior (including NumberFormatException on bad input) is identical.
        pvalueLimit = Double.parseDouble(arguments);
    }
}
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
@@ -20,48 +24,62 @@ public class VECFisherStrand implements VariantExclusionCriterion {
int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
if (allele1 != allele2) {
int[][] table = getContingencyTable(context, variant);
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
double pValue = 0.0;
while (rotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
table = getContingencyTable(context, variant);
while (unrotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
//System.out.printf("P-cutoff: %f\n", pCutoff);
//System.out.printf("P-value: %f\n\n", pValue);
return pValue < 0.05;
return strandTest(ref, context, allele1, allele2, pvalueLimit, null);
}
return false;
}
/**
 * Fisher's exact test for strand bias at a variant site.  Builds the 2x2
 * contingency table of (allele1, allele2) x (forward, reverse) read counts and
 * accumulates a two-sided p-value by summing the probabilities of tables at
 * least as extreme as the observed one, rotating the table in both directions.
 *
 * NOTE(review): pCutoff -- the observed table's own probability -- is never added
 * to pValue; a standard two-sided Fisher's exact p-value includes the observed
 * table.  Confirm whether excluding it here is intentional (the exclusion
 * threshold may have been calibrated against this behavior).
 *
 * @param ref       the reference base
 * @param context   the context for the locus, supplying reads and offsets
 * @param allele1   base index of the first genotype allele
 * @param allele2   base index of the second genotype allele
 * @param threshold p-value threshold; the test "passes" when pValue < threshold
 * @param out       optional buffer: when non-null, the p-value and the
 *                  forward/reverse counts of the non-reference allele are appended
 * @return true if the accumulated p-value is below threshold
 */
public static boolean strandTest(char ref, LocusContext context, int allele1, int allele2, double threshold, StringBuffer out) {
int[][] table = getContingencyTable(context, allele1, allele2);
// Probability of the observed table; serves as the "at least as extreme" cutoff.
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
double pValue = 0.0;
// Walk tables in one rotation direction, accumulating those no more likely
// than the observed configuration.
while (rotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
// rotateTable mutated the table in place, so rebuild it before walking the
// opposite direction.
table = getContingencyTable(context, allele1, allele2);
while (unrotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
//System.out.printf("P-cutoff: %f\n", pCutoff);
//System.out.printf("P-value: %f\n\n", pValue);
// optionally print out the pvalue and the alternate allele counts
if ( out != null ) {
int refBase = BaseUtils.simpleBaseToBaseIndex(ref);
// Rebuild the table once more (the loops above consumed it), then emit the
// row belonging to whichever allele is NOT the reference base
// (row 0 = allele1, row 1 = allele2, per getContingencyTable's layout).
table = getContingencyTable(context, allele1, allele2);
if ( allele1 != refBase )
out.append(pValue + "\t" + table[0][0] + "\t" + table[0][1] + "\t");
else
out.append(pValue + "\t" + table[1][0] + "\t" + table[1][1] + "\t");
}
return pValue < threshold;
}
/**
 * Debugging aid: dump a 2x2 contingency table and its associated p-value to stdout.
 *
 * @param table  the 2x2 table of read counts
 * @param pValue the p-value to report alongside the counts
 */
private void printTable(int[][] table, double pValue) {
    int fwd1 = table[0][0], rev1 = table[0][1];
    int fwd2 = table[1][0], rev2 = table[1][1];
    System.out.printf("%d %d; %d %d : %f\n", fwd1, rev1, fwd2, rev2, pValue);
}
private boolean rotateTable(int[][] table) {
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;
@@ -71,7 +89,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
}
private boolean unrotateTable(int[][] table) {
private static boolean unrotateTable(int[][] table) {
table[0][0] += 1;
table[1][0] -= 1;
@@ -81,7 +99,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
}
private double computePValue(int[][] table) {
private static double computePValue(int[][] table) {
double pCutoff = 1.0;
int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
@@ -102,7 +120,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return pCutoff;
}
private int sumRow(int[][] table, int column) {
private static int sumRow(int[][] table, int column) {
int sum = 0;
for (int r = 0; r < table.length; r++) {
sum += table[r][column];
@@ -111,7 +129,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return sum;
}
private int sumColumn(int[][] table, int row) {
private static int sumColumn(int[][] table, int row) {
int sum = 0;
for (int c = 0; c < table[row].length; c++) {
sum += table[row][c];
@@ -126,16 +144,14 @@ public class VECFisherStrand implements VariantExclusionCriterion {
* allele1 # #
* allele2 # #
* @param context the context for the locus
* @param variant information for the called variant
* @param allele1 information for the called variant
* @param allele2 information for the called variant
* @return a 2x2 contingency table
*/
private int[][] getContingencyTable(LocusContext context, rodVariants variant) {
private static int[][] getContingencyTable(LocusContext context, int allele1, int allele2) {
int[][] table = new int[2][2];
int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0));
int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();