switching the concordance walker over to the new Variation system

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1735 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-28 15:46:36 +00:00
parent bce2f0d7cf
commit 39598f1f0a
4 changed files with 35 additions and 33 deletions

View File

@ -13,11 +13,11 @@ import java.util.*;
* CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available).
*/
@Requires(value={DataSource.REFERENCE},
referenceMetaData={@RMD(name="callset1",type=AllelicVariant.class),
@RMD(name="callset2",type=AllelicVariant.class)})
referenceMetaData={@RMD(name="callset1",type=VariationRod.class),
@RMD(name="callset2",type=VariationRod.class)})
@Reference(window=@Window(start=-20,stop=20))
public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=false)
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true)
private String OUTPUT_PATH = null;
@Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false)
private String[] TYPES = null;
@ -52,10 +52,6 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
out.println("\t" + types.next());
System.exit(0);
}
if ( OUTPUT_PATH == null )
throw new StingException("--concordance_output_path must be specified");
requestedTypes = new ArrayList<ConcordanceType>();
// initialize requested concordance types

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
/**
@ -54,8 +55,8 @@ public class IndelSubsets implements ConcordanceType {
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
AllelicVariant indel1 = (AllelicVariant)tracker.lookup("callset1", null);
AllelicVariant indel2 = (AllelicVariant)tracker.lookup("callset2", null);
Variation indel1 = (Variation)tracker.lookup("callset1", null);
Variation indel2 = (Variation)tracker.lookup("callset2", null);
int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 );
int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 );
@ -65,21 +66,21 @@ public class IndelSubsets implements ConcordanceType {
return;
// only deal with a valid indel
AllelicVariant indel = ( indel1 != null ? indel1 : indel2 );
Variation indel = ( indel1 != null ? indel1 : indel2 );
int size = ( indel.length() <= sizeCutoff ? 0 : 1 );
int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 );
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
writers[set1][set2][size][homopol].println(indel.toString());
}
private int homopolymerRunSize(ReferenceContext ref, AllelicVariant indel) {
private int homopolymerRunSize(ReferenceContext ref, Variation indel) {
char[] bases = ref.getBases();
GenomeLoc window = ref.getWindow();
GenomeLoc locus = ref.getLocus();
int refBasePos = (int)(locus.getStart() - window.getStart());
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAltBasesFWD().charAt(0);
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0);
int leftRun = 0;
for ( int i = refBasePos; i >= 0; i--) {
if ( bases[i] != indelBase )
@ -87,9 +88,9 @@ public class IndelSubsets implements ConcordanceType {
leftRun++;
}
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.length(),bases.length-1)] : indel.getAltBasesFWD().charAt(indel.getAltBasesFWD().length()-1);
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1);
int rightRun = 0;
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.length() : 1); i < bases.length; i++) {
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) {
if ( bases[i] != indelBase )
break;
rightRun++;

View File

@ -1,12 +1,14 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.HashMap;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
/**
* Split up two call sets into their various concordance sets
@ -50,24 +52,26 @@ public class SNPGenotypeConcordance implements ConcordanceType {
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null);
AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null);
Variation call1 = (Variation)tracker.lookup("callset1", null);
Variation call2 = (Variation)tracker.lookup("callset2", null);
// the only reason they would be null is a lack of coverage
if ( call1 == null || call2 == null ) {
if ( call1 != null && call1.isSNP() && call1.getVariationConfidence() >= LOD )
if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD )
printVariant(coverageVar1Writer, call1);
else if ( call2 != null && call2.isSNP() && call2.getVariationConfidence() >= LOD )
else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD )
printVariant(coverageVar2Writer, call2);
return;
}
if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype))
throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
double bestVsRef1 = call1.getVariationConfidence();
double bestVsRef2 = call2.getVariationConfidence();
double bestVsRef1 = call1.getNegLog10PError();
double bestVsRef2 = call2.getNegLog10PError();
//double bestVsNext1 = call1.getConsensusConfidence();
//double bestVsNext2 = call2.getConsensusConfidence();
String genotype1 = call1.getGenotype().get(0);
String genotype2 = call2.getGenotype().get(0);
String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases();
String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases();
// are they both variant SNPs?
if ( call1.isSNP() && call2.isSNP() ) {
@ -113,7 +117,7 @@ public class SNPGenotypeConcordance implements ConcordanceType {
return altAllele1 == altAllele2;
}
private static void printVariant(PrintWriter writer, AllelicVariant variant) {
private static void printVariant(PrintWriter writer, Variation variant) {
writer.println(variant.toString());
}

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
@ -37,8 +38,8 @@ public class SimpleVenn implements ConcordanceType {
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null);
AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null);
Variation call1 = (Variation)tracker.lookup("callset1", null);
Variation call2 = (Variation)tracker.lookup("callset2", null);
if ( call1 == null && call2 == null )
return;
@ -55,7 +56,7 @@ public class SimpleVenn implements ConcordanceType {
printVariant(set2_writer, call2);
// intersection (concordant)
else if ( call1.getAltBasesFWD().equalsIgnoreCase(call2.getAltBasesFWD()) )
else if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP())
printVariant(intersect_writer, call1);
// intersection (discordant)
@ -63,7 +64,7 @@ public class SimpleVenn implements ConcordanceType {
printVariant(discord_writer, call1);
}
private static void printVariant(PrintWriter writer, AllelicVariant variant) {
private static void printVariant(PrintWriter writer, Variation variant) {
writer.println(variant.toString());
}