From 39598f1f0a416c17db48a88714440772f6a2b780 Mon Sep 17 00:00:00 2001 From: aaron Date: Mon, 28 Sep 2009 15:46:36 +0000 Subject: [PATCH] switching the concordance walker over to the new Variation system git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1735 348d0f76-0448-11de-a6fe-93d51630548a --- .../concordance/CallsetConcordanceWalker.java | 10 +++---- .../walkers/concordance/IndelSubsets.java | 21 ++++++++------- .../concordance/SNPGenotypeConcordance.java | 26 +++++++++++-------- .../gatk/walkers/concordance/SimpleVenn.java | 11 ++++---- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java index 201bb870a..ee1bff412 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -13,11 +13,11 @@ import java.util.*; * CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available). */ @Requires(value={DataSource.REFERENCE}, - referenceMetaData={@RMD(name="callset1",type=AllelicVariant.class), - @RMD(name="callset2",type=AllelicVariant.class)}) + referenceMetaData={@RMD(name="callset1",type=VariationRod.class), + @RMD(name="callset2",type=VariationRod.class)}) @Reference(window=@Window(start=-20,stop=20)) public class CallsetConcordanceWalker extends RefWalker { - @Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=false) + @Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true) private String OUTPUT_PATH = null; @Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false) private String[] TYPES = null; @@ -52,10 +52,6 @@ public class CallsetConcordanceWalker extends RefWalker { out.println("\t" + types.next()); System.exit(0); } - - if ( OUTPUT_PATH == null ) - throw new StingException("--concordance_output_path must be specified"); - requestedTypes = new ArrayList(); // initialize requested concordance types diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java index 4fb619df3..563dea3c4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java @@ -1,12 +1,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.io.PrintWriter; import java.io.FileNotFoundException; +import java.io.PrintWriter; import java.util.HashMap; /** @@ -54,8 +55,8 @@ public class IndelSubsets implements ConcordanceType { } public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - AllelicVariant indel1 = (AllelicVariant)tracker.lookup("callset1", null); - AllelicVariant indel2 = (AllelicVariant)tracker.lookup("callset2", null); + Variation indel1 = (Variation)tracker.lookup("callset1", null); + Variation indel2 = (Variation)tracker.lookup("callset2", null); int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 ); int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 ); @@ -65,21 +66,21 @@ public class IndelSubsets implements ConcordanceType { return; // only deal with a valid indel - AllelicVariant indel = ( indel1 != null ? indel1 : indel2 ); + Variation indel = ( indel1 != null ? indel1 : indel2 ); - int size = ( indel.length() <= sizeCutoff ? 0 : 1 ); + int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 ); int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 ); writers[set1][set2][size][homopol].println(indel.toString()); } - private int homopolymerRunSize(ReferenceContext ref, AllelicVariant indel) { + private int homopolymerRunSize(ReferenceContext ref, Variation indel) { char[] bases = ref.getBases(); GenomeLoc window = ref.getWindow(); GenomeLoc locus = ref.getLocus(); int refBasePos = (int)(locus.getStart() - window.getStart()); - char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAltBasesFWD().charAt(0); + char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0); int leftRun = 0; for ( int i = refBasePos; i >= 0; i--) { if ( bases[i] != indelBase ) @@ -87,9 +88,9 @@ public class IndelSubsets implements ConcordanceType { leftRun++; } - indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.length(),bases.length-1)] : indel.getAltBasesFWD().charAt(indel.getAltBasesFWD().length()-1); + indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1); int rightRun = 0; - for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.length() : 1); i < bases.length; i++) { + for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) { if ( bases[i] != indelBase ) break; rightRun++; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java index 13da5b6c8..7f42b6fb0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -1,12 +1,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.HashMap; import java.io.FileNotFoundException; import java.io.PrintWriter; +import java.util.HashMap; /** * Split up two call sets into their various concordance sets @@ -50,24 +52,26 @@ public class SNPGenotypeConcordance implements ConcordanceType { } public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null); - AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null); + Variation call1 = (Variation)tracker.lookup("callset1", null); + Variation call2 = (Variation)tracker.lookup("callset2", null); // the only reason they would be null is a lack of coverage if ( call1 == null || call2 == null ) { - if ( call1 != null && call1.isSNP() && call1.getVariationConfidence() >= LOD ) + if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD ) printVariant(coverageVar1Writer, call1); - else if ( call2 != null && call2.isSNP() && call2.getVariationConfidence() >= LOD ) + else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD ) printVariant(coverageVar2Writer, call2); return; } + if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype)) + throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information"); - double bestVsRef1 = call1.getVariationConfidence(); - double bestVsRef2 = call2.getVariationConfidence(); + double bestVsRef1 = call1.getNegLog10PError(); + double bestVsRef2 = call2.getNegLog10PError(); //double bestVsNext1 = call1.getConsensusConfidence(); //double bestVsNext2 = call2.getConsensusConfidence(); - String genotype1 = call1.getGenotype().get(0); - String genotype2 = call2.getGenotype().get(0); + String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases(); + String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases(); // are they both variant SNPs? if ( call1.isSNP() && call2.isSNP() ) { @@ -113,7 +117,7 @@ public class SNPGenotypeConcordance implements ConcordanceType { return altAllele1 == altAllele2; } - private static void printVariant(PrintWriter writer, AllelicVariant variant) { + private static void printVariant(PrintWriter writer, Variation variant) { writer.println(variant.toString()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java index f454e1e52..08677b943 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.Variation; import java.io.FileNotFoundException; import java.io.PrintWriter; @@ -37,8 +38,8 @@ public class SimpleVenn implements ConcordanceType { } public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null); - AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null); + Variation call1 = (Variation)tracker.lookup("callset1", null); + Variation call2 = (Variation)tracker.lookup("callset2", null); if ( call1 == null && call2 == null ) return; @@ -55,7 +56,7 @@ public class SimpleVenn implements ConcordanceType { printVariant(set2_writer, call2); // intersection (concordant) - else if ( call1.getAltBasesFWD().equalsIgnoreCase(call2.getAltBasesFWD()) ) + else if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP()) printVariant(intersect_writer, call1); // intersection (discordant) @@ -63,7 +64,7 @@ public class SimpleVenn implements ConcordanceType { printVariant(discord_writer, call1); } - private static void printVariant(PrintWriter writer, AllelicVariant variant) { + private static void printVariant(PrintWriter writer, Variation variant) { writer.println(variant.toString()); }