diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java index 76c25fdce..9a4547cba 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -4,28 +4,38 @@ import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.cmdLine.Argument; +import java.io.File; import java.util.*; /** - * CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available). + * CallsetConcordanceWalker finds the concordance between multiple VCF callsets (different tests are available). */ -@Requires(value={DataSource.REFERENCE}, - referenceMetaData={@RMD(name="callset1",type=VariationRod.class), - @RMD(name="callset2",type=VariationRod.class)}) +@Requires(value={DataSource.REFERENCE}) @Reference(window=@Window(start=-20,stop=20)) -public class CallsetConcordanceWalker extends RefWalker { - @Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true) - private String OUTPUT_PATH = null; +public class CallsetConcordanceWalker extends RodWalker { + @Argument(fullName="concordance_output", shortName="CO", doc="VCF file to which output should be written", required=true) + private File OUTPUT = null; @Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false) private String[] TYPES = null; @Argument(fullName="list", shortName="ls", doc="List the available concordance types and exit", required=false) private Boolean LIST_ONLY = false; + + // the concordance tests to run private ArrayList requestedTypes; + // VCF writer for the output of the concordance tests + private VCFWriter vcfWriter; + + // a map of rod name to uniquified sample name + HashMap, String> rodNamesToSampleNames = new HashMap, String>(); + + /** * Prepare the output file and the list of available features. */ @@ -35,7 +45,7 @@ public class CallsetConcordanceWalker extends RefWalker { List> classes = PackageUtils.getClassesImplementingInterface(ConcordanceType.class); // print and exit if that's what was requested - if (LIST_ONLY) { + if ( LIST_ONLY ) { out.println("\nAvailable concordance types:"); for (int i = 0; i < classes.size(); i++) out.println("\t" + classes.get(i).getSimpleName()); @@ -43,9 +53,26 @@ public class CallsetConcordanceWalker extends RefWalker { System.exit(0); } - requestedTypes = new ArrayList(); + // get the list of all sample names from the various input rods (the need to be uniquified in case there's overlap) + HashSet samples = new HashSet(); + VCFUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, rodNamesToSampleNames); + + for ( java.util.Map.Entry, String> entry : rodNamesToSampleNames.entrySet() ) { + logger.debug("Uniquified sample mapping: " + entry.getKey().first + "/" + entry.getKey().second + " -> " + entry.getValue()); + } + + // set up the header fields + Map hInfo = new HashMap(); + hInfo.put("format", VCFWriter.VERSION); + hInfo.put("source", "CallsetConcordance"); + hInfo.put("reference", getToolkit().getArguments().referenceFile.getName()); + hInfo.put("explanation", "This file represents a concordance test of various call sets - NOT the output from a multi-sample caller"); + VCFHeader header = new VCFHeader(hInfo, samples); + + vcfWriter = new VCFWriter(header, OUTPUT); // initialize requested concordance types + requestedTypes = new ArrayList(); if (TYPES != null) { for ( String requestedTypeString : TYPES ) { String[] requestedPieces = requestedTypeString.split(":"); @@ -68,7 +95,7 @@ public class CallsetConcordanceWalker extends RefWalker { } } - concordance.initialize(OUTPUT_PATH, requestedArgs); + concordance.initialize(requestedArgs, samples); requestedTypes.add(concordance); break; } catch (InstantiationException e) { @@ -83,25 +110,67 @@ public class CallsetConcordanceWalker extends RefWalker { throw new StingException("The requested concordance type (" + requestedType + ") isn't a valid concordance option"); } } - } + } - public Integer reduceInit() { return 0; } + public Integer map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { + if ( rodData == null ) // RodWalkers can make funky map calls + return 0; - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( ConcordanceType type : requestedTypes ) - type.computeConcordance(tracker, ref); + // get all of the vcf rods at this locus + ArrayList vcfRods = new ArrayList(); + Iterator rods = rodData.getAllRods().iterator(); + while (rods.hasNext()) { + ReferenceOrderedDatum rod = rods.next(); + if ( rod instanceof RodVCF ) + vcfRods.add((RodVCF)rod); + } + + if ( vcfRods.size() == 0 ) + return 0; + + // create a merged record from all input VCFs + VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames); + + // pull out all of the individual calls from the rods and insert into a map based on the + // mapping from rod/sample to uniquified name + HashMap samplesToRecords = new HashMap(); + for ( RodVCF rod : vcfRods ) { + List records = rod.getGenotypes(); + for ( Genotype g : records ) { + if ( !(g instanceof VCFGenotypeCall) ) + throw new StingException("Expected VCF rod Genotypes to be of type VCFGenotypeCall"); + + VCFGenotypeCall vcfCall = (VCFGenotypeCall)g; + String uniquifiedSample = rodNamesToSampleNames.get(new Pair(rod.getName(), vcfCall.getSampleName())); + if ( uniquifiedSample == null ) + throw new StingException("Unexpected sample encountered: " + vcfCall.getSampleName() + " in rod " + rod.getName()); + + samplesToRecords.put(uniquifiedSample, vcfCall); + } + } + + // add in the info fields to the new record based on the results of each of the relevant concordance tests + for ( ConcordanceType type : requestedTypes ) { + String result = type.computeConcordance(samplesToRecords, ref); + if ( result != null ) { + record.addInfoField(type.getInfoName(), result); + } + } + + // emit the new record + vcfWriter.addRecord(record); return 1; } + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { return sum + value; } public void onTraversalDone(Integer result) { - for ( ConcordanceType type : requestedTypes ) - type.cleanup(); - + vcfWriter.close(); out.printf("Processed %d loci.\n", result); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/ConcordanceType.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/ConcordanceType.java index 4222b721a..7b97c647d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/ConcordanceType.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/ConcordanceType.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; -import java.util.HashMap; +import java.util.Map; +import java.util.Set; public interface ConcordanceType { - public void initialize(String prefix, HashMap args); - public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref); - public void cleanup(); - + public void initialize(Map args, Set samples); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref); + public String getInfoName(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java index e8dd38f19..88d488e62 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/IndelSubsets.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; -import java.io.FileNotFoundException; -import java.io.PrintWriter; -import java.util.HashMap; +import java.util.*; /** * filter an indel callset based on given criteria @@ -22,54 +20,59 @@ public class IndelSubsets implements ConcordanceType { private int homopolymerCutoff = 1; private int sizeCutoff = 2; - private PrintWriter[][][][] writers = new PrintWriter[2][2][2][2]; + private String[][][][] tags = new String[2][2][2][2]; + private String sample1, sample2; public IndelSubsets() {} - public void initialize(String prefix, HashMap args) { + public void initialize(Map args, Set samples) { + if ( samples.size() != 2 ) + throw new StingException("IndelSubsets concordance test cannot handle anything other than 2 VCF records"); + if ( args.get("sizeCutoff") != null ) sizeCutoff = Integer.valueOf(args.get("sizeCutoff")); if ( args.get("homopolymerCutoff") != null ) homopolymerCutoff = Integer.valueOf(args.get("homopolymerCutoff")); - try { - for (int i = 0; i < 2; i++) { - String name1 = i == 0 ? "set1" : "noSet1"; - for (int j = 0; j < 2; j++) { - String name2 = j == 0 ? "set2" : "noSet2"; - for (int k = 0; k < 2; k++) { - String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff)); - for (int m = 0; m < 2; m++) { - String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff)); - writers[i][j][k][m] = new PrintWriter(prefix + "." + name1 + "." + name2 + "." + name3 + "." + name4 + ".calls"); - } + Iterator iter = samples.iterator(); + sample1 = iter.next(); + sample2 = iter.next(); + + for (int i = 0; i < 2; i++) { + String name1 = i == 0 ? sample1 : "no_" + sample1; + for (int j = 0; j < 2; j++) { + String name2 = j == 0 ? sample2 : "no_" + sample2; + for (int k = 0; k < 2; k++) { + String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff)); + for (int m = 0; m < 2; m++) { + String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff)); + tags[i][j][k][m] = name1 + "." + name2 + "." + name3 + "." + name4; } } } - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file(s) for writing")); } } - public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - Variation indel1 = (Variation)tracker.lookup("callset1", null); - Variation indel2 = (Variation)tracker.lookup("callset2", null); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 ); - int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 ); + VCFGenotypeCall indel1 = samplesToRecords.get(sample1); + VCFGenotypeCall indel2 = samplesToRecords.get(sample2); + + int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 ); + int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 ); // skip this locus if they're both not valid indels if ( set1 == 1 && set2 == 1 ) - return; + return null; // only deal with a valid indel - Variation indel = ( indel1 != null ? indel1 : indel2 ); + Variation indel = ( indel1 != null ? indel1.toVariation() : indel2.toVariation() ); // we only deal with the first allele int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 ); int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 ); - writers[set1][set2][size][homopol].println(indel.toString()); + return tags[set1][set2][size][homopol]; } private int homopolymerRunSize(ReferenceContext ref, Variation indel) { @@ -98,11 +101,5 @@ public class IndelSubsets implements ConcordanceType { return Math.max(leftRun, rightRun); } - public void cleanup() { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - for (int m = 0; m < 2; m++) - writers[i][j][k][m].close(); - } + public String getInfoName() { return "IS"; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/NWayVenn.java index a3001c8da..c44fa21db 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/NWayVenn.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/NWayVenn.java @@ -1,16 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; -import java.io.FileNotFoundException; -import java.io.PrintWriter; -import java.util.HashMap; +import java.util.*; import java.util.Map.Entry; /** @@ -21,86 +14,29 @@ import java.util.Map.Entry; */ public class NWayVenn implements ConcordanceType { - // TODO -- change this to use Ryan's generic map object when it's ready - private HashMap writers = new HashMap(); - private PrintWriter union_writer = null; - - private String prefix; - private int N; - public NWayVenn() {} - public void initialize(String prefix, HashMap args) { + public void initialize(Map args, Set samples) { } - if ( args.get("N") == null ) - throw new StingException("NWayVenn concordance module requires the 'N' argument"); - N = Integer.valueOf(args.get("N")); - if ( N < 1 ) - throw new StingException("NWayVenn concordance module requires that N be greater than 0"); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + if ( samplesToRecords.size() == 0 ) + return null; - this.prefix = prefix; - - // create the list of file names - HashMap files = new HashMap(); - files.put("", ""); - for (int i = 1; i <= N; i++) { - // create 2 new list entries for each of the current file names: - // one with and one without this set appended to the end - HashMap appendedFiles = new HashMap(); - for ( Entry file : files.entrySet() ) { - appendedFiles.put(file.getKey() + "0", file.getValue()); - appendedFiles.put(file.getKey() + "1", file.getValue() + ".set" + i); - } - files = appendedFiles; - } - // remove the entry for no hits - files.remove(Utils.dupString('0', N)); - - try { - for ( Entry file : files.entrySet() ) - writers.put(file.getKey(), new PrintWriter(prefix + "." + N + "wayVenn" + file.getValue() + ".calls")); - union_writer = new PrintWriter(prefix + "." + N + "wayVenn.union.calls"); - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file(s) for writing")); - } - } - - public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - Variation[] calls = new Variation[N]; - int goodRods = 0; - for (int i = 0; i < N; i++) { - RODRecordList callList = tracker.getTrackData("callset" + (i+1), null); - if ( callList != null ) { - calls[i] = (Variation)callList.getRecords().get(0); - goodRods++; - } + TreeSet concordantSamples = new TreeSet(); + for ( Entry entry : samplesToRecords.entrySet() ) { + concordantSamples.add(entry.getKey()); } - if ( goodRods == 0 ) - return; - - // union - printVariant(union_writer, calls); - - // add to the correct set - StringBuilder hashString = new StringBuilder(); - for (int i = 0; i < N; i++) - hashString.append(calls[i] != null ? "1" : "0"); - printVariant(writers.get(hashString.toString()), calls); - } - - private static void printVariant(PrintWriter writer, Variation[] variants) { - for ( Variation variant : variants ) { - if ( variant != null ) { - writer.println(variant.toString()); - return; - } + StringBuffer tag = new StringBuffer(); + Iterator iter = concordantSamples.iterator(); + while ( iter.hasNext() ) { + tag.append(iter.next()); + if ( iter.hasNext() ) + tag.append("."); } + + return tag.toString(); } - public void cleanup() { - union_writer.close(); - for ( PrintWriter writer : writers.values() ) - writer.close(); - } + public String getInfoName() { return "NV"; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java index 147f69d5d..44ebddd56 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -1,16 +1,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; -import java.io.FileNotFoundException; -import java.io.PrintWriter; -import java.util.HashMap; +import java.util.*; /** * Split up two call sets into their various concordance sets @@ -19,58 +15,42 @@ public class SNPGenotypeConcordance implements ConcordanceType { private double LOD = 5.0; - private PrintWriter sameVarWriter = null; - private PrintWriter oneVarWriter = null; - private PrintWriter sameAlleleWriter = null; - private PrintWriter diffAlleleWriter = null; - private PrintWriter refVsVar1Writer = null; - private PrintWriter refVsVar2Writer = null; - private PrintWriter comboVarWriter = null; - private PrintWriter coverageVar1Writer = null; - private PrintWriter coverageVar2Writer = null; + private String sample1, sample2; public SNPGenotypeConcordance() {} - public void initialize(String prefix, HashMap args) { - try { - sameVarWriter = new PrintWriter(prefix + ".snp_concordance.sameConfidentVariant.calls"); - oneVarWriter = new PrintWriter(prefix + ".snp_concordance.bothVariantOnlyOneIsConfident.calls"); - sameAlleleWriter = new PrintWriter(prefix + ".snp_concordance.sameVariantAlleleDifferentGenotype.calls"); - diffAlleleWriter = new PrintWriter(prefix + ".snp_concordance.differentVariantAllele.calls"); - refVsVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1VariantSet2Ref.calls"); - refVsVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1RefSet2Variant.calls"); - comboVarWriter = new PrintWriter(prefix + ".snp_concordance.confidentVariantWhenCombined.calls"); - coverageVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1ConfidentVariantSet2NoCoverage.calls"); - coverageVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1NoCoverageSet2ConfidentVariant.calls"); - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file(s) for writing")); - } + public void initialize(Map args, Set samples) { + if ( samples.size() != 2 ) + throw new StingException("SNPGenotype concordance test cannot handle anything other than 2 VCF records"); if ( args.get("lod") != null ) LOD = Double.valueOf(args.get("lod")); + + Iterator iter = samples.iterator(); + sample1 = iter.next(); + sample2 = iter.next(); } - public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - RODRecordList call1List = tracker.getTrackData("callset1", null); - RODRecordList call2List = tracker.getTrackData("callset2", null); - Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0)); - Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0)); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + + VCFGenotypeCall vcfCall1 = samplesToRecords.get(sample1); + VCFGenotypeCall vcfCall2 = samplesToRecords.get(sample2); + Variation call1 = (vcfCall1 == null ? null : vcfCall1.toVariation()); + Variation call2 = (vcfCall2 == null ? null : vcfCall2.toVariation()); // the only reason they would be null is a lack of coverage if ( call1 == null || call2 == null ) { if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD ) - printVariant(coverageVar1Writer, call1); + return "set1ConfidentVariantSet2NoCoverage"; else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD ) - printVariant(coverageVar2Writer, call2); - return; + return "set1NoCoverageSet2ConfidentVariant"; + return null; } if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype)) - throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information"); + throw new StingException("Both ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information"); double bestVsRef1 = call1.getNegLog10PError(); double bestVsRef2 = call2.getNegLog10PError(); - //double bestVsNext1 = call1.getConsensusConfidence(); - //double bestVsNext2 = call2.getConsensusConfidence(); String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases(); String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases(); @@ -81,33 +61,35 @@ public class SNPGenotypeConcordance implements ConcordanceType { if ( bestVsRef1 >= LOD && bestVsRef2 >= LOD ) { // same genotype if ( genotype1.equals(genotype2) ) - printVariant(sameVarWriter, call1); + return "sameConfidentVariant"; // same allele, different genotype else if ( sameVariantAllele(genotype1, genotype2, ref.getBase()) ) - printVariant(sameAlleleWriter, call1); + return "sameVariantAlleleDifferentGenotype"; // different variant allele else - printVariant(diffAlleleWriter, call1); + return "differentVariantAllele"; } // confident only when combined else if ( bestVsRef1 < LOD && bestVsRef2 < LOD && bestVsRef1 + bestVsRef2 >= LOD ) { - printVariant(comboVarWriter, call1); + return "confidentVariantWhenCombined"; } // only one is confident variant else if ( (bestVsRef1 < LOD && bestVsRef2 >= LOD) || (bestVsRef1 >= LOD && bestVsRef2 < LOD) ) { - printVariant(oneVarWriter, call1); + return "bothVariantOnlyOneIsConfident"; } } // one is variant and the other is ref else if ( call1.isSNP() && call2.isReference() && bestVsRef1 >= LOD ) - printVariant(refVsVar1Writer, call1); + return "set1VariantSet2Ref"; else if ( call2.isSNP() && call1.isReference() && bestVsRef2 >= LOD ) - printVariant(refVsVar2Writer, call2); + return "set1RefSet2Variant"; + + return null; } private boolean sameVariantAllele(String genotype1, String genotype2, char ref) { @@ -118,19 +100,5 @@ public class SNPGenotypeConcordance implements ConcordanceType { return altAllele1 == altAllele2; } - private static void printVariant(PrintWriter writer, Variation variant) { - writer.println(variant.toString()); - } - - public void cleanup() { - sameVarWriter.close(); - oneVarWriter.close(); - sameAlleleWriter.close(); - diffAlleleWriter.close(); - refVsVar1Writer.close(); - refVsVar2Writer.close(); - comboVarWriter.close(); - coverageVar1Writer.close(); - coverageVar2Writer.close(); - } + public String getInfoName() { return "SG"; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java index f6de692d8..ba73a0115 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SimpleVenn.java @@ -1,81 +1,62 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.StingException; -import java.io.FileNotFoundException; -import java.io.PrintWriter; -import java.util.HashMap; +import java.util.*; /** * Split up two call sets into their various "Venn diagram" sets */ public class SimpleVenn implements ConcordanceType { - private PrintWriter union_writer = null; - private PrintWriter intersect_writer = null; - private PrintWriter discord_writer = null; - private PrintWriter set1_writer = null; - private PrintWriter set2_writer = null; + private String sample1, sample2; public SimpleVenn() {} - public void initialize(String prefix, HashMap args) { - try { - union_writer = new PrintWriter(prefix + ".venn.union.calls"); - intersect_writer = new PrintWriter(prefix + ".venn.intersection.calls"); - discord_writer = new PrintWriter(prefix + ".venn.discordant.calls"); - set1_writer = new PrintWriter(prefix + ".venn.set1Only.calls"); - set2_writer = new PrintWriter(prefix + ".venn.set2Only.calls"); - } catch (FileNotFoundException e) { - throw new StingException(String.format("Could not open file(s) for writing")); - } + public void initialize(Map args, Set samples) { + if ( samples.size() != 2 ) + throw new StingException("SimpleVenn concordance test cannot handle anything other than 2 VCF records"); + + Iterator iter = samples.iterator(); + sample1 = iter.next(); + sample2 = iter.next(); } - public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) { - RODRecordList call1List = tracker.getTrackData("callset1", null); - RODRecordList call2List = tracker.getTrackData("callset2", null); - Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0)); - Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0)); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + + VCFGenotypeCall call1 = samplesToRecords.get(sample1); + VCFGenotypeCall call2 = samplesToRecords.get(sample2); if ( call1 == null && call2 == null ) - return; - - // union - printVariant(union_writer, call1 != null ? call1 : call2); + return null; // set 1 only if ( call2 == null ) - printVariant(set1_writer, call1); + return sample1 + "_only"; // set 2 only else if ( call1 == null ) - printVariant(set2_writer, call2); + return sample2 + "_only"; + + // at this point we know that neither is null, so now we need to test for alternate allele concordance + Variation callV1 = call1.toVariation(); + Variation callV2 = call2.toVariation(); // we can't really deal with multi-allelic variants - else if ( call1.isBiallelic() && call2.isBiallelic() ) { + if ( callV1.isBiallelic() && callV2.isBiallelic() ) { // intersection (concordant) - if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP() ) - printVariant(intersect_writer, call1); + if ( callV1.getAlternativeBaseForSNP() == callV2.getAlternativeBaseForSNP() ) + return "concordant"; // intersection (discordant) else - printVariant(discord_writer, call1); + return "discordant"; } + + return "concordant"; } - private static void printVariant(PrintWriter writer, Variation variant) { - writer.println(variant.toString()); - } - - public void cleanup() { - union_writer.close(); - intersect_writer.close(); - discord_writer.close(); - set1_writer.close(); - set2_writer.close(); - } + public String getInfoName() { return "SV"; } }