Completely refactored the Callset Concordance code. Now, it takes in VCF rods and emits a single VCF file which has merged calls from all inputs and is annotated (in the INFO fields) with the appropriate concordance test(s).
Still needs a bit of polish... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1999 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bc6f24e88f
commit
ab705565cf
|
|
@ -4,28 +4,38 @@ import org.broadinstitute.sting.gatk.contexts.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available).
|
||||
* CallsetConcordanceWalker finds the concordance between multiple VCF callsets (different tests are available).
|
||||
*/
|
||||
@Requires(value={DataSource.REFERENCE},
|
||||
referenceMetaData={@RMD(name="callset1",type=VariationRod.class),
|
||||
@RMD(name="callset2",type=VariationRod.class)})
|
||||
@Requires(value={DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-20,stop=20))
|
||||
public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
||||
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true)
|
||||
private String OUTPUT_PATH = null;
|
||||
public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="concordance_output", shortName="CO", doc="VCF file to which output should be written", required=true)
|
||||
private File OUTPUT = null;
|
||||
@Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false)
|
||||
private String[] TYPES = null;
|
||||
@Argument(fullName="list", shortName="ls", doc="List the available concordance types and exit", required=false)
|
||||
private Boolean LIST_ONLY = false;
|
||||
|
||||
|
||||
// the concordance tests to run
|
||||
private ArrayList<ConcordanceType> requestedTypes;
|
||||
|
||||
// VCF writer for the output of the concordance tests
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
// a map of rod name to uniquified sample name
|
||||
HashMap<Pair<String, String>, String> rodNamesToSampleNames = new HashMap<Pair<String, String>, String>();
|
||||
|
||||
|
||||
/**
|
||||
* Prepare the output file and the list of available features.
|
||||
*/
|
||||
|
|
@ -35,7 +45,7 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
|||
List<Class<? extends ConcordanceType>> classes = PackageUtils.getClassesImplementingInterface(ConcordanceType.class);
|
||||
|
||||
// print and exit if that's what was requested
|
||||
if (LIST_ONLY) {
|
||||
if ( LIST_ONLY ) {
|
||||
out.println("\nAvailable concordance types:");
|
||||
for (int i = 0; i < classes.size(); i++)
|
||||
out.println("\t" + classes.get(i).getSimpleName());
|
||||
|
|
@ -43,9 +53,26 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
|||
System.exit(0);
|
||||
}
|
||||
|
||||
requestedTypes = new ArrayList<ConcordanceType>();
|
||||
// get the list of all sample names from the various input rods (the need to be uniquified in case there's overlap)
|
||||
HashSet<String> samples = new HashSet<String>();
|
||||
VCFUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, rodNamesToSampleNames);
|
||||
|
||||
for ( java.util.Map.Entry<Pair<String, String>, String> entry : rodNamesToSampleNames.entrySet() ) {
|
||||
logger.debug("Uniquified sample mapping: " + entry.getKey().first + "/" + entry.getKey().second + " -> " + entry.getValue());
|
||||
}
|
||||
|
||||
// set up the header fields
|
||||
Map<String, String> hInfo = new HashMap<String, String>();
|
||||
hInfo.put("format", VCFWriter.VERSION);
|
||||
hInfo.put("source", "CallsetConcordance");
|
||||
hInfo.put("reference", getToolkit().getArguments().referenceFile.getName());
|
||||
hInfo.put("explanation", "This file represents a concordance test of various call sets - NOT the output from a multi-sample caller");
|
||||
VCFHeader header = new VCFHeader(hInfo, samples);
|
||||
|
||||
vcfWriter = new VCFWriter(header, OUTPUT);
|
||||
|
||||
// initialize requested concordance types
|
||||
requestedTypes = new ArrayList<ConcordanceType>();
|
||||
if (TYPES != null) {
|
||||
for ( String requestedTypeString : TYPES ) {
|
||||
String[] requestedPieces = requestedTypeString.split(":");
|
||||
|
|
@ -68,7 +95,7 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
concordance.initialize(OUTPUT_PATH, requestedArgs);
|
||||
concordance.initialize(requestedArgs, samples);
|
||||
requestedTypes.add(concordance);
|
||||
break;
|
||||
} catch (InstantiationException e) {
|
||||
|
|
@ -83,25 +110,67 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
|||
throw new StingException("The requested concordance type (" + requestedType + ") isn't a valid concordance option");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( rodData == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( ConcordanceType type : requestedTypes )
|
||||
type.computeConcordance(tracker, ref);
|
||||
// get all of the vcf rods at this locus
|
||||
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
|
||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||
while (rods.hasNext()) {
|
||||
ReferenceOrderedDatum rod = rods.next();
|
||||
if ( rod instanceof RodVCF )
|
||||
vcfRods.add((RodVCF)rod);
|
||||
}
|
||||
|
||||
if ( vcfRods.size() == 0 )
|
||||
return 0;
|
||||
|
||||
// create a merged record from all input VCFs
|
||||
VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames);
|
||||
|
||||
// pull out all of the individual calls from the rods and insert into a map based on the
|
||||
// mapping from rod/sample to uniquified name
|
||||
HashMap<String, VCFGenotypeCall> samplesToRecords = new HashMap<String, VCFGenotypeCall>();
|
||||
for ( RodVCF rod : vcfRods ) {
|
||||
List<Genotype> records = rod.getGenotypes();
|
||||
for ( Genotype g : records ) {
|
||||
if ( !(g instanceof VCFGenotypeCall) )
|
||||
throw new StingException("Expected VCF rod Genotypes to be of type VCFGenotypeCall");
|
||||
|
||||
VCFGenotypeCall vcfCall = (VCFGenotypeCall)g;
|
||||
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfCall.getSampleName()));
|
||||
if ( uniquifiedSample == null )
|
||||
throw new StingException("Unexpected sample encountered: " + vcfCall.getSampleName() + " in rod " + rod.getName());
|
||||
|
||||
samplesToRecords.put(uniquifiedSample, vcfCall);
|
||||
}
|
||||
}
|
||||
|
||||
// add in the info fields to the new record based on the results of each of the relevant concordance tests
|
||||
for ( ConcordanceType type : requestedTypes ) {
|
||||
String result = type.computeConcordance(samplesToRecords, ref);
|
||||
if ( result != null ) {
|
||||
record.addInfoField(type.getInfoName(), result);
|
||||
}
|
||||
}
|
||||
|
||||
// emit the new record
|
||||
vcfWriter.addRecord(record);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + value;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
for ( ConcordanceType type : requestedTypes )
|
||||
type.cleanup();
|
||||
|
||||
vcfWriter.close();
|
||||
out.printf("Processed %d loci.\n", result);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public interface ConcordanceType {
|
||||
|
||||
public void initialize(String prefix, HashMap<String,String> args);
|
||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref);
|
||||
public void cleanup();
|
||||
|
||||
public void initialize(Map<String,String> args, Set<String> samples);
|
||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref);
|
||||
public String getInfoName();
|
||||
}
|
||||
|
|
@ -1,14 +1,12 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* filter an indel callset based on given criteria
|
||||
|
|
@ -22,54 +20,59 @@ public class IndelSubsets implements ConcordanceType {
|
|||
private int homopolymerCutoff = 1;
|
||||
private int sizeCutoff = 2;
|
||||
|
||||
private PrintWriter[][][][] writers = new PrintWriter[2][2][2][2];
|
||||
private String[][][][] tags = new String[2][2][2][2];
|
||||
private String sample1, sample2;
|
||||
|
||||
public IndelSubsets() {}
|
||||
|
||||
public void initialize(String prefix, HashMap<String,String> args) {
|
||||
public void initialize(Map<String, String> args, Set<String> samples) {
|
||||
if ( samples.size() != 2 )
|
||||
throw new StingException("IndelSubsets concordance test cannot handle anything other than 2 VCF records");
|
||||
|
||||
if ( args.get("sizeCutoff") != null )
|
||||
sizeCutoff = Integer.valueOf(args.get("sizeCutoff"));
|
||||
if ( args.get("homopolymerCutoff") != null )
|
||||
homopolymerCutoff = Integer.valueOf(args.get("homopolymerCutoff"));
|
||||
|
||||
try {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
String name1 = i == 0 ? "set1" : "noSet1";
|
||||
for (int j = 0; j < 2; j++) {
|
||||
String name2 = j == 0 ? "set2" : "noSet2";
|
||||
for (int k = 0; k < 2; k++) {
|
||||
String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff));
|
||||
for (int m = 0; m < 2; m++) {
|
||||
String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff));
|
||||
writers[i][j][k][m] = new PrintWriter(prefix + "." + name1 + "." + name2 + "." + name3 + "." + name4 + ".calls");
|
||||
}
|
||||
Iterator<String> iter = samples.iterator();
|
||||
sample1 = iter.next();
|
||||
sample2 = iter.next();
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
String name1 = i == 0 ? sample1 : "no_" + sample1;
|
||||
for (int j = 0; j < 2; j++) {
|
||||
String name2 = j == 0 ? sample2 : "no_" + sample2;
|
||||
for (int k = 0; k < 2; k++) {
|
||||
String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff));
|
||||
for (int m = 0; m < 2; m++) {
|
||||
String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff));
|
||||
tags[i][j][k][m] = name1 + "." + name2 + "." + name3 + "." + name4;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
}
|
||||
|
||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||
Variation indel1 = (Variation)tracker.lookup("callset1", null);
|
||||
Variation indel2 = (Variation)tracker.lookup("callset2", null);
|
||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
||||
|
||||
int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 );
|
||||
int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 );
|
||||
VCFGenotypeCall indel1 = samplesToRecords.get(sample1);
|
||||
VCFGenotypeCall indel2 = samplesToRecords.get(sample2);
|
||||
|
||||
int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 );
|
||||
int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 );
|
||||
|
||||
// skip this locus if they're both not valid indels
|
||||
if ( set1 == 1 && set2 == 1 )
|
||||
return;
|
||||
return null;
|
||||
|
||||
// only deal with a valid indel
|
||||
Variation indel = ( indel1 != null ? indel1 : indel2 );
|
||||
Variation indel = ( indel1 != null ? indel1.toVariation() : indel2.toVariation() );
|
||||
|
||||
// we only deal with the first allele
|
||||
int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
|
||||
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
|
||||
|
||||
writers[set1][set2][size][homopol].println(indel.toString());
|
||||
return tags[set1][set2][size][homopol];
|
||||
}
|
||||
|
||||
private int homopolymerRunSize(ReferenceContext ref, Variation indel) {
|
||||
|
|
@ -98,11 +101,5 @@ public class IndelSubsets implements ConcordanceType {
|
|||
return Math.max(leftRun, rightRun);
|
||||
}
|
||||
|
||||
public void cleanup() {
|
||||
for (int i = 0; i < 2; i++)
|
||||
for (int j = 0; j < 2; j++)
|
||||
for (int k = 0; k < 2; k++)
|
||||
for (int m = 0; m < 2; m++)
|
||||
writers[i][j][k][m].close();
|
||||
}
|
||||
public String getInfoName() { return "IS"; }
|
||||
}
|
||||
|
|
@ -1,16 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.*;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
/**
|
||||
|
|
@ -21,86 +14,29 @@ import java.util.Map.Entry;
|
|||
*/
|
||||
public class NWayVenn implements ConcordanceType {
|
||||
|
||||
// TODO -- change this to use Ryan's generic map object when it's ready
|
||||
private HashMap<String, PrintWriter> writers = new HashMap<String, PrintWriter>();
|
||||
private PrintWriter union_writer = null;
|
||||
|
||||
private String prefix;
|
||||
private int N;
|
||||
|
||||
public NWayVenn() {}
|
||||
|
||||
public void initialize(String prefix, HashMap<String,String> args) {
|
||||
public void initialize(Map<String, String> args, Set<String> samples) { }
|
||||
|
||||
if ( args.get("N") == null )
|
||||
throw new StingException("NWayVenn concordance module requires the 'N' argument");
|
||||
N = Integer.valueOf(args.get("N"));
|
||||
if ( N < 1 )
|
||||
throw new StingException("NWayVenn concordance module requires that N be greater than 0");
|
||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
||||
if ( samplesToRecords.size() == 0 )
|
||||
return null;
|
||||
|
||||
this.prefix = prefix;
|
||||
|
||||
// create the list of file names
|
||||
HashMap<String, String> files = new HashMap<String, String>();
|
||||
files.put("", "");
|
||||
for (int i = 1; i <= N; i++) {
|
||||
// create 2 new list entries for each of the current file names:
|
||||
// one with and one without this set appended to the end
|
||||
HashMap<String, String> appendedFiles = new HashMap<String, String>();
|
||||
for ( Entry<String, String> file : files.entrySet() ) {
|
||||
appendedFiles.put(file.getKey() + "0", file.getValue());
|
||||
appendedFiles.put(file.getKey() + "1", file.getValue() + ".set" + i);
|
||||
}
|
||||
files = appendedFiles;
|
||||
}
|
||||
// remove the entry for no hits
|
||||
files.remove(Utils.dupString('0', N));
|
||||
|
||||
try {
|
||||
for ( Entry<String, String> file : files.entrySet() )
|
||||
writers.put(file.getKey(), new PrintWriter(prefix + "." + N + "wayVenn" + file.getValue() + ".calls"));
|
||||
union_writer = new PrintWriter(prefix + "." + N + "wayVenn.union.calls");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
}
|
||||
|
||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||
Variation[] calls = new Variation[N];
|
||||
int goodRods = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
RODRecordList<ReferenceOrderedDatum> callList = tracker.getTrackData("callset" + (i+1), null);
|
||||
if ( callList != null ) {
|
||||
calls[i] = (Variation)callList.getRecords().get(0);
|
||||
goodRods++;
|
||||
}
|
||||
TreeSet<String> concordantSamples = new TreeSet<String>();
|
||||
for ( Entry<String, VCFGenotypeCall> entry : samplesToRecords.entrySet() ) {
|
||||
concordantSamples.add(entry.getKey());
|
||||
}
|
||||
|
||||
if ( goodRods == 0 )
|
||||
return;
|
||||
|
||||
// union
|
||||
printVariant(union_writer, calls);
|
||||
|
||||
// add to the correct set
|
||||
StringBuilder hashString = new StringBuilder();
|
||||
for (int i = 0; i < N; i++)
|
||||
hashString.append(calls[i] != null ? "1" : "0");
|
||||
printVariant(writers.get(hashString.toString()), calls);
|
||||
}
|
||||
|
||||
private static void printVariant(PrintWriter writer, Variation[] variants) {
|
||||
for ( Variation variant : variants ) {
|
||||
if ( variant != null ) {
|
||||
writer.println(variant.toString());
|
||||
return;
|
||||
}
|
||||
StringBuffer tag = new StringBuffer();
|
||||
Iterator<String> iter = concordantSamples.iterator();
|
||||
while ( iter.hasNext() ) {
|
||||
tag.append(iter.next());
|
||||
if ( iter.hasNext() )
|
||||
tag.append(".");
|
||||
}
|
||||
|
||||
return tag.toString();
|
||||
}
|
||||
|
||||
public void cleanup() {
|
||||
union_writer.close();
|
||||
for ( PrintWriter writer : writers.values() )
|
||||
writer.close();
|
||||
}
|
||||
public String getInfoName() { return "NV"; }
|
||||
}
|
||||
|
|
@ -1,16 +1,12 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Split up two call sets into their various concordance sets
|
||||
|
|
@ -19,58 +15,42 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
|
||||
private double LOD = 5.0;
|
||||
|
||||
private PrintWriter sameVarWriter = null;
|
||||
private PrintWriter oneVarWriter = null;
|
||||
private PrintWriter sameAlleleWriter = null;
|
||||
private PrintWriter diffAlleleWriter = null;
|
||||
private PrintWriter refVsVar1Writer = null;
|
||||
private PrintWriter refVsVar2Writer = null;
|
||||
private PrintWriter comboVarWriter = null;
|
||||
private PrintWriter coverageVar1Writer = null;
|
||||
private PrintWriter coverageVar2Writer = null;
|
||||
private String sample1, sample2;
|
||||
|
||||
public SNPGenotypeConcordance() {}
|
||||
|
||||
public void initialize(String prefix, HashMap<String,String> args) {
|
||||
try {
|
||||
sameVarWriter = new PrintWriter(prefix + ".snp_concordance.sameConfidentVariant.calls");
|
||||
oneVarWriter = new PrintWriter(prefix + ".snp_concordance.bothVariantOnlyOneIsConfident.calls");
|
||||
sameAlleleWriter = new PrintWriter(prefix + ".snp_concordance.sameVariantAlleleDifferentGenotype.calls");
|
||||
diffAlleleWriter = new PrintWriter(prefix + ".snp_concordance.differentVariantAllele.calls");
|
||||
refVsVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1VariantSet2Ref.calls");
|
||||
refVsVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1RefSet2Variant.calls");
|
||||
comboVarWriter = new PrintWriter(prefix + ".snp_concordance.confidentVariantWhenCombined.calls");
|
||||
coverageVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1ConfidentVariantSet2NoCoverage.calls");
|
||||
coverageVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1NoCoverageSet2ConfidentVariant.calls");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
public void initialize(Map<String, String> args, Set<String> samples) {
|
||||
if ( samples.size() != 2 )
|
||||
throw new StingException("SNPGenotype concordance test cannot handle anything other than 2 VCF records");
|
||||
|
||||
if ( args.get("lod") != null )
|
||||
LOD = Double.valueOf(args.get("lod"));
|
||||
|
||||
Iterator<String> iter = samples.iterator();
|
||||
sample1 = iter.next();
|
||||
sample2 = iter.next();
|
||||
}
|
||||
|
||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||
RODRecordList<ReferenceOrderedDatum> call1List = tracker.getTrackData("callset1", null);
|
||||
RODRecordList<ReferenceOrderedDatum> call2List = tracker.getTrackData("callset2", null);
|
||||
Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0));
|
||||
Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0));
|
||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
||||
|
||||
VCFGenotypeCall vcfCall1 = samplesToRecords.get(sample1);
|
||||
VCFGenotypeCall vcfCall2 = samplesToRecords.get(sample2);
|
||||
Variation call1 = (vcfCall1 == null ? null : vcfCall1.toVariation());
|
||||
Variation call2 = (vcfCall2 == null ? null : vcfCall2.toVariation());
|
||||
|
||||
// the only reason they would be null is a lack of coverage
|
||||
if ( call1 == null || call2 == null ) {
|
||||
if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD )
|
||||
printVariant(coverageVar1Writer, call1);
|
||||
return "set1ConfidentVariantSet2NoCoverage";
|
||||
else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD )
|
||||
printVariant(coverageVar2Writer, call2);
|
||||
return;
|
||||
return "set1NoCoverageSet2ConfidentVariant";
|
||||
return null;
|
||||
}
|
||||
if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype))
|
||||
throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
|
||||
throw new StingException("Both ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
|
||||
|
||||
double bestVsRef1 = call1.getNegLog10PError();
|
||||
double bestVsRef2 = call2.getNegLog10PError();
|
||||
//double bestVsNext1 = call1.getConsensusConfidence();
|
||||
//double bestVsNext2 = call2.getConsensusConfidence();
|
||||
String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases();
|
||||
String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases();
|
||||
|
||||
|
|
@ -81,33 +61,35 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
if ( bestVsRef1 >= LOD && bestVsRef2 >= LOD ) {
|
||||
// same genotype
|
||||
if ( genotype1.equals(genotype2) )
|
||||
printVariant(sameVarWriter, call1);
|
||||
return "sameConfidentVariant";
|
||||
|
||||
// same allele, different genotype
|
||||
else if ( sameVariantAllele(genotype1, genotype2, ref.getBase()) )
|
||||
printVariant(sameAlleleWriter, call1);
|
||||
return "sameVariantAlleleDifferentGenotype";
|
||||
|
||||
// different variant allele
|
||||
else
|
||||
printVariant(diffAlleleWriter, call1);
|
||||
return "differentVariantAllele";
|
||||
}
|
||||
|
||||
// confident only when combined
|
||||
else if ( bestVsRef1 < LOD && bestVsRef2 < LOD && bestVsRef1 + bestVsRef2 >= LOD ) {
|
||||
printVariant(comboVarWriter, call1);
|
||||
return "confidentVariantWhenCombined";
|
||||
}
|
||||
|
||||
// only one is confident variant
|
||||
else if ( (bestVsRef1 < LOD && bestVsRef2 >= LOD) || (bestVsRef1 >= LOD && bestVsRef2 < LOD) ) {
|
||||
printVariant(oneVarWriter, call1);
|
||||
return "bothVariantOnlyOneIsConfident";
|
||||
}
|
||||
}
|
||||
|
||||
// one is variant and the other is ref
|
||||
else if ( call1.isSNP() && call2.isReference() && bestVsRef1 >= LOD )
|
||||
printVariant(refVsVar1Writer, call1);
|
||||
return "set1VariantSet2Ref";
|
||||
else if ( call2.isSNP() && call1.isReference() && bestVsRef2 >= LOD )
|
||||
printVariant(refVsVar2Writer, call2);
|
||||
return "set1RefSet2Variant";
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean sameVariantAllele(String genotype1, String genotype2, char ref) {
|
||||
|
|
@ -118,19 +100,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
return altAllele1 == altAllele2;
|
||||
}
|
||||
|
||||
private static void printVariant(PrintWriter writer, Variation variant) {
|
||||
writer.println(variant.toString());
|
||||
}
|
||||
|
||||
public void cleanup() {
|
||||
sameVarWriter.close();
|
||||
oneVarWriter.close();
|
||||
sameAlleleWriter.close();
|
||||
diffAlleleWriter.close();
|
||||
refVsVar1Writer.close();
|
||||
refVsVar2Writer.close();
|
||||
comboVarWriter.close();
|
||||
coverageVar1Writer.close();
|
||||
coverageVar2Writer.close();
|
||||
}
|
||||
public String getInfoName() { return "SG"; }
|
||||
}
|
||||
|
|
@ -1,81 +1,62 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Split up two call sets into their various "Venn diagram" sets
|
||||
*/
|
||||
public class SimpleVenn implements ConcordanceType {
|
||||
|
||||
private PrintWriter union_writer = null;
|
||||
private PrintWriter intersect_writer = null;
|
||||
private PrintWriter discord_writer = null;
|
||||
private PrintWriter set1_writer = null;
|
||||
private PrintWriter set2_writer = null;
|
||||
private String sample1, sample2;
|
||||
|
||||
public SimpleVenn() {}
|
||||
|
||||
public void initialize(String prefix, HashMap<String,String> args) {
|
||||
try {
|
||||
union_writer = new PrintWriter(prefix + ".venn.union.calls");
|
||||
intersect_writer = new PrintWriter(prefix + ".venn.intersection.calls");
|
||||
discord_writer = new PrintWriter(prefix + ".venn.discordant.calls");
|
||||
set1_writer = new PrintWriter(prefix + ".venn.set1Only.calls");
|
||||
set2_writer = new PrintWriter(prefix + ".venn.set2Only.calls");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
public void initialize(Map<String, String> args, Set<String> samples) {
|
||||
if ( samples.size() != 2 )
|
||||
throw new StingException("SimpleVenn concordance test cannot handle anything other than 2 VCF records");
|
||||
|
||||
Iterator<String> iter = samples.iterator();
|
||||
sample1 = iter.next();
|
||||
sample2 = iter.next();
|
||||
}
|
||||
|
||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||
RODRecordList<ReferenceOrderedDatum> call1List = tracker.getTrackData("callset1", null);
|
||||
RODRecordList<ReferenceOrderedDatum> call2List = tracker.getTrackData("callset2", null);
|
||||
Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0));
|
||||
Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0));
|
||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
||||
|
||||
VCFGenotypeCall call1 = samplesToRecords.get(sample1);
|
||||
VCFGenotypeCall call2 = samplesToRecords.get(sample2);
|
||||
|
||||
if ( call1 == null && call2 == null )
|
||||
return;
|
||||
|
||||
// union
|
||||
printVariant(union_writer, call1 != null ? call1 : call2);
|
||||
return null;
|
||||
|
||||
// set 1 only
|
||||
if ( call2 == null )
|
||||
printVariant(set1_writer, call1);
|
||||
return sample1 + "_only";
|
||||
|
||||
// set 2 only
|
||||
else if ( call1 == null )
|
||||
printVariant(set2_writer, call2);
|
||||
return sample2 + "_only";
|
||||
|
||||
// at this point we know that neither is null, so now we need to test for alternate allele concordance
|
||||
Variation callV1 = call1.toVariation();
|
||||
Variation callV2 = call2.toVariation();
|
||||
|
||||
// we can't really deal with multi-allelic variants
|
||||
else if ( call1.isBiallelic() && call2.isBiallelic() ) {
|
||||
if ( callV1.isBiallelic() && callV2.isBiallelic() ) {
|
||||
// intersection (concordant)
|
||||
if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP() )
|
||||
printVariant(intersect_writer, call1);
|
||||
if ( callV1.getAlternativeBaseForSNP() == callV2.getAlternativeBaseForSNP() )
|
||||
return "concordant";
|
||||
// intersection (discordant)
|
||||
else
|
||||
printVariant(discord_writer, call1);
|
||||
return "discordant";
|
||||
}
|
||||
|
||||
return "concordant";
|
||||
}
|
||||
|
||||
private static void printVariant(PrintWriter writer, Variation variant) {
|
||||
writer.println(variant.toString());
|
||||
}
|
||||
|
||||
public void cleanup() {
|
||||
union_writer.close();
|
||||
intersect_writer.close();
|
||||
discord_writer.close();
|
||||
set1_writer.close();
|
||||
set2_writer.close();
|
||||
}
|
||||
public String getInfoName() { return "SV"; }
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue