Bunch of oneoff stuff that I don't want to lose. Also:
VCFRecord - "." dbsnp-ID entries now taken into account (thought these were represented as null; but I guess not) VCFGenotypeRecord - added a replaceFormat option; since intersecting Broad/BC call sets required genotype formats also be intersected (no changing on-the-fly) VCFCombine - altered doc to instruct user to give complete priority list (was throwing exception if not) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2760 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
421282cfa3
commit
2c4f709f6f
|
|
@ -0,0 +1,55 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation;
|
||||
import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Feb 1, 2010
|
||||
*/
|
||||
public class ThousandGenomesAnnotator implements VariantAnnotation {
|
||||
|
||||
public String getKeyName() {
|
||||
return "1KG";
|
||||
}
|
||||
|
||||
public VCFInfoHeaderLine getDescription() {
|
||||
return new VCFInfoHeaderLine(getKeyName(),
|
||||
1,VCFInfoHeaderLine.INFO_TYPE.String,"Is this site seen in Pilot1 or Pilot2 of 1KG?");
|
||||
}
|
||||
|
||||
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, Variation variation) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
RODRecordList pilot1 = tracker.getTrackData("pilot1",null);
|
||||
RODRecordList pilot2 = tracker.getTrackData("pilot2",null);
|
||||
|
||||
if ( pilot1 == null && pilot2 == null) {
|
||||
return "0";
|
||||
} else {
|
||||
if ( pilot1 != null && ! ( (HapmapVCFROD) pilot1.getRecords().get(0)).getRecord().isFiltered() ) {
|
||||
return "1";
|
||||
} else if ( pilot2 != null && ! ( (HapmapVCFROD) pilot2.getRecords().get(0)).getRecord().isFiltered() ) {
|
||||
return "1";
|
||||
} else {
|
||||
return "0";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -19,7 +19,7 @@ import java.util.Set;
|
|||
class LocusConcordanceInfo {
|
||||
|
||||
public enum ConcordanceType {
|
||||
TRUTH_SET,VARIANT_SET,BOTH_SETS
|
||||
TRUTH_SET,TRUTH_SET_VARIANT_FILTERED,VARIANT_SET,BOTH_SETS
|
||||
}
|
||||
|
||||
private ConcordanceType concordanceType;
|
||||
|
|
@ -70,8 +70,12 @@ class LocusConcordanceInfo {
|
|||
return false;
|
||||
}
|
||||
|
||||
public boolean isVariantFiltered() {
|
||||
return this.concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED;
|
||||
}
|
||||
|
||||
public GenomeLoc getLoc() {
|
||||
if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS) {
|
||||
if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) {
|
||||
return truthVCFRecord.getLocation();
|
||||
} else {
|
||||
return variantVCFRecord.getLocation();
|
||||
|
|
|
|||
|
|
@ -19,14 +19,21 @@ class MultiSampleConcordanceSet {
|
|||
private long truthOnlyVariantSites;
|
||||
private long variantOnlySites;
|
||||
private long overlappingSites;
|
||||
private long truthSitesFilteredOut;
|
||||
private int genotypeQuality;
|
||||
|
||||
private int truePositiveLoci;
|
||||
private int trueNegativeLoci;
|
||||
private int falsePositiveLoci;
|
||||
private int falseNegativeLoci;
|
||||
|
||||
public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) {
|
||||
concordanceSet = new HashSet<VCFConcordanceCalculator>();
|
||||
truthOnlySites = 0l;
|
||||
truthOnlyVariantSites = 0l;
|
||||
variantOnlySites = 0l;
|
||||
overlappingSites = 0l;
|
||||
truthSitesFilteredOut = 0l;
|
||||
minimumDepthForTest = minDepth;
|
||||
treatTruthOnlyAsFalseNegative = assumeRef;
|
||||
this.genotypeQuality = genotypeQuality;
|
||||
|
|
@ -59,7 +66,12 @@ class MultiSampleConcordanceSet {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if ( info.isVariantFiltered() ) {
|
||||
for ( VCFConcordanceCalculator concordance : concordanceSet ) {
|
||||
concordance.updateFilteredLocus(info);
|
||||
truthSitesFilteredOut++;
|
||||
}
|
||||
} else{
|
||||
variantOnlySites++;
|
||||
}
|
||||
}
|
||||
|
|
@ -83,4 +95,8 @@ class MultiSampleConcordanceSet {
|
|||
public long numberOfOverlappingSites() {
|
||||
return overlappingSites;
|
||||
}
|
||||
|
||||
public long numberOfFilteredTrueSites() {
|
||||
return truthSitesFilteredOut;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||
@Argument(fullName="genotypeConfidence", shortName="GC", doc="The quality score for genotypes below which to count genotyping as a no-call", required=false)
|
||||
int genotypeQuality = 0;
|
||||
int genotypeQuality = Integer.MIN_VALUE;
|
||||
@Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
|
||||
boolean ignoreKnownSites = false;
|
||||
@Argument(fullName="missingLocusAsConfidentRef", shortName="assumeRef", doc="Assume a missing locus in the variant VCF is a confident ref call with sufficient depth"+
|
||||
|
|
@ -45,9 +45,16 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
}
|
||||
|
||||
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
|
||||
if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) )
|
||||
if ( tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) {
|
||||
//logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
ReferenceOrderedDatum truthData = tracker.lookup("truth", null);
|
||||
ReferenceOrderedDatum variantData = tracker.lookup("variants",null);
|
||||
LocusConcordanceInfo concordance;
|
||||
|
|
@ -77,7 +84,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
} else if ( truth_filter ) {
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
|
||||
} else if ( call_filter ) {
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref);
|
||||
} else {
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref);
|
||||
}
|
||||
|
|
@ -102,12 +109,12 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
}
|
||||
|
||||
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
|
||||
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
|
||||
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call","False_Negatives_Due_To_Filtration");
|
||||
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
|
||||
out.print(String.format("%s%n",sample));
|
||||
}
|
||||
logger.info("Overlapping="+cSet.numberOfOverlappingSites()+"\tTruthOnly="+cSet.numberOfTruthOnlySites()+"\tTruthOnlyVariantSites="+
|
||||
cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites());
|
||||
cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites()+"\tTruthSitesFilteredOut="+cSet.numberOfFilteredTrueSites());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class VCFConcordanceCalculator {
|
|||
private Set<GenomeLoc> falsePositiveLoci;
|
||||
private Set<GenomeLoc> falseNegativeLoci;
|
||||
private Set<GenomeLoc> falseNegativeLociDueToNoCall;
|
||||
private Set<GenomeLoc> falseNegativeLociDueToFilters;
|
||||
private Set<GenomeLoc> hetsCalledHoms;
|
||||
private Set<GenomeLoc> homsCalledHets;
|
||||
private Set<GenomeLoc> nonConfidentGenotypeCalls;
|
||||
|
|
@ -37,6 +38,7 @@ class VCFConcordanceCalculator {
|
|||
falseNegativeLoci = new HashSet<GenomeLoc>();
|
||||
falseNegativeLociDueToNoCall = new HashSet<GenomeLoc>();
|
||||
falsePositiveLoci = new HashSet<GenomeLoc>();
|
||||
falseNegativeLociDueToFilters = new HashSet<GenomeLoc>();
|
||||
hetsCalledHoms = new HashSet<GenomeLoc>();
|
||||
homsCalledHets = new HashSet<GenomeLoc>();
|
||||
nonConfidentGenotypeCalls = new HashSet<GenomeLoc>();
|
||||
|
|
@ -61,19 +63,25 @@ class VCFConcordanceCalculator {
|
|||
}
|
||||
}
|
||||
|
||||
public void updateFilteredLocus(LocusConcordanceInfo info) {
|
||||
|
||||
if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase()) ) {
|
||||
falseNegativeLociDueToFilters.add(info.getLoc());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),
|
||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),
|
||||
concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),nonConfidentGenotypeCalls.size(),
|
||||
homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),
|
||||
falseNegativeLociDueToNoCall.size());
|
||||
falseNegativeLociDueToNoCall.size(),falseNegativeLociDueToFilters.size());
|
||||
}
|
||||
|
||||
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
|
||||
if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
|
||||
ignoredDueToDepth.add(loc);
|
||||
return; // do not update; just go away
|
||||
}
|
||||
if ( truth.isNoCall() ) {
|
||||
} else if ( truth.isNoCall() ) {
|
||||
chipNoCalls.add(loc);
|
||||
} else if ( truth.isVariant(( char) ref) ) {
|
||||
if ( call.isNoCall() ) {
|
||||
|
|
|
|||
|
|
@ -3,40 +3,316 @@ package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.ListUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.lang.Long;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
* This walker intersects the samples for two VCF files and returns a set of calls determined
|
||||
* by command-line arguments. Intersect indicates that calls should be returned only where
|
||||
* the same locus appears in the two VCF files, while Union indicates that all calls should be
|
||||
* returned. Priority indicates which file to prefer when setting genotypes; if the "GQ" flag
|
||||
* is omitted, genotypes are merged according to highest genotype quality.
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Jan 29, 2010
|
||||
*/
|
||||
public abstract class SimpleVCFIntersectWalker extends RodWalker<VCFRecordPair,Long>{
|
||||
public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
|
||||
@Argument(fullName="locusUnion", shortName="union", doc="Will union loci rather than intersecting", required=false)
|
||||
boolean union = false;
|
||||
@Argument(fullName="mergeByQual", shortName="GQ", doc="Will merge genotypes according to quality rather than priority", required=false)
|
||||
boolean useQuality = false;
|
||||
|
||||
private VCFRecordMerger merger; // no cost of repeated instantiation
|
||||
private VCFWriter writer;
|
||||
private boolean headerNotWritten;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
merger = new VCFRecordMerger();
|
||||
writer = new VCFWriter(out);
|
||||
headerNotWritten = true;
|
||||
}
|
||||
|
||||
public Long reduceInit() {
|
||||
return 0l;
|
||||
}
|
||||
|
||||
public VCFRecordPair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
public VCFRecordMerger map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
RodVCF priorityCall = ( RodVCF ) tracker.lookup("priority",null);
|
||||
RodVCF otherCall = ( RodVCF ) tracker.lookup("other",null);
|
||||
|
||||
if ( priorityCall == null && otherCall == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if ( priorityCall == null ) {
|
||||
|
||||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(null,otherCall.getRecord());
|
||||
} else {
|
||||
merger.hold(otherCall.getRecord());
|
||||
}
|
||||
|
||||
} else if ( otherCall == null ) {
|
||||
|
||||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall.getRecord(),null);
|
||||
} else {
|
||||
merger.hold(priorityCall.getRecord());
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall.getRecord(), otherCall.getRecord());
|
||||
} else {
|
||||
merger = instantiateMerger(priorityCall.getRecord(),otherCall.getRecord());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return merger;
|
||||
}
|
||||
|
||||
public Long reduce(VCFRecordPair records, long prevReduce) {
|
||||
return 0l;
|
||||
public Long reduce(VCFRecordMerger records, Long prevReduce) {
|
||||
if ( records == null ) {
|
||||
return prevReduce;
|
||||
} else {
|
||||
if ( records.hasBeenInstantiated() ) {
|
||||
if ( headerNotWritten ) {
|
||||
writeHeader(records);
|
||||
}
|
||||
while ( records.hasHeldRecords() ) {
|
||||
writer.addRecord(records.getHeldRecord());
|
||||
prevReduce++;
|
||||
}
|
||||
|
||||
writer.addRecord(records.getMergedRecord());
|
||||
prevReduce++;
|
||||
}
|
||||
}
|
||||
|
||||
return prevReduce;
|
||||
}
|
||||
|
||||
public void onTraversalDone(long finalReduce) {
|
||||
return;
|
||||
|
||||
public void onTraversalDone(Long variantsAdded) {
|
||||
logger.info(variantsAdded);
|
||||
}
|
||||
|
||||
private VCFRecordMerger instantiateMerger(VCFRecord priority, VCFRecord other) {
|
||||
List<String> samples = Arrays.asList(priority.getSampleNames());
|
||||
samples.retainAll(Arrays.asList(other.getSampleNames()));
|
||||
|
||||
return new VCFRecordMerger(priority,other,useQuality,true,samples,merger.getAllHeldRecords());
|
||||
}
|
||||
|
||||
private void writeHeader(VCFRecordMerger records) {
|
||||
VCFHeader header = new VCFHeader(new HashSet<VCFHeaderLine>(),records.getSamples());
|
||||
writer.writeHeader(header);
|
||||
headerNotWritten = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class VCFRecordPair {
|
||||
public VCFRecord rec1;
|
||||
public VCFRecord rec2;
|
||||
class VCFRecordMerger {
|
||||
private List<VCFRecord> holder;
|
||||
private VCFGenotypeMerger genotypeMerger;
|
||||
private VCFRecord record1;
|
||||
private VCFRecord record2;
|
||||
private boolean isMergeByQual;
|
||||
private boolean record1Priority;
|
||||
private Collection<String> samples;
|
||||
|
||||
public VCFRecordMerger(VCFRecord firstRecord, VCFRecord secondRecord, boolean useQual, boolean record1Priority, Collection<String> intersectedSamples, List<VCFRecord> heldRecords) {
|
||||
this.record1 = firstRecord;
|
||||
this.record2 = secondRecord;
|
||||
this.isMergeByQual = useQual;
|
||||
this.record1Priority = record1Priority;
|
||||
this.samples = intersectedSamples;
|
||||
instantiateGenotypeMerger();
|
||||
holder = heldRecords;
|
||||
}
|
||||
|
||||
public Set<String> getSamples() {
|
||||
Set<String> sam = new HashSet<String>();
|
||||
for ( String s : samples ) {
|
||||
sam.add(s);
|
||||
}
|
||||
return sam;
|
||||
}
|
||||
|
||||
public VCFRecordMerger() {
|
||||
holder = new ArrayList<VCFRecord>();
|
||||
}
|
||||
|
||||
public List<VCFRecord> getAllHeldRecords() {
|
||||
return holder;
|
||||
}
|
||||
|
||||
public void update(VCFRecord firstRecord, VCFRecord secondRecord) {
|
||||
record1 = firstRecord;
|
||||
record2 = secondRecord;
|
||||
}
|
||||
|
||||
public void hold(VCFRecord record) {
|
||||
holder.add(record);
|
||||
}
|
||||
|
||||
public boolean hasHeldRecords() {
|
||||
return holder.size() > 0;
|
||||
}
|
||||
|
||||
public VCFRecord getHeldRecord() {
|
||||
if ( samples == null ) {
|
||||
throw new IllegalStateException("Held records are being accessed before intersection sample set have been created");
|
||||
}
|
||||
|
||||
return downsample(holder.remove(0));
|
||||
}
|
||||
|
||||
private void instantiateGenotypeMerger() {
|
||||
if ( record1 != null && record2 != null ) {
|
||||
ArrayList<String> keyset1 = new ArrayList<String>(Arrays.asList(record1.getGenotypeFormatString().split(":")));
|
||||
ArrayList<String> keyset2 = new ArrayList<String>(Arrays.asList(record2.getGenotypeFormatString().split(":")));
|
||||
keyset2.retainAll(keyset1);
|
||||
genotypeMerger = new VCFGenotypeMerger(keyset2);
|
||||
}
|
||||
}
|
||||
|
||||
public VCFRecord getMergedRecord() {
|
||||
if ( record2 == null ) {
|
||||
return downsample(record1);
|
||||
} else if ( record1 == null ) {
|
||||
return downsample(record2);
|
||||
} else {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record1,record2);
|
||||
List<VCFGenotypeRecord> newGenotypes = mergeGenotypes(record1,record2);
|
||||
return new VCFRecord(newFields,genotypeMerger.getFormatString(),newGenotypes);
|
||||
}
|
||||
}
|
||||
|
||||
private VCFRecord downsample(VCFRecord record) {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record,null);
|
||||
List<VCFGenotypeRecord> newGenotypes = new ArrayList<VCFGenotypeRecord>();
|
||||
for ( String s : samples ) {
|
||||
newGenotypes.add(record.getGenotype(s) );
|
||||
}
|
||||
|
||||
return new VCFRecord(newFields,record.getGenotypeFormatString(),newGenotypes);
|
||||
}
|
||||
|
||||
private List<VCFGenotypeRecord> mergeGenotypes(VCFRecord first, VCFRecord second) {
|
||||
List<VCFGenotypeRecord> genotypes = new ArrayList<VCFGenotypeRecord>();
|
||||
for ( String s : samples ) {
|
||||
if ( isMergeByQual ) {
|
||||
genotypes.add(genotypeMerger.mergeByQual(first.getGenotype(s), second.getGenotype(s)));
|
||||
} else if ( record1Priority ) {
|
||||
genotypes.add(genotypeMerger.resetKeys(first.getGenotype(s)));
|
||||
} else {
|
||||
genotypes.add(genotypeMerger.resetKeys(second.getGenotype(s)));
|
||||
}
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
private HashMap<VCFHeader.HEADER_FIELDS,String> getFields(VCFRecord record1, VCFRecord record2) {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> fields = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getLocation().getContig());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getLocation().getStart()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.QUAL, String.format("%f", record1.getQual()));
|
||||
if ( record2 != null && record1.isFiltered() && record2.isFiltered() ) {
|
||||
fields.put(VCFHeader.HEADER_FIELDS.FILTER,"BOTH_FILTERED");
|
||||
} else if ( record1.isFiltered() || ( record2 != null && record2.isFiltered() ) ) {
|
||||
fields.put(VCFHeader.HEADER_FIELDS.FILTER,"ONE_FILTERED");
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
private String listAsString(List<String> lString) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(lString.remove(0));
|
||||
for ( String s : lString ) {
|
||||
b.append(",");
|
||||
b.append(s);
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public boolean hasBeenInstantiated() {
|
||||
return genotypeMerger != null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class VCFGenotypeMerger {
|
||||
private List<String> intersectKeys;
|
||||
private String intersectFormatString;
|
||||
|
||||
public VCFGenotypeMerger(List<String> keys) {
|
||||
intersectKeys = keys;
|
||||
intersectFormatString = keyJoin(keys);
|
||||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return intersectFormatString;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord mergeByQual(VCFGenotypeRecord record1, VCFGenotypeRecord record2) {
|
||||
VCFGenotypeRecord toReturn;
|
||||
if ( record2 == null || record2.isNoCall() || record2.isFiltered() || record1.getNegLog10PError() > record2.getNegLog10PError() ) {
|
||||
toReturn = record1;
|
||||
} else {
|
||||
toReturn = record2;
|
||||
}
|
||||
|
||||
resetKeys(toReturn);
|
||||
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord resetKeys(VCFGenotypeRecord r) {
|
||||
|
||||
HashMap<String,String> newFields = new HashMap<String,String>();
|
||||
|
||||
for ( String key : intersectKeys ) {
|
||||
newFields.put(key,r.getFields().get(key));
|
||||
}
|
||||
|
||||
r.replaceFields(newFields);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
private String keyJoin(List<String> sList) {
|
||||
StringBuffer format = new StringBuffer(sList.get(0));
|
||||
for ( int i = 1; i < sList.size(); i ++ ) {
|
||||
format.append(":");
|
||||
format.append(sList.get(i));
|
||||
}
|
||||
|
||||
return format.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -30,7 +30,7 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
|
|||
@Argument(fullName="combination_type", shortName="type", doc="combination type; currently UNION and MERGE are supported", required=true)
|
||||
protected COMBINATION_TYPE COMBO_TYPE;
|
||||
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="For the UNION combination type: a comma-separated string describing the priority ordering for the rods as far as which record gets emitted; if not specified, then the first rod seen will be used", required=false)
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="For the UNION combination type: a comma-separated string describing the priority ordering for the rods as far as which record gets emitted; a complete priority list MUST be provided", required=false)
|
||||
protected String PRIORITY_STRING = null;
|
||||
|
||||
@Argument(fullName="annotateUnion", shortName="A", doc="For the UNION combination type: if provided, the output union VCF will contain venn information about where each call came from", required=false)
|
||||
|
|
|
|||
|
|
@ -307,4 +307,11 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
|
||||
public void replaceFields(HashMap<String,String> newFields) {
|
||||
mFields.clear();
|
||||
for ( String s : newFields.keySet() ) {
|
||||
mFields.put(s,newFields.get(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -329,7 +329,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
}
|
||||
|
||||
public boolean isInDBSNP() {
|
||||
return ( mID != null || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
|
||||
return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
|
||||
}
|
||||
|
||||
public boolean isInHapmap() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue