Deleting/archiving as instructed
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3779 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7e7da75d27
commit
6442dabf94
|
|
@ -1,343 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This walker intersects the samples for two VCF files and returns a set of calls determined
|
||||
* by command-line arguments. Intersect indicates that calls should be returned only where
|
||||
* the same locus appears in the two VCF files, while Union indicates that all calls should be
|
||||
* returned. Priority indicates which file to prefer when setting genotypes; if the "GQ" flag
|
||||
* is omitted, genotypes are merged according to highest genotype quality.
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Jan 29, 2010
|
||||
*/
|
||||
public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
|
||||
@Argument(fullName="locusUnion", shortName="union", doc="Will union loci rather than intersecting", required=false)
|
||||
boolean union = false;
|
||||
@Argument(fullName="mergeByQual", shortName="GQ", doc="Will merge genotypes according to quality rather than priority", required=false)
|
||||
boolean useQuality = false;
|
||||
|
||||
private VCFRecordMerger merger; // no cost of repeated instantiation
|
||||
private VCFWriter writer;
|
||||
private boolean headerNotWritten;
|
||||
|
||||
public void initialize() {
|
||||
merger = new VCFRecordMerger();
|
||||
writer = new VCFWriter(out);
|
||||
headerNotWritten = true;
|
||||
}
|
||||
|
||||
public Long reduceInit() {
|
||||
return 0l;
|
||||
}
|
||||
|
||||
public VCFRecordMerger map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
VCFRecord priorityCall = tracker.lookup("priority",VCFRecord.class);
|
||||
VCFRecord otherCall = tracker.lookup("other",VCFRecord.class);
|
||||
|
||||
if ( priorityCall == null && otherCall == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if ( priorityCall == null ) {
|
||||
|
||||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(null,otherCall);
|
||||
} else {
|
||||
merger.hold(otherCall);
|
||||
}
|
||||
|
||||
} else if ( otherCall == null ) {
|
||||
|
||||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall,null);
|
||||
} else {
|
||||
merger.hold(priorityCall);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall, otherCall);
|
||||
} else {
|
||||
merger = instantiateMerger(priorityCall,otherCall);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return merger;
|
||||
}
|
||||
|
||||
public Long reduce(VCFRecordMerger records, Long prevReduce) {
|
||||
if ( records == null ) {
|
||||
return prevReduce;
|
||||
} else {
|
||||
if ( records.hasBeenInstantiated() ) {
|
||||
if ( headerNotWritten ) {
|
||||
writeHeader(records);
|
||||
}
|
||||
while ( records.hasHeldRecords() ) {
|
||||
writer.addRecord(records.getHeldRecord());
|
||||
prevReduce++;
|
||||
}
|
||||
|
||||
writer.addRecord(records.getMergedRecord());
|
||||
prevReduce++;
|
||||
}
|
||||
}
|
||||
|
||||
return prevReduce;
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone(Long variantsAdded) {
|
||||
logger.info(variantsAdded);
|
||||
}
|
||||
|
||||
private VCFRecordMerger instantiateMerger(VCFRecord priority, VCFRecord other) {
|
||||
List<String> samples = Arrays.asList(priority.getSampleNames());
|
||||
samples.retainAll(Arrays.asList(other.getSampleNames()));
|
||||
|
||||
return new VCFRecordMerger(priority,other,useQuality,true,samples,merger.getAllHeldRecords());
|
||||
}
|
||||
|
||||
private void writeHeader(VCFRecordMerger records) {
|
||||
VCFHeader header = new VCFHeader(new HashSet<VCFHeaderLine>(),records.getSamples());
|
||||
writer.writeHeader(header);
|
||||
headerNotWritten = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class VCFRecordMerger {
|
||||
private List<VCFRecord> holder;
|
||||
private VCFGenotypeMerger genotypeMerger;
|
||||
private VCFRecord record1;
|
||||
private VCFRecord record2;
|
||||
private boolean isMergeByQual;
|
||||
private boolean record1Priority;
|
||||
private Collection<String> samples;
|
||||
|
||||
public VCFRecordMerger(VCFRecord firstRecord, VCFRecord secondRecord, boolean useQual, boolean record1Priority, Collection<String> intersectedSamples, List<VCFRecord> heldRecords) {
|
||||
this.record1 = firstRecord;
|
||||
this.record2 = secondRecord;
|
||||
this.isMergeByQual = useQual;
|
||||
this.record1Priority = record1Priority;
|
||||
this.samples = intersectedSamples;
|
||||
instantiateGenotypeMerger();
|
||||
holder = heldRecords;
|
||||
}
|
||||
|
||||
public Set<String> getSamples() {
|
||||
Set<String> sam = new HashSet<String>();
|
||||
for ( String s : samples ) {
|
||||
sam.add(s);
|
||||
}
|
||||
return sam;
|
||||
}
|
||||
|
||||
public VCFRecordMerger() {
|
||||
holder = new ArrayList<VCFRecord>();
|
||||
}
|
||||
|
||||
public List<VCFRecord> getAllHeldRecords() {
|
||||
return holder;
|
||||
}
|
||||
|
||||
public void update(VCFRecord firstRecord, VCFRecord secondRecord) {
|
||||
record1 = firstRecord;
|
||||
record2 = secondRecord;
|
||||
}
|
||||
|
||||
public void hold(VCFRecord record) {
|
||||
holder.add(record);
|
||||
}
|
||||
|
||||
public boolean hasHeldRecords() {
|
||||
return holder.size() > 0;
|
||||
}
|
||||
|
||||
public VCFRecord getHeldRecord() {
|
||||
if ( samples == null ) {
|
||||
throw new IllegalStateException("Held records are being accessed before intersection sample set have been created");
|
||||
}
|
||||
|
||||
return downsample(holder.remove(0));
|
||||
}
|
||||
|
||||
private void instantiateGenotypeMerger() {
|
||||
if ( record1 != null && record2 != null ) {
|
||||
ArrayList<String> keyset1 = new ArrayList<String>(Arrays.asList(record1.getGenotypeFormatString().split(":")));
|
||||
ArrayList<String> keyset2 = new ArrayList<String>(Arrays.asList(record2.getGenotypeFormatString().split(":")));
|
||||
keyset2.retainAll(keyset1);
|
||||
genotypeMerger = new VCFGenotypeMerger(keyset2);
|
||||
}
|
||||
}
|
||||
|
||||
public VCFRecord getMergedRecord() {
|
||||
if ( record2 == null ) {
|
||||
return downsample(record1);
|
||||
} else if ( record1 == null ) {
|
||||
return downsample(record2);
|
||||
} else {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record1,record2);
|
||||
List<VCFGenotypeRecord> newGenotypes = mergeGenotypes(record1,record2);
|
||||
return new VCFRecord(newFields,genotypeMerger.getFormatString(),newGenotypes);
|
||||
}
|
||||
}
|
||||
|
||||
private VCFRecord downsample(VCFRecord record) {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record,null);
|
||||
List<VCFGenotypeRecord> newGenotypes = new ArrayList<VCFGenotypeRecord>();
|
||||
for ( String s : samples ) {
|
||||
newGenotypes.add(record.getGenotype(s) );
|
||||
}
|
||||
|
||||
return new VCFRecord(newFields,record.getGenotypeFormatString(),newGenotypes);
|
||||
}
|
||||
|
||||
private List<VCFGenotypeRecord> mergeGenotypes(VCFRecord first, VCFRecord second) {
|
||||
List<VCFGenotypeRecord> genotypes = new ArrayList<VCFGenotypeRecord>();
|
||||
for ( String s : samples ) {
|
||||
if ( isMergeByQual ) {
|
||||
genotypes.add(genotypeMerger.mergeByQual(first.getGenotype(s), second.getGenotype(s)));
|
||||
} else if ( record1Priority ) {
|
||||
genotypes.add(genotypeMerger.resetKeys(first.getGenotype(s)));
|
||||
} else {
|
||||
genotypes.add(genotypeMerger.resetKeys(second.getGenotype(s)));
|
||||
}
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
private HashMap<VCFHeader.HEADER_FIELDS,String> getFields(VCFRecord record1, VCFRecord record2) {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> fields = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getChr());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getStart()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.QUAL, String.format("%f", record1.getQual()));
|
||||
if ( record2 != null && record1.isFiltered() && record2.isFiltered() ) {
|
||||
fields.put(VCFHeader.HEADER_FIELDS.FILTER,"BOTH_FILTERED");
|
||||
} else if ( record1.isFiltered() || ( record2 != null && record2.isFiltered() ) ) {
|
||||
fields.put(VCFHeader.HEADER_FIELDS.FILTER,"ONE_FILTERED");
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
private String listAsString(List<String> lString) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(lString.remove(0));
|
||||
for ( String s : lString ) {
|
||||
b.append(",");
|
||||
b.append(s);
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public boolean hasBeenInstantiated() {
|
||||
return genotypeMerger != null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class VCFGenotypeMerger {
|
||||
private List<String> intersectKeys;
|
||||
private String intersectFormatString;
|
||||
|
||||
public VCFGenotypeMerger(List<String> keys) {
|
||||
intersectKeys = keys;
|
||||
intersectFormatString = keyJoin(keys);
|
||||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return intersectFormatString;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord mergeByQual(VCFGenotypeRecord record1, VCFGenotypeRecord record2) {
|
||||
VCFGenotypeRecord toReturn;
|
||||
if ( record2 == null || record2.isNoCall() || record2.isFiltered() || record1.getNegLog10PError() > record2.getNegLog10PError() ) {
|
||||
toReturn = record1;
|
||||
} else {
|
||||
toReturn = record2;
|
||||
}
|
||||
|
||||
resetKeys(toReturn);
|
||||
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord resetKeys(VCFGenotypeRecord r) {
|
||||
|
||||
HashMap<String,String> newFields = new HashMap<String,String>();
|
||||
|
||||
for ( String key : intersectKeys ) {
|
||||
newFields.put(key,r.getFields().get(key));
|
||||
}
|
||||
|
||||
r.replaceFields(newFields);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
private String keyJoin(List<String> sList) {
|
||||
StringBuffer format = new StringBuffer(sList.get(0));
|
||||
for ( int i = 1; i < sList.size(); i ++ ) {
|
||||
format.append(":");
|
||||
format.append(sList.get(i));
|
||||
}
|
||||
|
||||
return format.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue