Cleanup of old VCFRecord code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3883 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7dd55fbf13
commit
1d9ed1e214
|
|
@ -30,9 +30,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -70,64 +67,8 @@ public class VCFUtils {
|
|||
return fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges various vcf records into a single one using the mapping from rodNamesToSampleNames to get unique sample names
|
||||
*
|
||||
* @param rods the vcf rods
|
||||
* @param rodNamesToSampleNames mapping of rod/sample pairs to new uniquified sample names
|
||||
* @return the new merged vcf record
|
||||
*/
|
||||
public static VCFRecord mergeRecords(Map<VCFRecord,String> rods, Map<Pair<String, String>, String> rodNamesToSampleNames) {
|
||||
|
||||
VCFParameters params = new VCFParameters();
|
||||
params.addFormatItem(VCFConstants.GENOTYPE_KEY);
|
||||
|
||||
// keep track of the data so we can merge them intelligently
|
||||
double maxConfidence = 0.0;
|
||||
String id = null;
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
List<String> filters = new ArrayList<String>();
|
||||
|
||||
for ( VCFRecord rod : rods.keySet() ) {
|
||||
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord call : myGenotypes ) {
|
||||
// set the name to be the new uniquified name and add it to the list of genotypes
|
||||
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rods.get(rod), call.getSampleName())));
|
||||
if ( params.getPosition() < 1 )
|
||||
params.setLocations(GenomeLocParser.createGenomeLoc(rod.getChr(), rod.getStart()), call.getReference());
|
||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod));
|
||||
}
|
||||
|
||||
// set the overall confidence to be the max entry we see
|
||||
double confidence = 10.0 * rod.getNegLog10PError();
|
||||
if ( confidence > maxConfidence )
|
||||
maxConfidence = confidence;
|
||||
|
||||
if ( rod.getID() != null )
|
||||
id = rod.getID();
|
||||
|
||||
if ( rod.isFiltered() )
|
||||
filters.add(rod.getFilterString());
|
||||
|
||||
// just take the last value we see for a given key
|
||||
infoFields.putAll(rod.getInfoValues());
|
||||
}
|
||||
|
||||
return new VCFRecord(params.getReferenceBases(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
(id != null ? id : "."),
|
||||
params.getAlternateBases(),
|
||||
maxConfidence,
|
||||
filters.size() == 0 ? "0" : Utils.join(";", filters),
|
||||
infoFields,
|
||||
params.getFormatString(),
|
||||
params.getGenotypeRecords());
|
||||
}
|
||||
|
||||
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
|
||||
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
|
||||
HashSet<VCFHeaderLine> lines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// todo -- needs to remove all version headers from sources and add its own VCF version line
|
||||
for ( VCFHeader source : headers ) {
|
||||
|
|
@ -182,45 +123,4 @@ public class VCFUtils {
|
|||
|
||||
return new HashSet<VCFHeaderLine>(map.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* create the VCF genotype record
|
||||
*
|
||||
* @param params the VCF parameters object
|
||||
* @param gtype the genotype
|
||||
* @param vcfrecord the VCF record
|
||||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
public static VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeRecord gtype, VCFRecord vcfrecord) {
|
||||
|
||||
List<VCFGenotypeEncoding> alleles = createAlleleArray(gtype);
|
||||
for (VCFGenotypeEncoding allele : alleles) {
|
||||
params.addAlternateBase(allele);
|
||||
}
|
||||
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
|
||||
for ( Map.Entry<String, String> entry : gtype.getFields().entrySet() ) {
|
||||
record.setField(entry.getKey(), entry.getValue());
|
||||
params.addFormatItem(entry.getKey());
|
||||
}
|
||||
|
||||
record.setVCFRecord(vcfrecord);
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the allele array?
|
||||
*
|
||||
* @param gtype the gentoype object
|
||||
*
|
||||
* @return a list of string representing the string array of alleles
|
||||
*/
|
||||
private static List<VCFGenotypeEncoding> createAlleleArray(VCFGenotypeRecord gtype) {
|
||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
for (char allele : gtype.getBases().toCharArray()) {
|
||||
alleles.add(new VCFGenotypeEncoding(String.valueOf(allele)));
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
}
|
||||
|
|
@ -101,26 +101,6 @@ public class VCFWriter {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* output a record to the VCF file
|
||||
*
|
||||
* @param record the record to output
|
||||
*/
|
||||
@Deprecated
|
||||
public void addRecord(VCFRecord record) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
String vcfString = record.toStringEncoding(mHeader);
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to a file");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
|
|
|
|||
Loading…
Reference in New Issue