A fix for a bug Eric found: if your first call contained fewer samples than calls at other loci, your VCFHeader was set up incorrectly.
Also moved a bunch of Lists over to Sets for consistency. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1859 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a69ea9b57c
commit
96972c3a5c
|
|
@ -98,10 +98,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
if ( VARIANTS_FILE != null )
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
"UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName());
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName());
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
callsMetrics = new CallMetrics();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
|
||||
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
|
||||
// Don't output the data for now because it kills our unit test MD5s and is optional
|
||||
// TODO - figure out what to do here
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -31,7 +33,12 @@ public class GenotypeWriterFactory {
|
|||
* @param destination the destination file
|
||||
* @return the genotype writer object
|
||||
*/
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) {
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
File destination,
|
||||
String source,
|
||||
String referenceName,
|
||||
Set<String> sampleNames ) {
|
||||
switch (format) {
|
||||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
|
|
@ -40,20 +47,25 @@ public class GenotypeWriterFactory {
|
|||
case GELI_BINARY:
|
||||
return new GeliAdapter(destination, header);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination);
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames);
|
||||
default:
|
||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, PrintStream destination, String source, String referenceName ) {
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
PrintStream destination,
|
||||
String source,
|
||||
String referenceName,
|
||||
Set<String> sampleNames ) {
|
||||
switch (format) {
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination);
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames);
|
||||
default:
|
||||
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,23 +21,26 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
private String mSource;
|
||||
private String mReferenceName;
|
||||
private boolean mInitialized = false;
|
||||
private final Set<String> mSampleNames = new HashSet<String>();
|
||||
private final File mFile;
|
||||
private final OutputStream mStream;
|
||||
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) {
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set<String> sampleNames) {
|
||||
mReferenceName = referenceName;
|
||||
mSource = source;
|
||||
mFile = writeTo;
|
||||
if (mFile == null) throw new RuntimeException("VCF output file must not be null");
|
||||
mStream = null;
|
||||
mSampleNames.addAll(sampleNames);
|
||||
}
|
||||
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo) {
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set<String> sampleNames) {
|
||||
mReferenceName = referenceName;
|
||||
mSource = source;
|
||||
mFile = null;
|
||||
mStream = writeTo;
|
||||
if (mStream == null) throw new RuntimeException("VCF output stream must not be null");
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -49,7 +52,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*/
|
||||
private void lazyInitialize(List<Genotype> genotypes, File file, OutputStream stream) {
|
||||
Map<String, String> hInfo = new HashMap<String, String>();
|
||||
List<String> sampleNames = getSampleNames(genotypes);
|
||||
|
||||
// setup the header fields
|
||||
hInfo.put("format", "VCRv3.2");
|
||||
|
|
@ -57,7 +59,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
hInfo.put("reference", mReferenceName);
|
||||
|
||||
// setup the sample names
|
||||
mHeader = new VCFHeader(hInfo, sampleNames);
|
||||
mHeader = new VCFHeader(hInfo, mSampleNames);
|
||||
if (mFile == null)
|
||||
mWriter = new VCFWriter(mHeader, stream);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class VCFHeader {
|
|||
private final Map<String, String> mMetaData = new HashMap<String, String>();
|
||||
|
||||
// the list of auxillary tags
|
||||
private final List<String> mGenotypeSampleNames = new ArrayList<String>();
|
||||
private final Set<String> mGenotypeSampleNames = new HashSet<String>();
|
||||
|
||||
// the character string that indicates meta data
|
||||
public static final String METADATA_INDICATOR = "##";
|
||||
|
|
@ -56,7 +56,7 @@ public class VCFHeader {
|
|||
* @param metaData the meta data associated with this header
|
||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
||||
*/
|
||||
public VCFHeader(Map<String, String> metaData, List<String> genotypeSampleNames) {
|
||||
public VCFHeader(Map<String, String> metaData, Set<String> genotypeSampleNames) {
|
||||
for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key));
|
||||
for (String col : genotypeSampleNames) {
|
||||
if (!col.equals("FORMAT"))
|
||||
|
|
@ -107,7 +107,7 @@ public class VCFHeader {
|
|||
*
|
||||
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
|
||||
*/
|
||||
public List<String> getGenotypeSamples() {
|
||||
public Set<String> getGenotypeSamples() {
|
||||
return mGenotypeSampleNames;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
protected VCFHeader createHeader(List<String> headerStrings) {
|
||||
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
List<String> auxTags = new ArrayList<String>();
|
||||
Set<String> auxTags = new HashSet<String>();
|
||||
// iterate over all the passed in strings
|
||||
for (String str : headerStrings) {
|
||||
Matcher matcher = pMeta.matcher(str);
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class VCFHeaderTest extends BaseTest {
|
|||
|
||||
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||
private Map<String, String> metaData = new HashMap();
|
||||
private List<String> additionalColumns = new ArrayList<String>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
|
||||
/**
|
||||
* give it fake data, and make sure we get back the right fake data
|
||||
|
|
|
|||
|
|
@ -4,10 +4,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -126,7 +123,7 @@ public class VCFRecordTest extends BaseTest {
|
|||
*/
|
||||
public static VCFHeader createFakeHeader() {
|
||||
Map<String, String> metaData = new HashMap();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
metaData.put("format", "VCRv3.2"); // required
|
||||
metaData.put("two", "2");
|
||||
additionalColumns.add("FORMAT");
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import java.util.*;
|
|||
public class VCFWriterTest extends BaseTest {
|
||||
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||
private Map<String, String> metaData = new HashMap();
|
||||
private List<String> additionalColumns = new ArrayList<String>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
||||
|
||||
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
||||
|
|
@ -45,7 +45,7 @@ public class VCFWriterTest extends BaseTest {
|
|||
* create a fake header of known quantity
|
||||
* @return a fake VCF header
|
||||
*/
|
||||
public static VCFHeader createFakeHeader(Map<String, String> metaData, List<String> additionalColumns) {
|
||||
public static VCFHeader createFakeHeader(Map<String, String> metaData, Set<String> additionalColumns) {
|
||||
metaData.put("format", "VCRv3.2"); // required
|
||||
metaData.put("two", "2");
|
||||
additionalColumns.add("FORMAT");
|
||||
|
|
|
|||
Loading…
Reference in New Issue