a) Fix bug when multi-base reference is homopolymeric when writing a VCF4.0 variant context: computation of number of trailing bases was incorrect and we ended up with incorrect position.
b) Updated VCF4WriterTestWalker to take either VCF3 or VCF4 as inputs (this walker can also be used to convert from 3.3 to 4.0). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3711 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
61e2b2e39b
commit
b6bdd61283
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.util.AsciiLineReader;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -74,6 +75,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
VCF4Codec vcf4codec = new VCF4Codec();
|
||||
|
||||
|
||||
/**
|
||||
* Initialize the number of loci processed to zero.
|
||||
*
|
||||
|
|
@ -84,30 +86,43 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
|
||||
// Open output file specified by output VCF ROD
|
||||
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
|
||||
|
||||
// Open output file specified by output VCF ROD
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
||||
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
|
||||
VCFHeader header = null;
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
Object p = rod.getRecordType();
|
||||
if(rod.getType().equals(vcf4codec.getClass()) && rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
|
||||
try {
|
||||
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
|
||||
int lineNumber = vcf4codec.readHeader(lineReader);
|
||||
out.printf("Read %d header lines%n", lineNumber);
|
||||
if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
|
||||
if (rod.getType().equals(vcf4codec.getClass())) {
|
||||
|
||||
VCFHeader header = vcf4codec.getHeader(VCFHeader.class);
|
||||
vcfWriter.writeHeader(header);
|
||||
}
|
||||
catch (FileNotFoundException e ) {
|
||||
throw new StingException(e.getMessage());
|
||||
try {
|
||||
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
|
||||
int lineNumber = vcf4codec.readHeader(lineReader);
|
||||
out.printf("Read %d header lines%n", lineNumber);
|
||||
|
||||
header = vcf4codec.getHeader(VCFHeader.class);
|
||||
}
|
||||
catch (FileNotFoundException e ) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
} else {
|
||||
final VCFReader reader = new VCFReader(rod.getFile());
|
||||
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
||||
samples.addAll(vcfSamples);
|
||||
reader.close();
|
||||
header = new VCFHeader(hInfo, samples);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
header.setVersion(VCFHeaderVersion.VCF4_0);
|
||||
|
||||
vcfWriter.writeHeader(header);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -279,7 +279,7 @@ public class VCFWriter {
|
|||
// scan through all alleles and look for maximum common subsequence at beginning and at end for all alleles
|
||||
numTrailingBases = 1;
|
||||
} else
|
||||
numTrailingBases = originalRef.indexOf(refString);
|
||||
numTrailingBases = originalRef.lastIndexOf(refString);
|
||||
|
||||
numPaddingBases = originalRef.length()-refString.length()-numTrailingBases;
|
||||
if (numTrailingBases > 0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue