Renaming GVCF -> GCF

This commit is contained in:
Mark DePristo 2011-09-07 17:10:56 -04:00
parent b220ed0d75
commit 01b6177ce1
4 changed files with 41 additions and 46 deletions

View File

@ -22,12 +22,9 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.utils.gvcf; package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -42,7 +39,7 @@ import java.util.*;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
public class GVCF { public class GCF {
private final static int RECORD_TERMINATOR = 123456789; private final static int RECORD_TERMINATOR = 123456789;
private int chromOffset; private int chromOffset;
private int start, stop; private int start, stop;
@ -54,10 +51,10 @@ public class GVCF {
private String info; private String info;
private int filterOffset; private int filterOffset;
private List<GVCFGenotype> genotypes = Collections.emptyList(); private List<GCFGenotype> genotypes = Collections.emptyList();
public GVCF(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc, boolean skipGenotypes) { public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
chromOffset = gvcfHeaderBuilder.encodeString(vc.getChr()); chromOffset = GCFHeaderBuilder.encodeString(vc.getChr());
start = vc.getStart(); start = vc.getStart();
stop = vc.getEnd(); stop = vc.getEnd();
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0; refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
@ -67,22 +64,22 @@ public class GVCF {
alleleMap = new ArrayList<Allele>(vc.getNAlleles()); alleleMap = new ArrayList<Allele>(vc.getNAlleles());
alleleOffsets = new int[vc.getNAlleles()]; alleleOffsets = new int[vc.getNAlleles()];
alleleMap.add(vc.getReference()); alleleMap.add(vc.getReference());
alleleOffsets[0] = gvcfHeaderBuilder.encodeAllele(vc.getReference()); alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference());
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
alleleMap.add(vc.getAlternateAllele(i)); alleleMap.add(vc.getAlternateAllele(i));
alleleOffsets[i+1] = gvcfHeaderBuilder.encodeAllele(vc.getAlternateAllele(i)); alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
} }
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual()); qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
info = infoFieldString(vc, gvcfHeaderBuilder); info = infoFieldString(vc, GCFHeaderBuilder);
filterOffset = gvcfHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc)); filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
if ( ! skipGenotypes ) { if ( ! skipGenotypes ) {
genotypes = encodeGenotypes(gvcfHeaderBuilder, vc); genotypes = encodeGenotypes(GCFHeaderBuilder, vc);
} }
} }
public GVCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException { public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException {
chromOffset = inputStream.readInt(); chromOffset = inputStream.readInt();
start = inputStream.readInt(); start = inputStream.readInt();
stop = inputStream.readInt(); stop = inputStream.readInt();
@ -99,9 +96,9 @@ public class GVCF {
genotypes = Collections.emptyList(); genotypes = Collections.emptyList();
inputStream.skipBytes(sizeOfGenotypes); inputStream.skipBytes(sizeOfGenotypes);
} else { } else {
genotypes = new ArrayList<GVCFGenotype>(nGenotypes); genotypes = new ArrayList<GCFGenotype>(nGenotypes);
for ( int i = 0; i < nGenotypes; i++ ) for ( int i = 0; i < nGenotypes; i++ )
genotypes.add(new GVCFGenotype(this, inputStream)); genotypes.add(new GCFGenotype(this, inputStream));
} }
int recordDone = inputStream.readInt(); int recordDone = inputStream.readInt();
@ -109,7 +106,7 @@ public class GVCF {
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key"); throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
} }
public VariantContext decode(final String source, final GVCFHeader header) { public VariantContext decode(final String source, final GCFHeader header) {
final String contig = header.getString(chromOffset); final String contig = header.getString(chromOffset);
alleleMap = header.getAlleles(alleleOffsets); alleleMap = header.getAlleles(alleleOffsets);
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual); double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
@ -122,7 +119,7 @@ public class GVCF {
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte); return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
} }
private Map<String, Genotype> decodeGenotypes(final GVCFHeader header) { private Map<String, Genotype> decodeGenotypes(final GCFHeader header) {
if ( genotypes.isEmpty() ) if ( genotypes.isEmpty() )
return VariantContext.NO_GENOTYPES; return VariantContext.NO_GENOTYPES;
else { else {
@ -138,15 +135,15 @@ public class GVCF {
} }
} }
private List<GVCFGenotype> encodeGenotypes(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc) { private List<GCFGenotype> encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) {
int nGenotypes = vc.getNSamples(); int nGenotypes = vc.getNSamples();
if ( nGenotypes > 0 ) { if ( nGenotypes > 0 ) {
List<GVCFGenotype> genotypes = new ArrayList<GVCFGenotype>(nGenotypes); List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null); for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
for ( Genotype g : vc.getGenotypes().values() ) { for ( Genotype g : vc.getGenotypes().values() ) {
int i = gvcfHeaderBuilder.encodeSample(g.getSampleName()); int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
genotypes.set(i, new GVCFGenotype(gvcfHeaderBuilder, alleleMap, g)); genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
} }
return genotypes; return genotypes;
@ -174,7 +171,7 @@ public class GVCF {
outputStream.writeInt(nGenotypes); outputStream.writeInt(nGenotypes);
outputStream.writeInt(expectedSizeOfGenotypes); outputStream.writeInt(expectedSizeOfGenotypes);
int obsSizeOfGenotypes = 0; int obsSizeOfGenotypes = 0;
for ( GVCFGenotype g : genotypes ) for ( GCFGenotype g : genotypes )
obsSizeOfGenotypes += g.write(outputStream); obsSizeOfGenotypes += g.write(outputStream);
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes ) if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes); throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
@ -183,7 +180,7 @@ public class GVCF {
return outputStream.size() - startSize; return outputStream.size() - startSize;
} }
private final String infoFieldString(VariantContext vc, final GVCFHeaderBuilder gvcfHeaderBuilder) { private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) {
StringBuilder s = new StringBuilder(); StringBuilder s = new StringBuilder();
boolean first = true; boolean first = true;
@ -191,7 +188,7 @@ public class GVCF {
String key = field.getKey(); String key = field.getKey();
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) ) if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
continue; continue;
int stringIndex = gvcfHeaderBuilder.encodeString(key); int stringIndex = GCFHeaderBuilder.encodeString(key);
String outputValue = StandardVCFWriter.formatVCFField(field.getValue()); String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
if ( outputValue != null ) { if ( outputValue != null ) {
if ( ! first ) s.append(";"); if ( ! first ) s.append(";");

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.utils.gvcf; package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -38,7 +38,7 @@ import java.util.*;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
public class GVCFGenotype { public class GCFGenotype {
private byte gq; private byte gq;
private int gt; private int gt;
private int dp; private int dp;
@ -48,8 +48,8 @@ public class GVCFGenotype {
// todo -- what to do about phasing? Perhaps we shouldn't support it // todo -- what to do about phasing? Perhaps we shouldn't support it
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering? // todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
public GVCFGenotype(final GVCFHeaderBuilder gvcfHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) { public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
gq = GVCF.qualToByte(genotype.getPhredScaledQual()); gq = GCF.qualToByte(genotype.getPhredScaledQual());
gt = encodeAlleles(genotype.getAlleles(), allAlleles); gt = encodeAlleles(genotype.getAlleles(), allAlleles);
dp = genotype.getAttributeAsInt("DP", 0); dp = genotype.getAttributeAsInt("DP", 0);
@ -65,13 +65,13 @@ public class GVCFGenotype {
return nAlleles*(nAlleles+1) / 2; return nAlleles*(nAlleles+1) / 2;
} }
/**
 * Deserializing constructor: reads one genotype record from a binary stream.
 *
 * Fields are consumed in this order: gq (one unsigned byte), gt (int), dp (int),
 * then the ad int array and the pl byte array. This is the same order in which the
 * write path visible later in this class emits them (writeByte, writeInt, writeInt,
 * writeIntArray, writeByteArray).
 *
 * The first parameter is the enclosing site record; only its getNAlleles() value is
 * used here, to size the two arrays. The second parameter is the stream to read from.
 *
 * NOTE(review): on the renamed (right-hand) side the first parameter is itself called
 * "GCF", the same as the class. Inside this body the simple name GCF therefore refers
 * to the parameter, so GCF.readIntArray / GCF.readByteArray are resolved through the
 * instance rather than the class name. Presumably a rename artifact; a lower-case
 * parameter name would read more clearly -- confirm and clean up.
 *
 * @throws IOException if any of the underlying stream reads fails
 */
public GVCFGenotype(GVCF gvcf, DataInputStream inputStream) throws IOException { public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException {
// gq is stored on disk as a single unsigned byte (0..255).
int gqInt = inputStream.readUnsignedByte(); int gqInt = inputStream.readUnsignedByte();
// NOTE(review): narrowing to byte turns 128..255 into negative values, and decode()
// divides gq by 10.0 directly -- confirm that qualToByte never produces values above 127.
gq = (byte)gqInt; gq = (byte)gqInt;
gt = inputStream.readInt(); gt = inputStream.readInt();
dp = inputStream.readInt(); dp = inputStream.readInt();
// Array lengths are not read from the stream here; they are passed in, derived from
// the site's allele count.
ad = GVCF.readIntArray(inputStream, gvcf.getNAlleles()); ad = GCF.readIntArray(inputStream, GCF.getNAlleles());
// pl length comes from nAllelesToNPls(nAlleles); presumably n*(n+1)/2 per the helper
// whose tail is visible just above this constructor -- confirm.
pl = GVCF.readByteArray(inputStream, nAllelesToNPls(gvcf.getNAlleles())); pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles()));
} }
// 2 alleles => 1 + 8 + 8 + 3 => 20 // 2 alleles => 1 + 8 + 8 + 3 => 20
@ -82,7 +82,7 @@ public class GVCFGenotype {
+ 1 * pl.length; // pl + 1 * pl.length; // pl
} }
public Genotype decode(final String sampleName, final GVCFHeader header, GVCF gvcf, List<Allele> alleleIndex) { public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List<Allele> alleleIndex) {
final List<Allele> alleles = decodeAlleles(gt, alleleIndex); final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
final double negLog10PError = gq / 10.0; final double negLog10PError = gq / 10.0;
final Set<String> filters = Collections.emptySet(); final Set<String> filters = Collections.emptySet();
@ -140,8 +140,8 @@ public class GVCFGenotype {
outputStream.writeByte(gq); outputStream.writeByte(gq);
outputStream.writeInt(gt); outputStream.writeInt(gt);
outputStream.writeInt(dp); outputStream.writeInt(dp);
GVCF.writeIntArray(ad, outputStream, false); GCF.writeIntArray(ad, outputStream, false);
GVCF.writeByteArray(pl, outputStream, false); GCF.writeByteArray(pl, outputStream, false);
return outputStream.size() - startSize; return outputStream.size() - startSize;
} }
} }

View File

@ -22,11 +22,9 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.utils.gvcf; package org.broadinstitute.sting.utils.gcf;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -64,8 +62,8 @@ import java.util.*;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
public class GVCFHeader { public class GCFHeader {
final protected static Logger logger = Logger.getLogger(GVCFHeader.class); final protected static Logger logger = Logger.getLogger(GCFHeader.class);
private static byte[] MAGIC_HEADER = "GVCF0.1\1".getBytes(); private static byte[] MAGIC_HEADER = "GVCF0.1\1".getBytes();
final List<Allele> alleles; final List<Allele> alleles;
@ -73,14 +71,14 @@ public class GVCFHeader {
final List<String> samples; final List<String> samples;
final List<Set<String>> filters; final List<Set<String>> filters;
/**
 * Builds a header from the three value-to-index maps accumulated while encoding records
 * (this is the constructor that the header builder's createHeader() calls).
 *
 * Each map is passed through linearize() to produce the corresponding list field.
 * linearize() is not visible in this view; presumably it orders the keys by their
 * Integer index -- confirm.
 *
 * allelesIn maps alleles to their assigned indices, stringIn does the same for strings,
 * and samplesIn for sample names.
 *
 * The filters field is left null by this constructor.
 */
public GVCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) { public GCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
this.alleles = linearize(allelesIn); this.alleles = linearize(allelesIn);
this.strings = linearize(stringIn); this.strings = linearize(stringIn);
this.samples = linearize(samplesIn); this.samples = linearize(samplesIn);
this.filters = null; // not used with this constructor this.filters = null; // not used with this constructor
} }
public GVCFHeader(DataInputStream inputStream) throws IOException { public GCFHeader(DataInputStream inputStream) throws IOException {
byte[] headerTest = new byte[MAGIC_HEADER.length]; byte[] headerTest = new byte[MAGIC_HEADER.length];
inputStream.read(headerTest); inputStream.read(headerTest);
if ( ! Arrays.equals(headerTest, MAGIC_HEADER) ) { if ( ! Arrays.equals(headerTest, MAGIC_HEADER) ) {

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.utils.gvcf; package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -56,13 +56,13 @@ import java.util.Map;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
public class GVCFHeaderBuilder { public class GCFHeaderBuilder {
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>(); Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
Map<String, Integer> strings = new HashMap<String, Integer>(); Map<String, Integer> strings = new HashMap<String, Integer>();
Map<String, Integer> samples = new HashMap<String, Integer>(); Map<String, Integer> samples = new HashMap<String, Integer>();
/**
 * Creates a header object from the allele, string and sample maps held by this builder.
 *
 * The maps are handed to the header constructor as-is (no copy is made at this call
 * site), so this reflects whatever has been encoded through this builder so far.
 *
 * @return a new header built from the current alleles, strings and samples maps
 */
public GVCFHeader createHeader() { public GCFHeader createHeader() {
return new GVCFHeader(alleles, strings, samples); return new GCFHeader(alleles, strings, samples);
} }
/**
 * Encodes a string by delegating to encode() with the strings map and returns the
 * resulting int.
 *
 * Despite the parameter name "chr", callers in the record class pass contig names,
 * filter strings and INFO keys through this method.
 * encode() is not visible in this view; presumably it returns the existing index for
 * a known string or assigns a new one -- confirm.
 */
public int encodeString(final String chr) { return encode(strings, chr); } public int encodeString(final String chr) { return encode(strings, chr); }