Renaming GVCF -> GCF
This commit is contained in:
parent
b220ed0d75
commit
01b6177ce1
|
|
@ -22,12 +22,9 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.gvcf;
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
@ -42,7 +39,7 @@ import java.util.*;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class GVCF {
|
public class GCF {
|
||||||
private final static int RECORD_TERMINATOR = 123456789;
|
private final static int RECORD_TERMINATOR = 123456789;
|
||||||
private int chromOffset;
|
private int chromOffset;
|
||||||
private int start, stop;
|
private int start, stop;
|
||||||
|
|
@ -54,10 +51,10 @@ public class GVCF {
|
||||||
private String info;
|
private String info;
|
||||||
private int filterOffset;
|
private int filterOffset;
|
||||||
|
|
||||||
private List<GVCFGenotype> genotypes = Collections.emptyList();
|
private List<GCFGenotype> genotypes = Collections.emptyList();
|
||||||
|
|
||||||
public GVCF(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
|
public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
|
||||||
chromOffset = gvcfHeaderBuilder.encodeString(vc.getChr());
|
chromOffset = GCFHeaderBuilder.encodeString(vc.getChr());
|
||||||
start = vc.getStart();
|
start = vc.getStart();
|
||||||
stop = vc.getEnd();
|
stop = vc.getEnd();
|
||||||
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
|
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
|
||||||
|
|
@ -67,22 +64,22 @@ public class GVCF {
|
||||||
alleleMap = new ArrayList<Allele>(vc.getNAlleles());
|
alleleMap = new ArrayList<Allele>(vc.getNAlleles());
|
||||||
alleleOffsets = new int[vc.getNAlleles()];
|
alleleOffsets = new int[vc.getNAlleles()];
|
||||||
alleleMap.add(vc.getReference());
|
alleleMap.add(vc.getReference());
|
||||||
alleleOffsets[0] = gvcfHeaderBuilder.encodeAllele(vc.getReference());
|
alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference());
|
||||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
alleleMap.add(vc.getAlternateAllele(i));
|
alleleMap.add(vc.getAlternateAllele(i));
|
||||||
alleleOffsets[i+1] = gvcfHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
|
alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
|
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
|
||||||
info = infoFieldString(vc, gvcfHeaderBuilder);
|
info = infoFieldString(vc, GCFHeaderBuilder);
|
||||||
filterOffset = gvcfHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
|
filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
|
||||||
|
|
||||||
if ( ! skipGenotypes ) {
|
if ( ! skipGenotypes ) {
|
||||||
genotypes = encodeGenotypes(gvcfHeaderBuilder, vc);
|
genotypes = encodeGenotypes(GCFHeaderBuilder, vc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public GVCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException {
|
public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException {
|
||||||
chromOffset = inputStream.readInt();
|
chromOffset = inputStream.readInt();
|
||||||
start = inputStream.readInt();
|
start = inputStream.readInt();
|
||||||
stop = inputStream.readInt();
|
stop = inputStream.readInt();
|
||||||
|
|
@ -99,9 +96,9 @@ public class GVCF {
|
||||||
genotypes = Collections.emptyList();
|
genotypes = Collections.emptyList();
|
||||||
inputStream.skipBytes(sizeOfGenotypes);
|
inputStream.skipBytes(sizeOfGenotypes);
|
||||||
} else {
|
} else {
|
||||||
genotypes = new ArrayList<GVCFGenotype>(nGenotypes);
|
genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||||
for ( int i = 0; i < nGenotypes; i++ )
|
for ( int i = 0; i < nGenotypes; i++ )
|
||||||
genotypes.add(new GVCFGenotype(this, inputStream));
|
genotypes.add(new GCFGenotype(this, inputStream));
|
||||||
}
|
}
|
||||||
|
|
||||||
int recordDone = inputStream.readInt();
|
int recordDone = inputStream.readInt();
|
||||||
|
|
@ -109,7 +106,7 @@ public class GVCF {
|
||||||
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
|
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext decode(final String source, final GVCFHeader header) {
|
public VariantContext decode(final String source, final GCFHeader header) {
|
||||||
final String contig = header.getString(chromOffset);
|
final String contig = header.getString(chromOffset);
|
||||||
alleleMap = header.getAlleles(alleleOffsets);
|
alleleMap = header.getAlleles(alleleOffsets);
|
||||||
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
|
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
|
||||||
|
|
@ -122,7 +119,7 @@ public class GVCF {
|
||||||
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
|
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, Genotype> decodeGenotypes(final GVCFHeader header) {
|
private Map<String, Genotype> decodeGenotypes(final GCFHeader header) {
|
||||||
if ( genotypes.isEmpty() )
|
if ( genotypes.isEmpty() )
|
||||||
return VariantContext.NO_GENOTYPES;
|
return VariantContext.NO_GENOTYPES;
|
||||||
else {
|
else {
|
||||||
|
|
@ -138,15 +135,15 @@ public class GVCF {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<GVCFGenotype> encodeGenotypes(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc) {
|
private List<GCFGenotype> encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) {
|
||||||
int nGenotypes = vc.getNSamples();
|
int nGenotypes = vc.getNSamples();
|
||||||
if ( nGenotypes > 0 ) {
|
if ( nGenotypes > 0 ) {
|
||||||
List<GVCFGenotype> genotypes = new ArrayList<GVCFGenotype>(nGenotypes);
|
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||||
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
|
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
|
||||||
|
|
||||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||||
int i = gvcfHeaderBuilder.encodeSample(g.getSampleName());
|
int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
|
||||||
genotypes.set(i, new GVCFGenotype(gvcfHeaderBuilder, alleleMap, g));
|
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
|
||||||
}
|
}
|
||||||
|
|
||||||
return genotypes;
|
return genotypes;
|
||||||
|
|
@ -174,7 +171,7 @@ public class GVCF {
|
||||||
outputStream.writeInt(nGenotypes);
|
outputStream.writeInt(nGenotypes);
|
||||||
outputStream.writeInt(expectedSizeOfGenotypes);
|
outputStream.writeInt(expectedSizeOfGenotypes);
|
||||||
int obsSizeOfGenotypes = 0;
|
int obsSizeOfGenotypes = 0;
|
||||||
for ( GVCFGenotype g : genotypes )
|
for ( GCFGenotype g : genotypes )
|
||||||
obsSizeOfGenotypes += g.write(outputStream);
|
obsSizeOfGenotypes += g.write(outputStream);
|
||||||
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
|
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
|
||||||
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
|
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
|
||||||
|
|
@ -183,7 +180,7 @@ public class GVCF {
|
||||||
return outputStream.size() - startSize;
|
return outputStream.size() - startSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String infoFieldString(VariantContext vc, final GVCFHeaderBuilder gvcfHeaderBuilder) {
|
private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) {
|
||||||
StringBuilder s = new StringBuilder();
|
StringBuilder s = new StringBuilder();
|
||||||
|
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
|
|
@ -191,7 +188,7 @@ public class GVCF {
|
||||||
String key = field.getKey();
|
String key = field.getKey();
|
||||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
||||||
continue;
|
continue;
|
||||||
int stringIndex = gvcfHeaderBuilder.encodeString(key);
|
int stringIndex = GCFHeaderBuilder.encodeString(key);
|
||||||
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
||||||
if ( outputValue != null ) {
|
if ( outputValue != null ) {
|
||||||
if ( ! first ) s.append(";");
|
if ( ! first ) s.append(";");
|
||||||
|
|
@ -22,7 +22,7 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.gvcf;
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
@ -38,7 +38,7 @@ import java.util.*;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class GVCFGenotype {
|
public class GCFGenotype {
|
||||||
private byte gq;
|
private byte gq;
|
||||||
private int gt;
|
private int gt;
|
||||||
private int dp;
|
private int dp;
|
||||||
|
|
@ -48,8 +48,8 @@ public class GVCFGenotype {
|
||||||
// todo -- what to do about phasing? Perhaps we shouldn't support it
|
// todo -- what to do about phasing? Perhaps we shouldn't support it
|
||||||
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
|
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
|
||||||
|
|
||||||
public GVCFGenotype(final GVCFHeaderBuilder gvcfHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
|
public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
|
||||||
gq = GVCF.qualToByte(genotype.getPhredScaledQual());
|
gq = GCF.qualToByte(genotype.getPhredScaledQual());
|
||||||
gt = encodeAlleles(genotype.getAlleles(), allAlleles);
|
gt = encodeAlleles(genotype.getAlleles(), allAlleles);
|
||||||
|
|
||||||
dp = genotype.getAttributeAsInt("DP", 0);
|
dp = genotype.getAttributeAsInt("DP", 0);
|
||||||
|
|
@ -65,13 +65,13 @@ public class GVCFGenotype {
|
||||||
return nAlleles*(nAlleles+1) / 2;
|
return nAlleles*(nAlleles+1) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
public GVCFGenotype(GVCF gvcf, DataInputStream inputStream) throws IOException {
|
public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException {
|
||||||
int gqInt = inputStream.readUnsignedByte();
|
int gqInt = inputStream.readUnsignedByte();
|
||||||
gq = (byte)gqInt;
|
gq = (byte)gqInt;
|
||||||
gt = inputStream.readInt();
|
gt = inputStream.readInt();
|
||||||
dp = inputStream.readInt();
|
dp = inputStream.readInt();
|
||||||
ad = GVCF.readIntArray(inputStream, gvcf.getNAlleles());
|
ad = GCF.readIntArray(inputStream, GCF.getNAlleles());
|
||||||
pl = GVCF.readByteArray(inputStream, nAllelesToNPls(gvcf.getNAlleles()));
|
pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2 alleles => 1 + 8 + 8 + 3 => 20
|
// 2 alleles => 1 + 8 + 8 + 3 => 20
|
||||||
|
|
@ -82,7 +82,7 @@ public class GVCFGenotype {
|
||||||
+ 1 * pl.length; // pl
|
+ 1 * pl.length; // pl
|
||||||
}
|
}
|
||||||
|
|
||||||
public Genotype decode(final String sampleName, final GVCFHeader header, GVCF gvcf, List<Allele> alleleIndex) {
|
public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List<Allele> alleleIndex) {
|
||||||
final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
|
final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
|
||||||
final double negLog10PError = gq / 10.0;
|
final double negLog10PError = gq / 10.0;
|
||||||
final Set<String> filters = Collections.emptySet();
|
final Set<String> filters = Collections.emptySet();
|
||||||
|
|
@ -140,8 +140,8 @@ public class GVCFGenotype {
|
||||||
outputStream.writeByte(gq);
|
outputStream.writeByte(gq);
|
||||||
outputStream.writeInt(gt);
|
outputStream.writeInt(gt);
|
||||||
outputStream.writeInt(dp);
|
outputStream.writeInt(dp);
|
||||||
GVCF.writeIntArray(ad, outputStream, false);
|
GCF.writeIntArray(ad, outputStream, false);
|
||||||
GVCF.writeByteArray(pl, outputStream, false);
|
GCF.writeByteArray(pl, outputStream, false);
|
||||||
return outputStream.size() - startSize;
|
return outputStream.size() - startSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -22,11 +22,9 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.gvcf;
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -64,8 +62,8 @@ import java.util.*;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class GVCFHeader {
|
public class GCFHeader {
|
||||||
final protected static Logger logger = Logger.getLogger(GVCFHeader.class);
|
final protected static Logger logger = Logger.getLogger(GCFHeader.class);
|
||||||
|
|
||||||
private static byte[] MAGIC_HEADER = "GVCF0.1\1".getBytes();
|
private static byte[] MAGIC_HEADER = "GVCF0.1\1".getBytes();
|
||||||
final List<Allele> alleles;
|
final List<Allele> alleles;
|
||||||
|
|
@ -73,14 +71,14 @@ public class GVCFHeader {
|
||||||
final List<String> samples;
|
final List<String> samples;
|
||||||
final List<Set<String>> filters;
|
final List<Set<String>> filters;
|
||||||
|
|
||||||
public GVCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
|
public GCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
|
||||||
this.alleles = linearize(allelesIn);
|
this.alleles = linearize(allelesIn);
|
||||||
this.strings = linearize(stringIn);
|
this.strings = linearize(stringIn);
|
||||||
this.samples = linearize(samplesIn);
|
this.samples = linearize(samplesIn);
|
||||||
this.filters = null; // not used with this constructor
|
this.filters = null; // not used with this constructor
|
||||||
}
|
}
|
||||||
|
|
||||||
public GVCFHeader(DataInputStream inputStream) throws IOException {
|
public GCFHeader(DataInputStream inputStream) throws IOException {
|
||||||
byte[] headerTest = new byte[MAGIC_HEADER.length];
|
byte[] headerTest = new byte[MAGIC_HEADER.length];
|
||||||
inputStream.read(headerTest);
|
inputStream.read(headerTest);
|
||||||
if ( ! Arrays.equals(headerTest, MAGIC_HEADER) ) {
|
if ( ! Arrays.equals(headerTest, MAGIC_HEADER) ) {
|
||||||
|
|
@ -22,7 +22,7 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.gvcf;
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
|
@ -56,13 +56,13 @@ import java.util.Map;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class GVCFHeaderBuilder {
|
public class GCFHeaderBuilder {
|
||||||
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
|
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
|
||||||
Map<String, Integer> strings = new HashMap<String, Integer>();
|
Map<String, Integer> strings = new HashMap<String, Integer>();
|
||||||
Map<String, Integer> samples = new HashMap<String, Integer>();
|
Map<String, Integer> samples = new HashMap<String, Integer>();
|
||||||
|
|
||||||
public GVCFHeader createHeader() {
|
public GCFHeader createHeader() {
|
||||||
return new GVCFHeader(alleles, strings, samples);
|
return new GCFHeader(alleles, strings, samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int encodeString(final String chr) { return encode(strings, chr); }
|
public int encodeString(final String chr) { return encode(strings, chr); }
|
||||||
Loading…
Reference in New Issue