diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java deleted file mode 100644 index b4ad81c02..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.gcf; - -import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.io.*; -import java.util.*; - -/** - * GATK binary VCF record - * - * @author Your Name - * @since Date created - */ -public class GCF { - private final static int RECORD_TERMINATOR = 123456789; - private int chromOffset; - private int start, stop; - private String id; - private List alleleMap; - private int alleleOffsets[]; - private float qual; - private byte refPad; - private String info; - private int filterOffset; - - private List genotypes = Collections.emptyList(); - - public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) { - chromOffset = GCFHeaderBuilder.encodeString(vc.getChr()); - start = vc.getStart(); - stop = vc.getEnd(); - refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0; - id = vc.getID(); - - // encode alleles - alleleMap = new ArrayList(vc.getNAlleles()); - alleleOffsets = new int[vc.getNAlleles()]; - alleleMap.add(vc.getReference()); - alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference()); - for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { - alleleMap.add(vc.getAlternateAllele(i)); - alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i)); - } - - qual = (float)vc.getLog10PError(); //qualToByte(vc.getPhredScaledQual()); - info = infoFieldString(vc, GCFHeaderBuilder); - filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc)); - - if ( ! skipGenotypes ) { - genotypes = encodeGenotypes(GCFHeaderBuilder, vc); - } - } - - public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException, EOFException { - chromOffset = inputStream.readInt(); - - // have we reached the footer? - if ( chromOffset == GCFHeader.FOOTER_START_MARKER ) - throw new EOFException(); - - start = inputStream.readInt(); - stop = inputStream.readInt(); - id = inputStream.readUTF(); - refPad = inputStream.readByte(); - alleleOffsets = readIntArray(inputStream); - qual = inputStream.readFloat(); - info = inputStream.readUTF(); - filterOffset = inputStream.readInt(); - - int nGenotypes = inputStream.readInt(); - int sizeOfGenotypes = inputStream.readInt(); - if ( skipGenotypes ) { - genotypes = Collections.emptyList(); - inputStream.skipBytes(sizeOfGenotypes); - } else { - genotypes = new ArrayList(nGenotypes); - for ( int i = 0; i < nGenotypes; i++ ) - genotypes.add(new GCFGenotype(this, inputStream)); - } - - int recordDone = inputStream.readInt(); - if ( recordDone != RECORD_TERMINATOR ) - throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key"); - } - - public int write(DataOutputStream outputStream) throws IOException { - int startSize = outputStream.size(); - outputStream.writeInt(chromOffset); - outputStream.writeInt(start); - outputStream.writeInt(stop); - outputStream.writeUTF(id); - outputStream.writeByte(refPad); - writeIntArray(alleleOffsets, outputStream, true); - outputStream.writeFloat(qual); - outputStream.writeUTF(info); - outputStream.writeInt(filterOffset); - - int nGenotypes = genotypes.size(); - int expectedSizeOfGenotypes = nGenotypes == 0 ? 0 : genotypes.get(0).sizeInBytes() * nGenotypes; - outputStream.writeInt(nGenotypes); - outputStream.writeInt(expectedSizeOfGenotypes); - int obsSizeOfGenotypes = 0; - for ( GCFGenotype g : genotypes ) - obsSizeOfGenotypes += g.write(outputStream); - if ( obsSizeOfGenotypes != expectedSizeOfGenotypes ) - throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes); - - outputStream.writeInt(RECORD_TERMINATOR); - return outputStream.size() - startSize; - } - - public VariantContext decode(final String source, final GCFHeader header) { - final String contig = header.getString(chromOffset); - alleleMap = header.getAlleles(alleleOffsets); - - VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, alleleMap); - builder.genotypes(decodeGenotypes(header)); - builder.log10PError(qual); - builder.filters(header.getFilters(filterOffset)); - builder.attribute("INFO", info); - builder.referenceBaseForIndel(refPad == 0 ? null : refPad); - return builder.make(); - } - - private GenotypesContext decodeGenotypes(final GCFHeader header) { - if ( genotypes.isEmpty() ) - return VariantContext.NO_GENOTYPES; - else { - GenotypesContext map = GenotypesContext.create(genotypes.size()); - - for ( int i = 0; i < genotypes.size(); i++ ) { - final String sampleName = header.getSample(i); - final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap); - map.add(g); - } - - return map; - } - } - - private List encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) { - int nGenotypes = vc.getNSamples(); - if ( nGenotypes > 0 ) { - List genotypes = new ArrayList(nGenotypes); - for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null); - - for ( Genotype g : vc.getGenotypes() ) { - int i = GCFHeaderBuilder.encodeSample(g.getSampleName()); - genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g)); - } - - return genotypes; - } else { - return Collections.emptyList(); - } - } - - public int getNAlleles() { return alleleOffsets.length; } - - - private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) { - StringBuilder s = new StringBuilder(); - - boolean first = true; - for ( Map.Entry field : vc.getAttributes().entrySet() ) { - String key = field.getKey(); - int stringIndex = GCFHeaderBuilder.encodeString(key); - String outputValue = StandardVCFWriter.formatVCFField(field.getValue()); - if ( outputValue != null ) { - if ( ! first ) s.append(";"); - s.append(stringIndex).append("=").append(outputValue); - first = false; - } - } - - return s.toString(); - } - - protected final static int BUFFER_SIZE = 1048576; // 2**20 - - public static DataInputStream createDataInputStream(final InputStream stream) { - return new DataInputStream(new BufferedInputStream(stream, BUFFER_SIZE)); - } - - public static FileInputStream createFileInputStream(final File file) throws FileNotFoundException { - return new FileInputStream(file); - } - - protected final static int[] readIntArray(final DataInputStream inputStream) throws IOException { - return readIntArray(inputStream, inputStream.readInt()); - } - - protected final static int[] readIntArray(final DataInputStream inputStream, int size) throws IOException { - int[] array = new int[size]; - for ( int i = 0; i < array.length; i++ ) - array[i] = inputStream.readInt(); - return array; - } - - protected final static void writeIntArray(int[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException { - if ( writeSize ) outputStream.writeInt(array.length); - for ( int i : array ) - outputStream.writeInt(i); - } - - protected final static byte[] readByteArray(final DataInputStream inputStream) throws IOException { - return readByteArray(inputStream, inputStream.readInt()); - } - - protected final static byte[] readByteArray(final DataInputStream inputStream, int size) throws IOException { - byte[] array = new byte[size]; - for ( int i = 0; i < array.length; i++ ) - array[i] = inputStream.readByte(); - return array; - } - - protected final static void writeByteArray(byte[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException { - if ( writeSize ) outputStream.writeInt(array.length); - for ( byte i : array ) - outputStream.writeByte(i); - } - - protected final static byte qualToByte(double phredScaledQual) { - return (byte)Math.round(Math.min(phredScaledQual, 255)); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFGenotype.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFGenotype.java deleted file mode 100644 index f8fdd9291..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFGenotype.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.gcf; - -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.*; - -/** - * GATK binary VCF record - * - * @author Your Name - * @since Date created - */ -public class GCFGenotype { - private byte gq; - private int gt; - private int dp; - private int ad[]; - private byte[] pl; - - // todo -- what to do about phasing? Perhaps we shouldn't support it - // todo -- is the FL field generic or just a flag? Should we even support per sample filtering? - - public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List allAlleles, Genotype genotype) { - gq = GCF.qualToByte(genotype.getPhredScaledQual()); - gt = encodeAlleles(genotype.getAlleles(), allAlleles); - - dp = genotype.getAttributeAsInt("DP", 0); - - int nAlleles = allAlleles.size(); - ad = new int[nAlleles]; - - int npls = nAllelesToNPls(nAlleles); - pl = new byte[npls]; - } - - private int nAllelesToNPls( int nAlleles ) { - return nAlleles*(nAlleles+1) / 2; - } - - public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException { - int gqInt = inputStream.readUnsignedByte(); - gq = (byte)gqInt; - gt = inputStream.readInt(); - dp = inputStream.readInt(); - ad = GCF.readIntArray(inputStream, GCF.getNAlleles()); - pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles())); - } - - // 2 alleles => 1 + 8 + 8 + 3 => 20 - protected int sizeInBytes() { - return 1 // gq - + 4 * 2 // gt + dp - + 4 * ad.length // ad - + 1 * pl.length; // pl - } - - public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List alleleIndex) { - final List alleles = decodeAlleles(gt, alleleIndex); - final double log10PError = gq / -10.0; - final Set filters = Collections.emptySet(); - final Map attributes = new HashMap(); - attributes.put("DP", dp); - attributes.put("AD", ad); - attributes.put("PL", pl); - - return new Genotype(sampleName, alleles, log10PError, filters, attributes, false); - } - - private static int encodeAlleles(List gtList, List allAlleles) { - final int nAlleles = gtList.size(); - if ( nAlleles > 4 ) - throw new IllegalArgumentException("encodeAlleles doesn't support more than 4 alt alleles, but I saw " + gtList); - - int gtInt = 0; - for ( int i = 0; i < nAlleles ; i++ ) { - final int bitOffset = i * 8; - final int allelei = getAlleleIndex(gtList.get(i), allAlleles); - final int gti = (allelei + 1) << bitOffset; - gtInt = gtInt | gti; - } - - return gtInt; - } - - private static int getAlleleIndex(Allele q, List allAlleles) { - if ( q.isNoCall() ) - return 254; - for ( int i = 0; i < allAlleles.size(); i++ ) - if ( q.equals(allAlleles.get(i)) ) - return i; - throw new IllegalStateException("getAlleleIndex passed allele not in map! allele " + q + " allAlleles " + allAlleles); - } - - private static List decodeAlleles(int gtInt, List alleleIndex) { - List alleles = new ArrayList(4); - - for ( int i = 0; i < 32; i += 8 ) { - final int gi = (gtInt & (0x000000FF << i)) >> i; - if ( gi != 0 ) { - final int allelei = gi - 1; - alleles.add( allelei == 254 ? Allele.NO_CALL : alleleIndex.get(allelei) ); - } else { - break; - } - } - - return alleles; - } - - public int write(DataOutputStream outputStream) throws IOException { - int startSize = outputStream.size(); - outputStream.writeByte(gq); - outputStream.writeInt(gt); - outputStream.writeInt(dp); - GCF.writeIntArray(ad, outputStream, false); - GCF.writeByteArray(pl, outputStream, false); - return outputStream.size() - startSize; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeader.java deleted file mode 100644 index 6d96eda56..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeader.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.gcf; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; - -import java.io.*; -import java.util.*; - -/** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
- * - * @author Your Name - * @since Date created - */ -public class GCFHeader { - final protected static Logger logger = Logger.getLogger(GCFHeader.class); - - public final static int GCF_VERSION = 1; - public final static byte[] GCF_FILE_START_MARKER = "GCF\1".getBytes(); - public final static int FOOTER_START_MARKER = -1; - public final static long HEADER_FORWARD_REFERENCE_OFFSET = GCF_FILE_START_MARKER.length + 4; // for the version - - final int version; - long footerPosition; - final List alleles; - final List strings; - final List samples; - final List> filters; - - public GCFHeader(final Map allelesIn, final Map stringIn, final Map samplesIn) { - version = GCF_VERSION; - footerPosition = 0; - this.alleles = linearize(allelesIn); - this.strings = linearize(stringIn); - this.samples = linearize(samplesIn); - this.filters = null; // not used with this constructor - } - - public GCFHeader(FileInputStream fileInputStream) throws IOException { - DataInputStream inputStream = new DataInputStream(fileInputStream); - byte[] headerTest = new byte[GCF_FILE_START_MARKER.length]; - inputStream.read(headerTest); - if ( ! Arrays.equals(headerTest, GCF_FILE_START_MARKER) ) { - throw new UserException("Could not read GVCF file. GCF_FILE_START_MARKER missing. Saw " + new String(headerTest)); - } else { - version = inputStream.readInt(); - logger.info("Read GCF version " + version); - footerPosition = inputStream.readLong(); - logger.info("Read footer position of " + footerPosition); - long lastPos = fileInputStream.getChannel().position(); - logger.info(" Last position is " + lastPos); - - // seek to the footer - fileInputStream.getChannel().position(footerPosition); - if ( inputStream.readInt() != FOOTER_START_MARKER ) - throw new UserException.MalformedFile("Malformed GCF file: couldn't find the footer marker"); - alleles = stringsToAlleles(readStrings(inputStream)); - strings = readStrings(inputStream); - samples = readStrings(inputStream); - logger.info(String.format("Allele map of %d elements", alleles.size())); - logger.info(String.format("String map of %d elements", strings.size())); - logger.info(String.format("Sample map of %d elements", samples.size())); - filters = initializeFilterCache(); - fileInputStream.getChannel().position(lastPos); - } - } - - public static int writeHeader(final DataOutputStream outputStream) throws IOException { - int startBytes = outputStream.size(); - outputStream.write(GCF_FILE_START_MARKER); - outputStream.writeInt(GCF_VERSION); - outputStream.writeLong(0); - return outputStream.size() - startBytes; - } - - public int writeFooter(final DataOutputStream outputStream) throws IOException { - int startBytes = outputStream.size(); - outputStream.writeInt(FOOTER_START_MARKER); // has to be the same as chrom encoding - write(outputStream, allelesToStrings(alleles)); - write(outputStream, strings); - write(outputStream, samples); - return outputStream.size() - startBytes; - } - - private void write(DataOutputStream outputStream, List l) throws IOException { - outputStream.writeInt(l.size()); - for ( String elt : l ) outputStream.writeUTF(elt); - } - - private List allelesToStrings(List alleles) { - List strings = new ArrayList(alleles.size()); - for ( Allele allele : alleles ) strings.add(allele.toString()); - return strings; - } - - private List> initializeFilterCache() { - // required to allow offset -> set lookup - List> l = new ArrayList>(strings.size()); - for ( int i = 0; i < strings.size(); i++ ) l.add(null); - return l; - } - - private static List stringsToAlleles(final List strings) { - final List alleles = new ArrayList(strings.size()); - for ( String string : strings ) { - boolean isRef = string.endsWith("*"); - if ( isRef ) string = string.substring(0, string.length() - 1); - alleles.add(Allele.create(string, isRef)); - } - return alleles; - } - - private static List readStrings(final DataInputStream inputStream) throws IOException { - final int nStrings = inputStream.readInt(); - - final List strings = new ArrayList(nStrings); - for ( int i = 0; i < nStrings; i++ ) { - strings.add(inputStream.readUTF()); - } - - return strings; - } - - private static List linearize(final Map map) { - final ArrayList l = new ArrayList(map.size()); - for ( int i = 0; i < map.size(); i++ ) l.add(null); - for ( final Map.Entry elt : map.entrySet() ) - l.set(elt.getValue(), elt.getKey()); - return l; - } - - public String getSample(final int offset) { return samples.get(offset); } - public String getString(final int offset) { return strings.get(offset); } - public Allele getAllele(final int offset) { return alleles.get(offset); } - public List getAlleles(final int[] offsets) { - final List alleles = new ArrayList(offsets.length); - for ( int i : offsets ) alleles.add(getAllele(i)); - return alleles; - } - - public Set getFilters(final int offset) { - Set cached = filters.get(offset); - - if ( cached != null ) - return cached; - else { - final String filterString = getString(offset); - if ( filterString.equals(VCFConstants.UNFILTERED) ) - return null; // UNFILTERED records are represented by null - else { - Set set = VCFCodec.parseFilters(null, -1, filterString); - filters.set(offset, set); // remember the result - return set; - } - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeaderBuilder.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeaderBuilder.java deleted file mode 100644 index 40e01ec72..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFHeaderBuilder.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.gcf; - -import org.broadinstitute.sting.utils.variantcontext.Allele; - -import java.util.HashMap; -import java.util.Map; - -/** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
- * - * @author Your Name - * @since Date created - */ -public class GCFHeaderBuilder { - Map alleles = new HashMap(); - Map strings = new HashMap(); - Map samples = new HashMap(); - - public GCFHeader createHeader() { - return new GCFHeader(alleles, strings, samples); - } - - public int encodeString(final String chr) { return encode(strings, chr); } - public int encodeAllele(final Allele allele) { return encode(alleles, allele); } - public int encodeSample(final String sampleName) { return encode(samples, sampleName); } - - private int encode(Map map, T key) { - Integer v = map.get(key); - if ( v == null ) { - v = map.size(); - map.put(key, v); - } - return v; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java deleted file mode 100644 index 18fae18c4..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.gcf; - -import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.utils.codecs.vcf.IndexingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.*; - -/** - * GCFWriter implementing the VCFWriter interface - * @author Your Name - * @since Date created - */ -public class GCFWriter extends IndexingVCFWriter { - final boolean skipGenotypes; - final FileOutputStream fileOutputStream; - final DataOutputStream dataOutputStream; - final GCFHeaderBuilder gcfHeaderBuilder; - int nbytes = 0; - VCFHeader header = null; - File location; - - // -------------------------------------------------------------------------------- - // - // Constructors - // - // -------------------------------------------------------------------------------- - - public GCFWriter(final File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) { - super(writerName(location, null), location, null, refDict, enableOnTheFlyIndexing); - this.location = location; - this.skipGenotypes = doNotWriteGenotypes; - - // write the output - try { - fileOutputStream = new FileOutputStream(location); - dataOutputStream = createDataOutputStream(fileOutputStream); - gcfHeaderBuilder = new GCFHeaderBuilder(); - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotCreateOutputFile(location, e); - } - } - - // -------------------------------------------------------------------------------- - // - // VCFWriter interface functions - // - // -------------------------------------------------------------------------------- - - @Override - public void writeHeader(VCFHeader header) { - this.header = header; - try { - nbytes += GCFHeader.writeHeader(dataOutputStream); - } catch ( IOException e ) { - throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Couldn't write header", e); - } - } - - @Override - public void add(VariantContext vc) { - super.add(vc); - GCF gcf = new GCF(gcfHeaderBuilder, vc, skipGenotypes); - try { - nbytes += gcf.write(dataOutputStream); - } catch ( IOException e ) { - throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Failed to add gcf record " + gcf + " to stream " + getStreamName(), e); - } - } - - @Override - public void close() { - // todo -- write out VCF header lines - GCFHeader gcfHeader = gcfHeaderBuilder.createHeader(); - try { - long headerPosition = nbytes; - nbytes += gcfHeader.writeFooter(dataOutputStream); - dataOutputStream.close(); - //System.out.println("Writing forward reference to " + headerPosition); - - RandomAccessFile raFile = new RandomAccessFile(location, "rw"); - raFile.seek(GCFHeader.HEADER_FORWARD_REFERENCE_OFFSET); - raFile.writeLong(headerPosition); - raFile.close(); - } catch ( IOException e ) { - throw new ReviewedStingException("Failed to close GCFWriter " + getStreamName(), e); - } - - super.close(); - } - - private static final DataOutputStream createDataOutputStream(final OutputStream stream) { - return new DataOutputStream(new BufferedOutputStream(stream, GCF.BUFFER_SIZE)); - } - -}