diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index a519f9db1..1505f029c 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; +import edu.mit.broad.picard.genotype.DiploidGenotype; +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; @@ -46,7 +48,8 @@ public class VariantContextAdaptors { adaptors.put(PlinkRod.class, new PlinkRodAdaptor()); adaptors.put(HapMapROD.class, new HapMapAdaptor()); adaptors.put(RodGLF.class, new GLFAdaptor()); - adaptors.put(RodGeliText.class, new GeliAdaptor()); + adaptors.put(RodGeliText.class, new GeliTextAdaptor()); + adaptors.put(rodGELI.class, new GeliAdaptor()); } public static boolean canBeConvertedToVariantContext(Object variantContainingObject) { @@ -593,7 +596,7 @@ public class VariantContextAdaptors { // // -------------------------------------------------------------------------------------------------------------- - private static class GeliAdaptor extends VCAdaptor { + private static class GeliTextAdaptor extends VCAdaptor { /** * convert to a Variant Context, given: * @param name the name of the ROD @@ -652,6 +655,79 @@ public class VariantContextAdaptors { } } + // -------------------------------------------------------------------------------------------------------------- + // + // GELI to VariantContext + // + // -------------------------------------------------------------------------------------------------------------- + + private static class GeliAdaptor extends VCAdaptor { + /** + * convert to a Variant Context, given: + * @param name the name of the ROD + * @param input the Rod object, in this case a RodGeliText + * @return a VariantContext object + */ + VariantContext convert(String name, Object input) { + return convert(name, input, null); + } + + /** + * convert to a Variant Context, given: + * @param name the name of the ROD + * @param input the Rod object, in this case a RodGeliText + * @param ref the reference context + * @return a VariantContext object + */ + VariantContext convert(String name, Object input, ReferenceContext ref) { + GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord(); + if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) ) + return null; + Allele refAllele = new Allele(String.valueOf((char)geli.getReferenceBase()), true); + + // add the reference allele + List alleles = new ArrayList(); + alleles.add(refAllele); + + // add the two alt alleles + if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) { + return null; + } + Allele allele1 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele1()), false); + if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1); + + // add the two alt alleles + if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) { + return null; + } + Allele allele2 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele2()), false); + if (!alleles.contains(allele2) && !allele2.equals(refAllele, true)) alleles.add(allele2); + + + Map attributes = new HashMap(); + Collection genotypes = new ArrayList(); + MutableGenotype call = new MutableGenotype(name, alleles); + + // setup the genotype likelihoods + double[] post = new double[10]; + String[] gTypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"}; + for (int x = 0; x < 10; x++) + post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1)))); + + // set the likelihoods, depth, and RMS mapping quality values + call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post); + call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality()); + call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads()); + + // add the call to the genotype list, and then use this list to create a VariantContext + genotypes.add(call); + VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes); + vc.validate(); + return vc; + + } + } + // -------------------------------------------------------------------------------------------------------------- // // HapMap to VariantContext diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodGELI.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodGELI.java index 87e6836f1..ed83035c1 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodGELI.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodGELI.java @@ -36,7 +36,11 @@ public class rodGELI extends BasicReferenceOrderedDatum { return GenomeLocParser.createGenomeLoc(gh.getSequenceIndex(), gh.getPosition()); } - /** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator, + public GenotypeLikelihoods getGeliRecord() { + return gh; + } + + /** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator, * so this method does nothing at all (always returns false). * */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 0a3c4f65c..373a0ea00 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -142,6 +142,11 @@ public class GeliAdapter implements GeliGenotypeWriter { if ( maxMappingQual < p.getMappingQual() ) maxMappingQual = p.getMappingQual(); } + } else { + if (genotype.hasAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY)) + readCount = genotype.getAttributeAsInt(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY); + if (genotype.hasAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY)) + maxMappingQual = Double.valueOf(genotype.getAttributeAsInt(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY)); } LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java index d6f926879..d72871bb7 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java @@ -1,5 +1,9 @@ package org.broadinstitute.sting.gatk.refdata; +import edu.mit.broad.picard.genotype.geli.GeliFileReader; +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.util.AsciiLineReader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -7,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; @@ -115,7 +120,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { * the information that encodes a Geli makes it into the VC and then out to disk. */ @Test - public void testVariantContextGeliToGeli() { + public void testVariantContextGeliTextToGeliText() { // our input and output files File knownFile = new File(validationDataLocation + "/well_formed.geli"); // our known good geli @@ -139,7 +144,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { // while we have records, make a Variant Context and output it to a GLF file while (line != null && line != "") { - parseGeli(geliText, line); + parseGeliText(geliText, line); records.add(geliText); // we know they're all single calls in the reference file VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText); gw.addCall(vc,null); @@ -160,7 +165,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { // while we have records, make a Variant Context and output it to a GLF file while (line != null && line != "") { - parseGeli(geliText,line); + parseGeliText(geliText,line); records2.add(geliText); // we know they're all single calls in the reference file line = readLine(reader); @@ -169,18 +174,74 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { reader.close(); // compare sizes Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); - // now compare each record TODO: uncomment out next two lines, fix equals so that rounding doesn't ruin our comparison + // now compare each record for (int x = 0; x < records.size(); x++) Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x))); } + /** + * this test takes a known GeliBinary file, reads in the records (storing them into an array), + * and creates VariantContext records. These VC records are then outputted through a genotype writer, + * and then read back in off of disk and compared to the original records. This way we are positive all + * the information that encodes a Geli makes it into the VC and then out to disk. + */ + @Test + public void testVariantContextGeliBinaryToGeliBinary() { + + // our input and output files + File knownFile = new File(validationDataLocation + "large_test_geli_binary.geli"); // our known good geli + File tempFile = new File("temp_binary.geli"); // our temporary geli output -> input file + //tempFile.deleteOnExit(); // delete when we're done + + // create our genotype writer for GLFs + GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY,tempFile); + + // buffer the records we see + List records = new ArrayList(); + + // a little more complicated than the GLF example, we have to read the file in + GeliFileReader reader = new GeliFileReader(knownFile); + + ((GeliGenotypeWriter)gw).writeHeader(reader.getFileHeader()); // the write header command ignores the parameter + + CloseableIterator iterator = reader.iterator(); + // while we have records, make a Variant Context and output it to a GeliBinary file + while (iterator.hasNext()) { + rodGELI gel = new rodGELI("myROD",iterator.next()); + records.add(gel); + VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel); + if (vc != null) gw.addCall(vc,null); + } + iterator.close(); + gw.close(); // close the file + reader.close(); + + // buffer the new records we see + List records2 = new ArrayList(); + + reader = new GeliFileReader(tempFile); + iterator = reader.iterator(); + while (iterator.hasNext()) { + rodGELI gel = new rodGELI("myROD",iterator.next()); + records2.add(gel); + } + iterator.close(); + reader.close(); + // compare sizes + Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); + // now compare each record + for (int x = 0; x < records.size(); x++) { + Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())); + } + } + /** * parse the geli given a line representation * @param geliText the object to parse into * @param line the line to parse with */ - private void parseGeli(RodGeliText geliText, String line) { + private void parseGeliText(RodGeliText geliText, String line) { boolean parsed = false; try { parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX));