adding VC adaptor for GELI, along with unit tests.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3243 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-04-23 05:28:39 +00:00
parent 3d2c836db6
commit 536f22f3bd
4 changed files with 154 additions and 8 deletions

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
@ -46,7 +48,8 @@ public class VariantContextAdaptors {
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
adaptors.put(HapMapROD.class, new HapMapAdaptor());
adaptors.put(RodGLF.class, new GLFAdaptor());
adaptors.put(RodGeliText.class, new GeliAdaptor());
adaptors.put(RodGeliText.class, new GeliTextAdaptor());
adaptors.put(rodGELI.class, new GeliAdaptor());
}
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
@ -593,7 +596,7 @@ public class VariantContextAdaptors {
//
// --------------------------------------------------------------------------------------------------------------
private static class GeliAdaptor extends VCAdaptor {
private static class GeliTextAdaptor extends VCAdaptor {
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
@ -652,6 +655,79 @@ public class VariantContextAdaptors {
}
}
// --------------------------------------------------------------------------------------------------------------
//
// GELI to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
private static class GeliAdaptor extends VCAdaptor {
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
VariantContext convert(String name, Object input) {
return convert(name, input, null);
}
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @param ref the reference context
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, ReferenceContext ref) {
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) )
return null;
Allele refAllele = new Allele(String.valueOf((char)geli.getReferenceBase()), true);
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) {
return null;
}
Allele allele1 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
// add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) {
return null;
}
Allele allele2 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
if (!alleles.contains(allele2) && !allele2.equals(refAllele, true)) alleles.add(allele2);
Map<String, String> attributes = new HashMap<String, String>();
Collection<Genotype> genotypes = new ArrayList<Genotype>();
MutableGenotype call = new MutableGenotype(name, alleles);
// setup the genotype likelihoods
double[] post = new double[10];
String[] gTypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
for (int x = 0; x < 10; x++)
post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1))));
// set the likelihoods, depth, and RMS mapping quality values
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post);
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality());
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads());
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
vc.validate();
return vc;
}
}
// --------------------------------------------------------------------------------------------------------------
//
// HapMap to VariantContext

View File

@ -36,7 +36,11 @@ public class rodGELI extends BasicReferenceOrderedDatum {
return GenomeLocParser.createGenomeLoc(gh.getSequenceIndex(), gh.getPosition());
}
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
public GenotypeLikelihoods getGeliRecord() {
return gh;
}
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
* so this method does nothing at all (always returns false).
*
*/

View File

@ -142,6 +142,11 @@ public class GeliAdapter implements GeliGenotypeWriter {
if ( maxMappingQual < p.getMappingQual() )
maxMappingQual = p.getMappingQual();
}
} else {
if (genotype.hasAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY))
readCount = genotype.getAttributeAsInt(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY);
if (genotype.hasAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY))
maxMappingQual = Double.valueOf(genotype.getAttributeAsInt(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY));
}
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);

View File

@ -1,5 +1,9 @@
package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.util.AsciiLineReader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -7,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
@ -115,7 +120,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
* the information that encodes a Geli makes it into the VC and then out to disk.
*/
@Test
public void testVariantContextGeliToGeli() {
public void testVariantContextGeliTextToGeliText() {
// our input and output files
File knownFile = new File(validationDataLocation + "/well_formed.geli"); // our known good geli
@ -139,7 +144,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
parseGeli(geliText, line);
parseGeliText(geliText, line);
records.add(geliText); // we know they're all single calls in the reference file
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText);
gw.addCall(vc,null);
@ -160,7 +165,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
parseGeli(geliText,line);
parseGeliText(geliText,line);
records2.add(geliText); // we know they're all single calls in the reference file
line = readLine(reader);
@ -169,18 +174,74 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
reader.close();
// compare sizes
Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record TODO: uncomment out next two lines, fix equals so that rounding doesn't ruin our comparison
// now compare each record
for (int x = 0; x < records.size(); x++)
Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x)));
}
/**
* this test takes a known GeliBinary file, reads in the records (storing them into an array),
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
* and then read back in off of disk and compared to the original records. This way we are positive all
* the information that encodes a Geli makes it into the VC and then out to disk.
*/
@Test
public void testVariantContextGeliBinaryToGeliBinary() {
// our input and output files
File knownFile = new File(validationDataLocation + "large_test_geli_binary.geli"); // our known good geli
File tempFile = new File("temp_binary.geli"); // our temporary geli output -> input file
//tempFile.deleteOnExit(); // delete when we're done
// create our genotype writer for GLFs
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY,tempFile);
// buffer the records we see
List<rodGELI> records = new ArrayList<rodGELI>();
// a little more complicated than the GLF example, we have to read the file in
GeliFileReader reader = new GeliFileReader(knownFile);
((GeliGenotypeWriter)gw).writeHeader(reader.getFileHeader()); // the write header command ignores the parameter
CloseableIterator<GenotypeLikelihoods> iterator = reader.iterator();
// while we have records, make a Variant Context and output it to a GeliBinary file
while (iterator.hasNext()) {
rodGELI gel = new rodGELI("myROD",iterator.next());
records.add(gel);
VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel);
if (vc != null) gw.addCall(vc,null);
}
iterator.close();
gw.close(); // close the file
reader.close();
// buffer the new records we see
List<rodGELI> records2 = new ArrayList<rodGELI>();
reader = new GeliFileReader(tempFile);
iterator = reader.iterator();
while (iterator.hasNext()) {
rodGELI gel = new rodGELI("myROD",iterator.next());
records2.add(gel);
}
iterator.close();
reader.close();
// compare sizes
Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record
for (int x = 0; x < records.size(); x++) {
Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord()));
}
}
/**
* parse the geli given a line representation
* @param geliText the object to parse into
* @param line the line to parse with
*/
private void parseGeli(RodGeliText geliText, String line) {
private void parseGeliText(RodGeliText geliText, String line) {
boolean parsed = false;
try {
parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX));