adding VC adaptor for GELI, along with unit tests.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3243 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3d2c836db6
commit
536f22f3bd
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
|
|
@ -46,7 +48,8 @@ public class VariantContextAdaptors {
|
|||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
adaptors.put(HapMapROD.class, new HapMapAdaptor());
|
||||
adaptors.put(RodGLF.class, new GLFAdaptor());
|
||||
adaptors.put(RodGeliText.class, new GeliAdaptor());
|
||||
adaptors.put(RodGeliText.class, new GeliTextAdaptor());
|
||||
adaptors.put(rodGELI.class, new GeliAdaptor());
|
||||
}
|
||||
|
||||
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
|
||||
|
|
@ -593,7 +596,7 @@ public class VariantContextAdaptors {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class GeliAdaptor extends VCAdaptor {
|
||||
private static class GeliTextAdaptor extends VCAdaptor {
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
|
|
@ -652,6 +655,79 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// GELI to VariantContext
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class GeliAdaptor extends VCAdaptor {
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input) {
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @param ref the reference context
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(String.valueOf((char)geli.getReferenceBase()), true);
|
||||
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add the two alt alleles
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) {
|
||||
return null;
|
||||
}
|
||||
Allele allele1 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
|
||||
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
|
||||
|
||||
// add the two alt alleles
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) {
|
||||
return null;
|
||||
}
|
||||
Allele allele2 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
|
||||
if (!alleles.contains(allele2) && !allele2.equals(refAllele, true)) alleles.add(allele2);
|
||||
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
MutableGenotype call = new MutableGenotype(name, alleles);
|
||||
|
||||
// setup the genotype likelihoods
|
||||
double[] post = new double[10];
|
||||
String[] gTypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
|
||||
for (int x = 0; x < 10; x++)
|
||||
post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1))));
|
||||
|
||||
// set the likelihoods, depth, and RMS mapping quality values
|
||||
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post);
|
||||
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality());
|
||||
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads());
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// HapMap to VariantContext
|
||||
|
|
|
|||
|
|
@ -36,7 +36,11 @@ public class rodGELI extends BasicReferenceOrderedDatum {
|
|||
return GenomeLocParser.createGenomeLoc(gh.getSequenceIndex(), gh.getPosition());
|
||||
}
|
||||
|
||||
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
|
||||
public GenotypeLikelihoods getGeliRecord() {
|
||||
return gh;
|
||||
}
|
||||
|
||||
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
|
||||
* so this method does nothing at all (always returns false).
|
||||
*
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -142,6 +142,11 @@ public class GeliAdapter implements GeliGenotypeWriter {
|
|||
if ( maxMappingQual < p.getMappingQual() )
|
||||
maxMappingQual = p.getMappingQual();
|
||||
}
|
||||
} else {
|
||||
if (genotype.hasAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY))
|
||||
readCount = genotype.getAttributeAsInt(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY);
|
||||
if (genotype.hasAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY))
|
||||
maxMappingQual = Double.valueOf(genotype.getAttributeAsInt(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY));
|
||||
}
|
||||
|
||||
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.util.AsciiLineReader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -7,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
|
|
@ -115,7 +120,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
* the information that encodes a Geli makes it into the VC and then out to disk.
|
||||
*/
|
||||
@Test
|
||||
public void testVariantContextGeliToGeli() {
|
||||
public void testVariantContextGeliTextToGeliText() {
|
||||
|
||||
// our input and output files
|
||||
File knownFile = new File(validationDataLocation + "/well_formed.geli"); // our known good geli
|
||||
|
|
@ -139,7 +144,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (line != null && line != "") {
|
||||
parseGeli(geliText, line);
|
||||
parseGeliText(geliText, line);
|
||||
records.add(geliText); // we know they're all single calls in the reference file
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText);
|
||||
gw.addCall(vc,null);
|
||||
|
|
@ -160,7 +165,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (line != null && line != "") {
|
||||
parseGeli(geliText,line);
|
||||
parseGeliText(geliText,line);
|
||||
records2.add(geliText); // we know they're all single calls in the reference file
|
||||
line = readLine(reader);
|
||||
|
||||
|
|
@ -169,18 +174,74 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
reader.close();
|
||||
// compare sizes
|
||||
Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
// now compare each record TODO: uncomment out next two lines, fix equals so that rounding doesn't ruin our comparison
|
||||
// now compare each record
|
||||
for (int x = 0; x < records.size(); x++)
|
||||
Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x)));
|
||||
}
|
||||
|
||||
/**
|
||||
* this test takes a known GeliBinary file, reads in the records (storing them into an array),
|
||||
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
|
||||
* and then read back in off of disk and compared to the original records. This way we are positive all
|
||||
* the information that encodes a Geli makes it into the VC and then out to disk.
|
||||
*/
|
||||
@Test
|
||||
public void testVariantContextGeliBinaryToGeliBinary() {
|
||||
|
||||
// our input and output files
|
||||
File knownFile = new File(validationDataLocation + "large_test_geli_binary.geli"); // our known good geli
|
||||
File tempFile = new File("temp_binary.geli"); // our temporary geli output -> input file
|
||||
//tempFile.deleteOnExit(); // delete when we're done
|
||||
|
||||
// create our genotype writer for GLFs
|
||||
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY,tempFile);
|
||||
|
||||
// buffer the records we see
|
||||
List<rodGELI> records = new ArrayList<rodGELI>();
|
||||
|
||||
// a little more complicated than the GLF example, we have to read the file in
|
||||
GeliFileReader reader = new GeliFileReader(knownFile);
|
||||
|
||||
((GeliGenotypeWriter)gw).writeHeader(reader.getFileHeader()); // the write header command ignores the parameter
|
||||
|
||||
CloseableIterator<GenotypeLikelihoods> iterator = reader.iterator();
|
||||
// while we have records, make a Variant Context and output it to a GeliBinary file
|
||||
while (iterator.hasNext()) {
|
||||
rodGELI gel = new rodGELI("myROD",iterator.next());
|
||||
records.add(gel);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel);
|
||||
if (vc != null) gw.addCall(vc,null);
|
||||
}
|
||||
iterator.close();
|
||||
gw.close(); // close the file
|
||||
reader.close();
|
||||
|
||||
// buffer the new records we see
|
||||
List<rodGELI> records2 = new ArrayList<rodGELI>();
|
||||
|
||||
reader = new GeliFileReader(tempFile);
|
||||
iterator = reader.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
rodGELI gel = new rodGELI("myROD",iterator.next());
|
||||
records2.add(gel);
|
||||
}
|
||||
iterator.close();
|
||||
reader.close();
|
||||
// compare sizes
|
||||
Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
// now compare each record
|
||||
for (int x = 0; x < records.size(); x++) {
|
||||
Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* parse the geli given a line representation
|
||||
* @param geliText the object to parse into
|
||||
* @param line the line to parse with
|
||||
*/
|
||||
private void parseGeli(RodGeliText geliText, String line) {
|
||||
private void parseGeliText(RodGeliText geliText, String line) {
|
||||
boolean parsed = false;
|
||||
try {
|
||||
parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX));
|
||||
|
|
|
|||
Loading…
Reference in New Issue