diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java index cc35f4ad1..ebdc4382d 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java @@ -17,7 +17,7 @@ public class Allele { // the types of variants we currently allow public enum AlleleType { - REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE + REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE, DELETION_REFERENCE } public Allele(AlleleType type, String bases) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java index 7bd64aaa3..c34304c95 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java @@ -21,7 +21,6 @@ import net.sf.samtools.SAMFileHeader; * To change this template use File | Settings | File Templates. */ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum { - private boolean allowTest = false; private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex", "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE")); private final byte SNP_MAJOR_MODE = 0x00000001; @@ -97,6 +96,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements return currentVariant.getGenotypes(); } + public boolean variantIsSNP() { + return currentVariant.isSNP(); + } + // AM I PARSING A TEXT OR A BINARY FILE ?? @@ -226,6 +229,9 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile); ArrayList sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile); ArrayList updatedVariants = getGenotypesFromBedFile(parsedVariants,sampleNames,binaryFiles.bedFile); + + java.util.Collections.sort(updatedVariants); + return updatedVariants; } @@ -429,7 +435,7 @@ class PlinkVariantInfo implements Comparable { return genotypes; } - private boolean isSNP() { + public boolean isSNP() { return this.indelType == null; } @@ -516,6 +522,7 @@ class PlinkVariantInfo implements Comparable { for ( String alStr : alleleStrings ) { alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE,alStr)); } + genotypes.add(new Genotype(alleles,sampleName,20.0) ); sampleNames.add(sampleName); } @@ -544,7 +551,7 @@ class PlinkVariantInfo implements Comparable { alt = new Allele(indelType,baseStr); } else { alt = new Allele(indelType,""); - ref = new Allele(Allele.AlleleType.REFERENCE,baseStr); + ref = new Allele(Allele.AlleleType.DELETION_REFERENCE,baseStr); } this.setIndelLength(alt,baseStr.length()); @@ -571,8 +578,8 @@ class PlinkVariantInfo implements Comparable { allele2 = new Allele(indelType,""); reference = false; } else { - allele1 = new Allele(Allele.AlleleType.REFERENCE,strand1); - allele2 = new Allele(Allele.AlleleType.REFERENCE,strand2); + allele1 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand1); + allele2 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand2); reference = true; } } else { @@ -597,6 +604,7 @@ class PlinkVariantInfo implements Comparable { if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0); + setIndelGenotypeLength(gen,siteIndelLength); this.genotypes.add(gen); this.sampleNames.add(sampleName); } else { // hold on the variants until we *do* know the in/del length at this site diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java index 3d98a8013..c8c770486 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java @@ -85,7 +85,7 @@ public class PlinkRodTest extends BaseTest { ArrayList snp3 = genotypesInRod.get(2); Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size()); - Assert.assertEquals("That there are three samples in SNP 1", 3, sampleNamesInRod.get(0).size()); + Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size()); Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size()); @@ -114,4 +114,42 @@ public class PlinkRodTest extends BaseTest { lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i)); } } + + @Test + public void testStandardPedFileWithIndels() { + PlinkRodWithGenomeLoc rod = new PlinkRodWithGenomeLoc("test"); + try { + rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") ); + } catch ( FileNotFoundException e) { + throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e); + } + + // Iterate through the rod + + List> genotypesInRod = new ArrayList>(); + ArrayList> sampleNamesInRod = new ArrayList>(); + ArrayList lociInRod = new ArrayList(); + ArrayList snpSites = new ArrayList(); + do { + genotypesInRod.add(rod.getGenotypes()); + sampleNamesInRod.add(rod.getVariantSampleNames()); + lociInRod.add(rod.getLocation()); + snpSites.add(rod.variantIsSNP()); + } while ( rod.parseLine(null,null) ); + + boolean snpOrder = true; + List expectedOrder = Arrays.asList(true,false,true,false); + for ( int i = 0; i < 4; i ++ ) { + snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) ); + } + + Assert.assertTrue("That the variant type order is as expected", snpOrder); + Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() ); + Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() ); + Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom()); + Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet()); + Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases()); + Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases()); + Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases()); + } }