diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java index 0b28416e4..6571d1a40 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java @@ -6,19 +6,20 @@ import java.util.List; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodSAMPileup; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; +import org.broadinstitute.sting.gatk.refdata.Genotype; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.playground.utils.TrioConcordanceRecord; import org.broadinstitute.sting.utils.cmdLine.Argument; public class MendelianInheritanceWalker extends RefWalker { - @Argument(fullName="conf_cutoff", shortName="X",required=true ) public Double CONF_CUTOFF; + @Argument(fullName="consensus_cutoff", shortName="XC",required=true ) public Double CONS_CUTOFF; + @Argument(fullName="snp_cutoff", shortName="XS",required=true ) public Double SNP_CUTOFF; + @Argument(fullName="indel_cutoff", shortName="XI",required=true ) public Double INDEL_CUTOFF; private static Logger logger = Logger.getLogger(MendelianInheritanceWalker.class); - + private final static String star = new String("*"); @Override public TrioConcordanceRecord map(RefMetaDataTracker rodData, char ref, LocusContext context) { @@ -27,15 +28,33 @@ public class MendelianInheritanceWalker extends RefWalker mom_alleles = mom.getGenotype(); - List dad_alleles = dad.getGenotype(); + String kid_allele_1 = kid.getFWDAlleles().get(0); + String kid_allele_2 = kid.getFWDAlleles().get(1); + List mom_alleles = mom.getFWDAlleles(); + List dad_alleles = dad.getFWDAlleles(); - // kid must have one allele from mom and one allele from dad if it's not X chromosome! + if ( ! mom.isIndel() && ! dad.isIndel() && ! kid.isIndel() ) { + // individuals have SNPs (any member of the trio is ok, we'll check for consistency in a sec), but no indels at the site: - if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) || - mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) { - t.consistent_snp = 1; + // warning: no special processing for X/Y chromosomes yet; not an issue for daughter + + if ( ! mom.isBiallelic() || ! dad.isBiallelic() || ! kid.isBiallelic() ) { + t.non_biallelic_snp = 1; // we currently ignore non-biallelic sites + return t; + } + + if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) || + mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) { + t.consistent_snp = 1; + // logger.info("consistent SNP at "+context.getLocation() + // "("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " + // dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " + // kid_allele_1+"/" +kid_allele_2 // ); + } else { + // we are inconsistent. let's see what happened: + if ( kid.isSNP() && ! mom.isSNP() && ! dad.isSNP() ) t.missing_snp_in_parents = 1; + if ( ! kid.isSNP() && ( mom.isSNP() && mom.isHom() || dad.isSNP() && dad.isHom() ) ) t.missing_snp_in_kid = 1; + + t.inconsistent_snp = 1; + logger.info("INconsistent SNP at "+context.getLocation() + + "("+ref+") mom:" + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " dad: " + + dad_alleles.get(0)+"/" +dad_alleles.get(1) + " kid: " + + kid_allele_1+"/" +kid_allele_2 + ); + } return t; - } + } + + if ( ! mom.isSNP() && ! dad.isSNP() && ! kid.isSNP() ) { + // individuals have indels (any member of the trio is ok, we'll check for consistency in a seq), but no indels at the site: + + // warning: no special processing for X/Y chromosomes yet; not an issue for daughter + + if ( ! mom.isBiallelic() || ! dad.isBiallelic() || ! kid.isBiallelic() ) { + t.non_biallelic_indel = 1; // we currently ignore non-biallelic sites + return t; + } + + if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) || + mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) { + t.consistent_indels = 1; + + logger.info("consistent INDEL at "+context.getLocation() + + "("+ref+") mom:" + genotypeString(mom)+ " dad: " + + genotypeString(dad) + " kid: " + + genotypeString(kid) + ); + + } else { + if ( kid.isIndel() && ! mom.isIndel() && ! dad.isIndel() ) t.missing_indels_in_parents = 1; + if ( ! kid.isIndel() && ( mom.isIndel() && mom.isHom() || dad.isIndel() && dad.isHom() ) ) t.missing_indels_in_kid = 1; + + t.inconsistent_indels = 1; + logger.info("INconsistent INDEL at "+context.getLocation() + + "("+ref+") mom:" + genotypeString(mom)+ " dad: " + + genotypeString(dad) + " kid: " + + genotypeString(kid) + ); + } + return t; + } - // snp is inconsistent - - t.inconsistent_snp = 1; - logger.info("INconsistent SNP at "+context.getLocation() + - "("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " + - dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " + - kid_allele_1+"/" +kid_allele_2 - ); + t.unclassified_events = 1; return t; } + protected String alleleString(Genotype g, int n) { + if ( g.getFWDAlleles().get(n).length() == 0 ) return star; + return g.getFWDAlleles().get(n); + } + + protected String genotypeString(Genotype g) { + return alleleString(g, 0) +"/"+alleleString(g, 1); + } + @Override public TrioConcordanceRecord reduce(TrioConcordanceRecord value, TrioConcordanceRecord sum) { @@ -101,24 +173,41 @@ public class MendelianInheritanceWalker extends RefWalker= CONF_CUTOFF ; - else return av.getVariationConfidence() >= CONF_CUTOFF; + boolean hasCall(Genotype g) { + if ( g == null ) return false; // there's no call if there's no rod data available, duh! + if ( g.isReference() ) return g.getConsensusConfidence() >= CONS_CUTOFF ; + else { + if ( g.isSNP() ) return g.getVariantConfidence() >= SNP_CUTOFF; + else return Math.max(g.getConsensusConfidence(),g.getVariantConfidence()) >= INDEL_CUTOFF; + } } - protected String shortLine(AllelicVariant av) { + /* + protected String shortLine(Genotype av) { if ( av == null ) return new String( ""); if ( av.isReference() ) return new String ("*"); - if ( av.isSNP() ) return av.getAltBasesFWD(); - if ( av.isInsertion() ) return new String('+'+av.getAltBasesFWD()); - if ( av.isDeletion() ) return new String('-'+av.getRefBasesFWD()); - if ( av.isIndel() ) return new String('?'+av.getAltBasesFWD()); + List alleles = av.getFWDAlleles(); + + if ( av.isSNP() ) { + if ( alleles.get(0).charAt(0) == av.getRef() ) return alleles.get(1); + else return alleles.get(0); + } + if ( av.isInsertion() ) { + if ( alleles.get(0).length() == 0 ) return new String('+'+alleles.get(1)); + else return new String('+'+alleles.get(0)); + } + if ( av.isDeletion() ) { + if ( alleles.get(0).length() == 0 ) return new String ('-'+alleles.get(0)); + else return new String('-'+alleles.get(1)); + } + if ( av.isIndel() ) { + return new String('?'+alleles.get(0)); + } return new String(""); } - + */ }