latest version...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@477 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-21 20:38:37 +00:00
parent 64b2fd866f
commit 61e855200d
1 changed files with 132 additions and 43 deletions

View File

@ -6,19 +6,20 @@ import java.util.List;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.playground.utils.TrioConcordanceRecord;
import org.broadinstitute.sting.utils.cmdLine.Argument;
public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord, TrioConcordanceRecord> {
@Argument(fullName="conf_cutoff", shortName="X",required=true ) public Double CONF_CUTOFF;
@Argument(fullName="consensus_cutoff", shortName="XC",required=true ) public Double CONS_CUTOFF;
@Argument(fullName="snp_cutoff", shortName="XS",required=true ) public Double SNP_CUTOFF;
@Argument(fullName="indel_cutoff", shortName="XI",required=true ) public Double INDEL_CUTOFF;
private static Logger logger = Logger.getLogger(MendelianInheritanceWalker.class);
private final static String star = new String("*");
@Override
public TrioConcordanceRecord map(RefMetaDataTracker rodData, char ref, LocusContext context) {
@ -27,15 +28,33 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
// String outLine = new String(context.getLocation() + " REF: "+ref + " RODS:" + rodData.getAllRods().size());
AllelicVariant mom = (rodSAMPileup)rodData.lookup("mother", null);
AllelicVariant dad = (rodSAMPileup)rodData.lookup("father", null);
AllelicVariant kid = (rodSAMPileup)rodData.lookup("daughter", null);
Genotype mom = (rodSAMPileup)rodData.lookup("mother", null);
Genotype dad = (rodSAMPileup)rodData.lookup("father", null);
Genotype kid = (rodSAMPileup)rodData.lookup("daughter", null);
if ( hasCall(mom)) t.mom_assessed = 1;
if ( hasCall(dad)) t.dad_assessed = 1;
if ( hasCall(kid)) t.kid_assessed = 1;
if ( hasCall(mom)) {
t.mom_assessed = 1;
if ( mom.isReference() ) t.mom_ref = 1;
else if ( mom.isSNP() ) t.mom_snp = 1;
else if ( mom.isIndel() ) t.mom_indel = 1;
}
if ( hasCall(dad)) {
t.dad_assessed = 1;
if ( dad.isReference() ) t.dad_ref = 1;
else if ( dad.isSNP() ) t.dad_snp = 1;
else if ( dad.isIndel() ) t.dad_indel = 1;
}
if ( hasCall(mom) && mom.isIndel() ) System.out.println("GOT INDEL: "+mom.toString());
if ( hasCall(kid)) {
t.kid_assessed = 1;
if ( kid.isReference() ) t.kid_ref = 1;
else if ( kid.isSNP() ) t.kid_snp = 1;
else if ( kid.isIndel() ) t.kid_indel = 1;
}
// if ( hasCall(mom) && mom.isIndel() ) System.out.println("GOT INDEL: "+mom.toString());
if (( t.mom_assessed + t.dad_assessed + t.kid_assessed) != 3 ) return t; // at least one person is not called; nothing to do, bail out
@ -43,10 +62,6 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
t.assessed_loci = 1;
if ( ! mom.isBiallelic() || ! dad.isBiallelic() || ! kid.isBiallelic() ) {
t.non_biallelic = 1; // we currently ignore non-biallelic sites
return t;
}
// ok, we got a call and it is biallelic (at most)
@ -55,40 +70,97 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
return t;
}
// by now we know that there's a SNP
// by now we know that there's a SNP or an indel: at least one of the individuals is non-ref
String kid_allele_1 = kid.getGenotype().get(0);
String kid_allele_2 = kid.getGenotype().get(1);
List<String> mom_alleles = mom.getGenotype();
List<String> dad_alleles = dad.getGenotype();
String kid_allele_1 = kid.getFWDAlleles().get(0);
String kid_allele_2 = kid.getFWDAlleles().get(1);
List<String> mom_alleles = mom.getFWDAlleles();
List<String> dad_alleles = dad.getFWDAlleles();
// kid must have one allele from mom and one allele from dad if it's not X chromosome!
if ( ! mom.isIndel() && ! dad.isIndel() && ! kid.isIndel() ) {
// individuals have SNPs (any member of the trio is ok, we'll check for consistency in a sec), but no indels at the site:
if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) ||
mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) {
t.consistent_snp = 1;
// warning: no special processing for X/Y chromosomes yet; not an issue for daughter
if ( ! mom.isBiallelic() || ! dad.isBiallelic() || ! kid.isBiallelic() ) {
t.non_biallelic_snp = 1; // we currently ignore non-biallelic sites
return t;
}
if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) ||
mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) {
t.consistent_snp = 1;
// logger.info("consistent SNP at "+context.getLocation() +
// "("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " +
// dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " +
// kid_allele_1+"/" +kid_allele_2
// );
} else {
// we are inconsistent. let's see what happened:
if ( kid.isSNP() && ! mom.isSNP() && ! dad.isSNP() ) t.missing_snp_in_parents = 1;
if ( ! kid.isSNP() && ( mom.isSNP() && mom.isHom() || dad.isSNP() && dad.isHom() ) ) t.missing_snp_in_kid = 1;
t.inconsistent_snp = 1;
logger.info("INconsistent SNP at "+context.getLocation() +
"("+ref+") mom:" + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " dad: " +
dad_alleles.get(0)+"/" +dad_alleles.get(1) + " kid: " +
kid_allele_1+"/" +kid_allele_2
);
}
return t;
}
}
if ( ! mom.isSNP() && ! dad.isSNP() && ! kid.isSNP() ) {
// individuals have indels (any member of the trio is ok, we'll check for consistency in a seq), but no indels at the site:
// warning: no special processing for X/Y chromosomes yet; not an issue for daughter
if ( ! mom.isBiallelic() || ! dad.isBiallelic() || ! kid.isBiallelic() ) {
t.non_biallelic_indel = 1; // we currently ignore non-biallelic sites
return t;
}
if ( mom_alleles.contains(kid_allele_1) && dad_alleles.contains(kid_allele_2) ||
mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) {
t.consistent_indels = 1;
logger.info("consistent INDEL at "+context.getLocation() +
"("+ref+") mom:" + genotypeString(mom)+ " dad: " +
genotypeString(dad) + " kid: " +
genotypeString(kid)
);
} else {
if ( kid.isIndel() && ! mom.isIndel() && ! dad.isIndel() ) t.missing_indels_in_parents = 1;
if ( ! kid.isIndel() && ( mom.isIndel() && mom.isHom() || dad.isIndel() && dad.isHom() ) ) t.missing_indels_in_kid = 1;
t.inconsistent_indels = 1;
logger.info("INconsistent INDEL at "+context.getLocation() +
"("+ref+") mom:" + genotypeString(mom)+ " dad: " +
genotypeString(dad) + " kid: " +
genotypeString(kid)
);
}
return t;
}
// snp is inconsistent
t.inconsistent_snp = 1;
logger.info("INconsistent SNP at "+context.getLocation() +
"("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " +
dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " +
kid_allele_1+"/" +kid_allele_2
);
t.unclassified_events = 1;
return t;
}
protected String alleleString(Genotype g, int n) {
if ( g.getFWDAlleles().get(n).length() == 0 ) return star;
return g.getFWDAlleles().get(n);
}
protected String genotypeString(Genotype g) {
return alleleString(g, 0) +"/"+alleleString(g, 1);
}
@Override
public TrioConcordanceRecord reduce(TrioConcordanceRecord value, TrioConcordanceRecord sum) {
@ -101,24 +173,41 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
return new TrioConcordanceRecord();
}
boolean hasCall(AllelicVariant av) {
if ( av == null ) return false; // there's no call if there's no rod data available, duh!
if ( av.isReference() ) return av.getConsensusConfidence() >= CONF_CUTOFF ;
else return av.getVariationConfidence() >= CONF_CUTOFF;
boolean hasCall(Genotype g) {
if ( g == null ) return false; // there's no call if there's no rod data available, duh!
if ( g.isReference() ) return g.getConsensusConfidence() >= CONS_CUTOFF ;
else {
if ( g.isSNP() ) return g.getVariantConfidence() >= SNP_CUTOFF;
else return Math.max(g.getConsensusConfidence(),g.getVariantConfidence()) >= INDEL_CUTOFF;
}
}
protected String shortLine(AllelicVariant av) {
/*
protected String shortLine(Genotype av) {
if ( av == null ) return new String( "<NULL>");
if ( av.isReference() ) return new String ("*");
if ( av.isSNP() ) return av.getAltBasesFWD();
if ( av.isInsertion() ) return new String('+'+av.getAltBasesFWD());
if ( av.isDeletion() ) return new String('-'+av.getRefBasesFWD());
if ( av.isIndel() ) return new String('?'+av.getAltBasesFWD());
List<String> alleles = av.getFWDAlleles();
if ( av.isSNP() ) {
if ( alleles.get(0).charAt(0) == av.getRef() ) return alleles.get(1);
else return alleles.get(0);
}
if ( av.isInsertion() ) {
if ( alleles.get(0).length() == 0 ) return new String('+'+alleles.get(1));
else return new String('+'+alleles.get(0));
}
if ( av.isDeletion() ) {
if ( alleles.get(0).length() == 0 ) return new String ('-'+alleles.get(0));
else return new String('-'+alleles.get(1));
}
if ( av.isIndel() ) {
return new String('?'+alleles.get(0));
}
return new String("<missing data!!!>");
}
*/
}