diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 85f778399..df73db2fe 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -34,6 +34,12 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, super(name); } + private RodVCF(String name, VCFRecord currentRecord, VCFReader reader) { + super(name); + mCurrentRecord = currentRecord; + mReader = reader; + } + public void assertNotNull() { if (mCurrentRecord == null) { throw new UnsupportedOperationException("The current Record is null"); @@ -46,9 +52,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, } public Object initialize(final File source) throws FileNotFoundException { - if (mReader == null) { - mReader = new VCFReader(source); - } + if (mReader == null) mReader = new VCFReader(source); return mReader.getHeader(); } @@ -60,7 +64,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, return ""; } - public Iterator createIterator(String name, File file) { + public static RodVCF createIterator(String name, File file) { RodVCF vcf = new RodVCF(name); try { vcf.initialize(file); @@ -125,7 +129,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, */ @Override public boolean isInsertion() { - this.assertNotNull(); + this.assertNotNull(); if (!mCurrentRecord.hasAlternateAllele()) return false; for (String alt : this.mCurrentRecord.getAlternateAlleles()) { @@ -142,7 +146,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, */ @Override public boolean isDeletion() { - this.assertNotNull(); + this.assertNotNull(); if (!mCurrentRecord.hasAlternateAllele()) return false; for (String alt : this.mCurrentRecord.getAlternateAlleles()) { @@ -155,7 +159,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, @Override public GenomeLoc getLocation() { this.assertNotNull(); - return GenomeLocParser.createGenomeLoc(mCurrentRecord.getChromosome(), mCurrentRecord.getPosition()); + return GenomeLocParser.createGenomeLoc(mCurrentRecord.getChromosome(), mCurrentRecord.getPosition(), mCurrentRecord.getPosition()); } /** @@ -202,7 +206,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, */ @Override public String getAlternateBases() { - if (!this.isBiallelic()) throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead"); + if (!this.isBiallelic()) + throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead"); return this.mCurrentRecord.getAlternateAlleles().get(0); } @@ -341,7 +346,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, @Override public RodVCF next() { mCurrentRecord = mReader.next(); - return this; + return new RodVCF(this.name, mCurrentRecord, mReader); } @Override diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ReadCigarFormatter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ReadCigarFormatter.java new file mode 100644 index 000000000..2269b440c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ReadCigarFormatter.java @@ -0,0 +1,76 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; + +/** + * Returns formatted read given read string and cigar string + * Essentially removes header bases, soft clipped bases, and currently removes insertions + * Deletions coded as "D" + * + * @author shermanjia + */ + +public class ReadCigarFormatter { + public String FormatRead(String cigar, String read){ + // returns a cigar-formatted sequence (removes insertions, inserts 'D' to where deletions occur + String formattedRead = ""; char c; String count; + int cigarPlaceholder = 0; int subcigarLength = 0; + int readPlaceholder = 0; int subreadLength = 0; + + //reads cigar string + for (int i = 0; i < cigar.length(); i++){ + c = cigar.charAt(i); + if (c == 'M'){ + //If reach M for match/mismatch, get number immediately preceeding 'M' and tack on that many characters to sequence + subcigarLength = i-cigarPlaceholder; + count = cigar.substring(cigarPlaceholder, i); + + subreadLength = Integer.parseInt(count); + formattedRead = formattedRead + read.substring(readPlaceholder, readPlaceholder+subreadLength); + + //increment placeholders + cigarPlaceholder = i+1; + readPlaceholder = readPlaceholder + subreadLength; + } else if (c == 'I'){ + //***NOTE: To be modified later if needed (insertions removed here)*** + + //If reaches I for insertion, get number before 'I' and skip that many characters in sequence + count = cigar.substring(cigarPlaceholder, i); + subreadLength = Integer.parseInt(count); + + //increment placeholders without adding inserted bases to sequence (effectively removes insertion). + cigarPlaceholder = i+1; + readPlaceholder = readPlaceholder + subreadLength; + } else if (c == 'H' || c == 'S'){ + //(H = Headers or S = Soft clipped removed here)*** + + //If reaches H for insertion, get number before 'H' and skip that many characters in sequence + count = cigar.substring(cigarPlaceholder, i); + subreadLength = Integer.parseInt(count); + + //increment cigar placeholder without adding inserted bases to sequence (effectively removes insertion). + cigarPlaceholder = i+1; + } else if (c == 'D'){ + //If reaches D for deletion, insert 'D' into sequence as placeholder + count = cigar.substring(cigarPlaceholder, i); + subreadLength = Integer.parseInt(count); + + //Add one 'D' for each deleted base + String deletion = ""; + for (int j = 1; j <= subreadLength; j++){ + deletion = deletion + "D"; + } + + //update placeholders + formattedRead = formattedRead + deletion; + cigarPlaceholder = i+1; + } + + } + return formattedRead; + } + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index f104a8985..4f43c4bfe 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; +import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.Variation; @@ -50,26 +50,31 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); - // This shouldn't happen, but let's check anyways - if ( BaseUtils.simpleBaseToBaseIndex(ref) == -1 ) - return; + // This shouldn't happen, but let's check anyways + if (BaseUtils.simpleBaseToBaseIndex(ref) == -1) + return; + + // get the genotyping data + Genotype chipGenotype = null; + Genotype evalGenotype = null; + if (chip != null) chipGenotype = ((VariantBackedByGenotype)chip).getCalledGenotype(); + if (eval != null) evalGenotype = ((VariantBackedByGenotype)eval).getCalledGenotype(); - DiploidGenotype g = DiploidGenotype.createHomGenotype(ref); int truthIndex, callIndex; if (chip == null) truthIndex = UNKNOWN; - else if (chip.getAlternateBases().equals(g.toString())) + else if (!chipGenotype.isVariant(ref)) truthIndex = REF; - else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1)) + else if (chipGenotype.isHet()) truthIndex = VAR_HET; else truthIndex = VAR_HOM; if (eval == null) callIndex = NO_CALL; - else if (eval.getAlternateBases().equals(g.toString())) + else if (!evalGenotype.isVariant(ref)) callIndex = REF; - else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1)) + else if (evalGenotype.isHet()) callIndex = VAR_HET; else callIndex = VAR_HOM; @@ -84,8 +89,8 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { Variation chip = (Variation) tracker.lookup(dbName, null); - if ( eval != null || chip != null ) - inc(chip, eval, ref); + if (eval != null || chip != null) + inc(chip, eval, ref); return null; } @@ -102,9 +107,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp s.add(sb.toString()); } - /** - * How many overall calls where made that aren't NO_CALLS or UNKNOWNS? - */ + /** How many overall calls where made that aren't NO_CALLS or UNKNOWNS? */ private int getNCalled() { int n = 0; for (int i = 0; i < 4; i++)