Adjusted GenotypeConcordance to use the new genotyping code more accurately, fixed the VCF ROD, and temporarily fixed the build by reintroducing Sherman's ReadCigarFormatter
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1745 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9b78a789e2
commit
b1c321f161
|
|
@ -34,6 +34,12 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
super(name);
|
super(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private RodVCF(String name, VCFRecord currentRecord, VCFReader reader) {
|
||||||
|
super(name);
|
||||||
|
mCurrentRecord = currentRecord;
|
||||||
|
mReader = reader;
|
||||||
|
}
|
||||||
|
|
||||||
public void assertNotNull() {
|
public void assertNotNull() {
|
||||||
if (mCurrentRecord == null) {
|
if (mCurrentRecord == null) {
|
||||||
throw new UnsupportedOperationException("The current Record is null");
|
throw new UnsupportedOperationException("The current Record is null");
|
||||||
|
|
@ -46,9 +52,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
}
|
}
|
||||||
|
|
||||||
public Object initialize(final File source) throws FileNotFoundException {
|
public Object initialize(final File source) throws FileNotFoundException {
|
||||||
if (mReader == null) {
|
if (mReader == null) mReader = new VCFReader(source);
|
||||||
mReader = new VCFReader(source);
|
|
||||||
}
|
|
||||||
return mReader.getHeader();
|
return mReader.getHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -60,7 +64,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<RodVCF> createIterator(String name, File file) {
|
public static RodVCF createIterator(String name, File file) {
|
||||||
RodVCF vcf = new RodVCF(name);
|
RodVCF vcf = new RodVCF(name);
|
||||||
try {
|
try {
|
||||||
vcf.initialize(file);
|
vcf.initialize(file);
|
||||||
|
|
@ -155,7 +159,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
@Override
|
@Override
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
this.assertNotNull();
|
this.assertNotNull();
|
||||||
return GenomeLocParser.createGenomeLoc(mCurrentRecord.getChromosome(), mCurrentRecord.getPosition());
|
return GenomeLocParser.createGenomeLoc(mCurrentRecord.getChromosome(), mCurrentRecord.getPosition(), mCurrentRecord.getPosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -202,7 +206,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String getAlternateBases() {
|
public String getAlternateBases() {
|
||||||
if (!this.isBiallelic()) throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
|
if (!this.isBiallelic())
|
||||||
|
throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
|
||||||
return this.mCurrentRecord.getAlternateAlleles().get(0);
|
return this.mCurrentRecord.getAlternateAlleles().get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -341,7 +346,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
@Override
|
@Override
|
||||||
public RodVCF next() {
|
public RodVCF next() {
|
||||||
mCurrentRecord = mReader.next();
|
mCurrentRecord = mReader.next();
|
||||||
return this;
|
return new RodVCF(this.name, mCurrentRecord, mReader);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* To change this template, choose Tools | Templates
|
||||||
|
* and open the template in the editor.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||||
|
|
||||||
|
/**
 * Returns a formatted read given a read string and its CIGAR string.
 *
 * The formatted read drops hard-clipped ("H") and soft-clipped ("S") bases,
 * currently drops inserted ("I") bases, and writes one 'D' character for every
 * deleted ("D") reference base.
 *
 * Only the M, I, D, S and H operators are recognised. Any other operator letter
 * is not consumed, so it ends up inside the next length field and makes the
 * following {@link Integer#parseInt(String)} call fail.
 *
 * @author shermanjia
 */
public class ReadCigarFormatter {

    /**
     * Builds the CIGAR-formatted version of a read.
     *
     * @param cigar CIGAR string, e.g. "2S30M1D4M"
     * @param read  read bases as stored with the alignment; soft-clipped bases
     *              are expected to be present, hard-clipped bases absent
     * @return the matched/mismatched bases in order, with insertions and clipped
     *         bases removed and a 'D' for each deleted base
     * @throws NumberFormatException if an operation length is not a valid integer
     *         (including when an unsupported operator precedes it)
     * @throws StringIndexOutOfBoundsException if the CIGAR consumes more bases
     *         than the read contains
     */
    public String FormatRead(String cigar, String read) {
        StringBuilder formattedRead = new StringBuilder(read.length());
        int cigarPlaceholder = 0; // start of the length field of the pending operation
        int readPlaceholder = 0;  // next unconsumed base in the read

        for (int i = 0; i < cigar.length(); i++) {
            char c = cigar.charAt(i);

            if (c == 'M') {
                // Match/mismatch: copy that many bases from the read.
                int length = Integer.parseInt(cigar.substring(cigarPlaceholder, i));
                formattedRead.append(read.substring(readPlaceholder, readPlaceholder + length));
                cigarPlaceholder = i + 1;
                readPlaceholder += length;
            } else if (c == 'I') {
                //***NOTE: To be modified later if needed (insertions removed here)***
                // Insertion: skip the inserted bases without emitting them.
                int length = Integer.parseInt(cigar.substring(cigarPlaceholder, i));
                cigarPlaceholder = i + 1;
                readPlaceholder += length;
            } else if (c == 'S') {
                // Soft clip: the clipped bases are part of the read string, so they
                // must be skipped; otherwise every later segment is copied from the
                // wrong offset.
                int length = Integer.parseInt(cigar.substring(cigarPlaceholder, i));
                cigarPlaceholder = i + 1;
                readPlaceholder += length;
            } else if (c == 'H') {
                // Hard clip: the clipped bases are not stored in the read, so only the
                // CIGAR cursor moves. The length is still parsed so that a malformed
                // field is reported.
                Integer.parseInt(cigar.substring(cigarPlaceholder, i));
                cigarPlaceholder = i + 1;
            } else if (c == 'D') {
                // Deletion: consumes no read bases; emit one 'D' per deleted base.
                int length = Integer.parseInt(cigar.substring(cigarPlaceholder, i));
                for (int j = 0; j < length; j++) {
                    formattedRead.append('D');
                }
                cigarPlaceholder = i + 1;
            }
            // Digits, and any unsupported operator letter, are left in place and
            // become part of the next length field.
        }
        return formattedRead.toString();
    }

}
|
||||||
|
|
@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
|
|
@ -51,25 +51,30 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||||
|
|
||||||
// This shouldn't happen, but let's check anyways
|
// This shouldn't happen, but let's check anyways
|
||||||
if ( BaseUtils.simpleBaseToBaseIndex(ref) == -1 )
|
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
DiploidGenotype g = DiploidGenotype.createHomGenotype(ref);
|
// get the genotyping data
|
||||||
|
Genotype chipGenotype = null;
|
||||||
|
Genotype evalGenotype = null;
|
||||||
|
if (chip != null) chipGenotype = ((VariantBackedByGenotype)chip).getCalledGenotype();
|
||||||
|
if (eval != null) evalGenotype = ((VariantBackedByGenotype)eval).getCalledGenotype();
|
||||||
|
|
||||||
int truthIndex, callIndex;
|
int truthIndex, callIndex;
|
||||||
if (chip == null)
|
if (chip == null)
|
||||||
truthIndex = UNKNOWN;
|
truthIndex = UNKNOWN;
|
||||||
else if (chip.getAlternateBases().equals(g.toString()))
|
else if (!chipGenotype.isVariant(ref))
|
||||||
truthIndex = REF;
|
truthIndex = REF;
|
||||||
else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
|
else if (chipGenotype.isHet())
|
||||||
truthIndex = VAR_HET;
|
truthIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
truthIndex = VAR_HOM;
|
truthIndex = VAR_HOM;
|
||||||
|
|
||||||
if (eval == null)
|
if (eval == null)
|
||||||
callIndex = NO_CALL;
|
callIndex = NO_CALL;
|
||||||
else if (eval.getAlternateBases().equals(g.toString()))
|
else if (!evalGenotype.isVariant(ref))
|
||||||
callIndex = REF;
|
callIndex = REF;
|
||||||
else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
|
else if (evalGenotype.isHet())
|
||||||
callIndex = VAR_HET;
|
callIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
callIndex = VAR_HOM;
|
callIndex = VAR_HOM;
|
||||||
|
|
@ -84,7 +89,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
|
|
||||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
Variation chip = (Variation) tracker.lookup(dbName, null);
|
Variation chip = (Variation) tracker.lookup(dbName, null);
|
||||||
if ( eval != null || chip != null )
|
if (eval != null || chip != null)
|
||||||
inc(chip, eval, ref);
|
inc(chip, eval, ref);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -102,9 +107,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
s.add(sb.toString());
|
s.add(sb.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** How many overall calls where made that aren't NO_CALLS or UNKNOWNS? */
|
||||||
* How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
|
|
||||||
*/
|
|
||||||
private int getNCalled() {
|
private int getNCalled() {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue