Several more walkers have been brought up to use the new Allele representation.

This commit is contained in:
Eric Banks 2012-07-27 02:14:25 -04:00
parent 9e2209694a
commit ef335b6213
6 changed files with 38 additions and 22 deletions

View File

@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
continue; continue;
if ( vc.isSimpleDeletion()) { if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length(); deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one // delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase); return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) { } else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) { } else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString()); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} }

View File

@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for ( VariantContext knownIndel : knownIndelsToTry ) { for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue; continue;
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); final byte[] indelStr;
if ( knownIndel.isSimpleInsertion() ) {
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
} else {
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
}
int start = knownIndel.getStart() - leftmostIndex + 1; int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null ) if ( c != null )

View File

@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
final byte[] refSeq = ref.getBases(); final byte[] refSeq = ref.getBases();
// get the indel length // get the indel length
int indelLength; final int indelLength;
if ( vc.isSimpleDeletion() ) if ( vc.isSimpleDeletion() )
indelLength = vc.getReference().length(); indelLength = vc.getReference().length() - 1;
else else
indelLength = vc.getAlternateAllele(0).length(); indelLength = vc.getAlternateAllele(0).length() - 1;
if ( indelLength > 200 ) { if ( indelLength > 200 ) {
writer.add(vc); writer.add(vc);
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
} }
// create an indel haplotype // create an indel haplotype
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength); final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
// create a CIGAR string to represent the event // create a CIGAR string to represent the event
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
int indelIndex = originalIndex-difference; final int indelIndex = originalIndex-difference;
byte[] newBases = new byte[indelLength]; final byte[] newBases = new byte[indelLength + 1];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); newBases[0] = refSeq[indelIndex-1];
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele);
writer.add(newVC); writer.add(newVC);
return 1; return 1;
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
if ( vc.isSimpleDeletion() ) { if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength; indexOfRef += indelLength;
} else { } else {
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
currentPos += indelLength; currentPos += indelLength;
} }
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
return hap; return hap;
} }
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
// create a mapping from original allele to new allele // create a mapping from original allele to new allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
if ( newAllele.isReference() ) { if ( newAllele.isReference() ) {
alleleMap.put(vc.getReference(), newAllele); alleleMap.put(vc.getReference(), newAllele);
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0)); alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
} else { } else {
alleleMap.put(vc.getReference(), vc.getReference()); alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
alleleMap.put(vc.getAlternateAllele(0), newAllele); alleleMap.put(vc.getAlternateAllele(0), newAllele);
} }

View File

@ -248,7 +248,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
builder.id(parts[2]); builder.id(parts[2]);
final String ref = getCachedString(parts[3].toUpperCase()); final String ref = getCachedString(parts[3].toUpperCase());
builder.stop(pos + ref.length() - 1);
final String alts = getCachedString(parts[4].toUpperCase()); final String alts = getCachedString(parts[4].toUpperCase());
builder.log10PError(parseQual(parts[5])); builder.log10PError(parseQual(parts[5]));
@ -257,6 +256,17 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
final Map<String, Object> attrs = parseInfo(parts[7]); final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs); builder.attributes(attrs);
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
} else {
builder.stop(pos + ref.length() - 1);
}
// get our alleles, filters, and setup an attribute map // get our alleles, filters, and setup an attribute map
final List<Allele> alleles = parseAlleles(ref, alts, lineNo); final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
builder.alleles(alleles); builder.alleles(alleles);

View File

@ -496,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/ */
public boolean isSimpleInsertion() { public boolean isSimpleInsertion() {
// can't just call !isSimpleDeletion() because of complex indels // can't just call !isSimpleDeletion() because of complex indels
return getType() == Type.INDEL && isBiallelic() && getReference().length() < getAlternateAllele(0).length(); return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
} }
/** /**
@ -504,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/ */
public boolean isSimpleDeletion() { public boolean isSimpleDeletion() {
// can't just call !isSimpleInsertion() because of complex indels // can't just call !isSimpleInsertion() because of complex indels
return getType() == Type.INDEL && isBiallelic() && getReference().length() > getAlternateAllele(0).length(); return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
} }
/** /**
@ -1120,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( hasAttribute(VCFConstants.END_KEY) ) { if ( hasAttribute(VCFConstants.END_KEY) ) {
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
assert end != -1; assert end != -1;
if ( end != getEnd() && end != getEnd() + 1 ) { if ( end != getEnd() ) {
// the end is allowed to 1 bigger because of the padding
final String message = "Badly formed variant context at location " + getChr() + ":" final String message = "Badly formed variant context at location " + getChr() + ":"
+ getStart() + "; getEnd() was " + getEnd() + getStart() + "; getEnd() was " + getEnd()
+ " but this VariantContext contains an END key with value " + end; + " but this VariantContext contains an END key with value " + end;

View File

@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec( WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1, 1,
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
executeTest("testFastaAlternateReferenceIndels", spec2); executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec( WalkerTestSpec spec3 = new WalkerTestSpec(