Several more walkers have been brought up to use the new Allele representation.

This commit is contained in:
Eric Banks 2012-07-27 02:14:25 -04:00
parent 9e2209694a
commit ef335b6213
6 changed files with 38 additions and 22 deletions

View File

@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
continue;
if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length();
deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
}

View File

@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue;
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
final byte[] indelStr;
if ( knownIndel.isSimpleInsertion() ) {
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
} else {
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
}
int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null )

View File

@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
final byte[] refSeq = ref.getBases();
// get the indel length
int indelLength;
final int indelLength;
if ( vc.isSimpleDeletion() )
indelLength = vc.getReference().length();
indelLength = vc.getReference().length() - 1;
else
indelLength = vc.getAlternateAllele(0).length();
indelLength = vc.getAlternateAllele(0).length() - 1;
if ( indelLength > 200 ) {
writer.add(vc);
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
}
// create an indel haplotype
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
// create a CIGAR string to represent the event
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
int indelIndex = originalIndex-difference;
byte[] newBases = new byte[indelLength];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
final int indelIndex = originalIndex-difference;
final byte[] newBases = new byte[indelLength + 1];
newBases[0] = refSeq[indelIndex-1];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele);
writer.add(newVC);
return 1;
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength;
} else {
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
currentPos += indelLength;
}
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
return hap;
}
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
// create a mapping from original allele to new allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
if ( newAllele.isReference() ) {
alleleMap.put(vc.getReference(), newAllele);
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
} else {
alleleMap.put(vc.getReference(), vc.getReference());
alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
alleleMap.put(vc.getAlternateAllele(0), newAllele);
}

View File

@ -248,7 +248,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
builder.id(parts[2]);
final String ref = getCachedString(parts[3].toUpperCase());
builder.stop(pos + ref.length() - 1);
final String alts = getCachedString(parts[4].toUpperCase());
builder.log10PError(parseQual(parts[5]));
@ -257,6 +256,17 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs);
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
} else {
builder.stop(pos + ref.length() - 1);
}
// get our alleles, filters, and set up an attribute map
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
builder.alleles(alleles);

View File

@ -496,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public boolean isSimpleInsertion() {
// can't just call !isSimpleDeletion() because of complex indels
// NOTE(review): this span comes from a rendered diff hunk, so two alternative
// return statements appear back to back; presumably the first is the line being
// replaced and the second is its replacement -- confirm against the repository.
// Length-comparison form: INDEL type, biallelic, and the reference allele is
// shorter than the first alternate allele.
return getType() == Type.INDEL && isBiallelic() && getReference().length() < getAlternateAllele(0).length();
// Fixed-length form: INDEL type, biallelic, and the reference allele has length
// exactly 1 (presumably just the leading reference base -- TODO confirm).
return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
}
/**
@ -504,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public boolean isSimpleDeletion() {
// can't just call !isSimpleInsertion() because of complex indels
// NOTE(review): this span comes from a rendered diff hunk, so two alternative
// return statements appear back to back; presumably the first is the line being
// replaced and the second is its replacement -- confirm against the repository.
// Length-comparison form: INDEL type, biallelic, and the reference allele is
// longer than the first alternate allele.
return getType() == Type.INDEL && isBiallelic() && getReference().length() > getAlternateAllele(0).length();
// Fixed-length form: INDEL type, biallelic, and the first alternate allele has
// length exactly 1 (presumably just the leading reference base -- TODO confirm).
return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
}
/**
@ -1120,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( hasAttribute(VCFConstants.END_KEY) ) {
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
assert end != -1;
if ( end != getEnd() && end != getEnd() + 1 ) {
// the end is allowed to be 1 bigger because of the padding
if ( end != getEnd() ) {
final String message = "Badly formed variant context at location " + getChr() + ":"
+ getStart() + "; getEnd() was " + getEnd()
+ " but this VariantContext contains an END key with value " + end;

View File

@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1,
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec(