Remove the assumption that ADs are in the same order whenever the number of alleles matches. That assumption fails when samples carry different alternate alleles at the same site, for example when one sample is C->T and another sample is C->G.
This commit is contained in:
parent
c6a96c3958
commit
3a9a78c785
|
|
@ -119,7 +119,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get backward repeat unit and # repeats
|
||||
byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1);
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberOfRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
if (maxBW > 1) {
|
||||
bestBWRepeatUnit = backwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -139,7 +139,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get forward repeat unit and # repeats
|
||||
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1);
|
||||
maxFW = GATKVariantContextUtils.findNumberofRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
|
||||
maxFW = GATKVariantContextUtils.findNumberOfRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
|
||||
if (maxFW > 1) {
|
||||
bestFWRepeatUnit = forwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -157,7 +157,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
// but correct representation at that place might be (C)4.
|
||||
// Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add
|
||||
// representations to total
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberOfRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxRL = maxFW + maxBW;
|
||||
bestRepeatUnit = bestFWRepeatUnit;
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("5487ad609548c30e79a431115dc772ba"));
|
||||
Arrays.asList("9d9ddeb831e5512c5b1084ee22e65459"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +94,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("f7650a8a861dec3138848bb972929002"));
|
||||
Arrays.asList("aa0f9604bb496be143a6dde775e157fe"));
|
||||
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("df5a6a574c48c243fad5b44f34343fe3"));
|
||||
Arrays.asList("49f8ff728246d08cd20cd1c1521651f9"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,38 +92,38 @@ public class RepeatCovariatesUnitTest {
|
|||
@Test
|
||||
public void testFindNumberOfRepetitions() {
|
||||
// First, test logic to compute number of repetitions of a substring on a given string.
|
||||
int result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
int result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
Assert.assertEquals(2,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(1,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
// Same tests but looking backward on string
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACAC".getBytes(), false);
|
||||
Assert.assertEquals(2,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberOfRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
Assert.assertEquals(3,result);
|
||||
|
||||
// test logic to get repeat unit and number of repeats from covariate value
|
||||
|
|
@ -211,8 +211,8 @@ public class RepeatCovariatesUnitTest {
|
|||
Assert.assertEquals(rurlValM,rurlValI);
|
||||
|
||||
|
||||
int fw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
|
||||
int bw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
|
||||
int fw = GATKVariantContextUtils.findNumberOfRepetitions(ruValM.getBytes(), readBases.substring(offset + 1, readLength).getBytes(), true);
|
||||
int bw = GATKVariantContextUtils.findNumberOfRepetitions(ruValM.getBytes(), readBases.substring(0, offset + 1).getBytes(), false);
|
||||
Assert.assertEquals(Math.min(fw+bw,RAC.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -347,9 +347,9 @@ public class GATKVariantContextUtils {
|
|||
|
||||
final int[] repetitionCount = new int[2];
|
||||
// look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases)
|
||||
int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true);
|
||||
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
|
||||
int repetitionsInRef = findNumberOfRepetitions(repeatUnit, refBases, true);
|
||||
repetitionCount[0] = findNumberOfRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberOfRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
|
||||
|
||||
return new Pair<>(repetitionCount, repeatUnit);
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ public class GATKVariantContextUtils {
|
|||
* @param lookForward Look for repetitions forward (at beginning of string) or backward (at end of string)
|
||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's)
|
||||
*/
|
||||
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {
|
||||
public static int findNumberOfRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {
|
||||
int numRepeats = 0;
|
||||
if (lookForward) {
|
||||
// look forward on the test string
|
||||
|
|
@ -891,7 +891,7 @@ public class GATKVariantContextUtils {
|
|||
final String name = first.getSource();
|
||||
final Allele refAllele = determineReferenceAllele(VCs);
|
||||
|
||||
final Set<Allele> alleles = new LinkedHashSet<>();
|
||||
final LinkedHashSet<Allele> alleles = new LinkedHashSet<>();
|
||||
final Set<String> filters = new HashSet<>();
|
||||
final Map<String, Object> attributes = new LinkedHashMap<>();
|
||||
final Set<String> inconsistentAttributes = new HashSet<>();
|
||||
|
|
@ -1159,7 +1159,7 @@ public class GATKVariantContextUtils {
|
|||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID).alleles(allelesList)
|
||||
.chr(loc.getContig()).start(loc.getStart()).computeEndFromAlleles(allelesList, loc.getStart(), loc.getStart())
|
||||
.genotypes(genotypes).unfiltered().attributes(new TreeMap<>(attributes)).log10PError(CommonInfo.NO_LOG10_PERROR); // we will need to regenotype later
|
||||
.genotypes(genotypes).unfiltered().attributes(new TreeMap<>(attributes)).log10PError(CommonInfo.NO_LOG10_PERROR); // we will need to re-genotype later
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
|
@ -1289,7 +1289,7 @@ public class GATKVariantContextUtils {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
|
||||
public static GenotypesContext stripPLsAndAD(final GenotypesContext genotypes) {
|
||||
final GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
|
|
@ -1430,7 +1430,7 @@ public class GATKVariantContextUtils {
|
|||
return loc == null || loc.getStart() == vc.getStart();
|
||||
}
|
||||
|
||||
static private AlleleMapper resolveIncompatibleAlleles(final Allele refAllele, final VariantContext vc, final Set<Allele> allAlleles) {
|
||||
static private AlleleMapper resolveIncompatibleAlleles(final Allele refAllele, final VariantContext vc, final LinkedHashSet<Allele> allAlleles) {
|
||||
if ( refAllele.equals(vc.getReference()) )
|
||||
return new AlleleMapper(vc);
|
||||
else {
|
||||
|
|
@ -1606,7 +1606,7 @@ public class GATKVariantContextUtils {
|
|||
// create the index mapping, using the <ALT> allele whenever such a mapping doesn't exist
|
||||
for ( int i = 1; i < targetAlleles.size(); i++ ) {
|
||||
final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i));
|
||||
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfGenericAlt: indexOfRemappedAllele;
|
||||
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfGenericAlt : indexOfRemappedAllele;
|
||||
}
|
||||
|
||||
return indexMapping;
|
||||
|
|
@ -1656,9 +1656,6 @@ public class GATKVariantContextUtils {
|
|||
if ( originalAD == null || indexesOfRelevantAlleles == null ) throw new IllegalArgumentException("The list of input AD values and alleles must not be null");
|
||||
|
||||
final int numADs = indexesOfRelevantAlleles.length;
|
||||
if ( numADs == originalAD.length )
|
||||
return originalAD;
|
||||
|
||||
final int[] newAD = new int[numADs];
|
||||
|
||||
for ( int i = 0; i < numADs; i++ ) {
|
||||
|
|
|
|||
|
|
@ -858,11 +858,11 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberOfRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberOfRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberOfRepetitions("T".getBytes(), "T".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberOfRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberOfRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
|
||||
|
|
|
|||
Loading…
Reference in New Issue