Intermediate fix for pool GL unit test: a) Fix up artificial read pileup provider to give consistent data. b) Increase downsampling in pool integration tests with reference sample, and shorten MT tests so they don't take too long.
This commit is contained in:
parent
2ae890155c
commit
5b9a1af7fe
|
|
@ -68,10 +68,10 @@ public class ErrorModel {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||||
if (refSampleVC.isIndel()) {
|
if (refSampleVC.isIndel()) {
|
||||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
|
||||||
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||||
}
|
}
|
||||||
|
|
@ -96,7 +96,8 @@ public class ErrorModel {
|
||||||
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||||
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||||
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
if (!haplotypeMap.isEmpty())
|
||||||
|
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||||
}
|
}
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
for (PileupElement refPileupElement : refSamplePileup) {
|
for (PileupElement refPileupElement : refSamplePileup) {
|
||||||
|
|
@ -108,7 +109,7 @@ public class ErrorModel {
|
||||||
if (DEBUG) System.out.println(m);
|
if (DEBUG) System.out.println(m);
|
||||||
isMatch |= m;
|
isMatch |= m;
|
||||||
}
|
}
|
||||||
if (refSampleVC.isIndel()) {
|
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
|
||||||
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
||||||
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
||||||
if (!isInformativeElement(perAlleleLikelihoods))
|
if (!isInformativeElement(perAlleleLikelihoods))
|
||||||
|
|
@ -173,10 +174,10 @@ public class ErrorModel {
|
||||||
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
||||||
if (allele.isReference()) {
|
if (allele.isReference()) {
|
||||||
// for a ref allele, any base mismatch or new indel is a mismatch.
|
// for a ref allele, any base mismatch or new indel is a mismatch.
|
||||||
if(allele.getBases().length>0 )
|
if(allele.getBases().length>0)
|
||||||
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
||||||
// could clean up/simplify this when unpadding is removed
|
// could clean up/simplify this when unpadding is removed
|
||||||
return (pileupElement.getBase() == refBase);
|
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||||
else
|
else
|
||||||
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
||||||
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -48,7 +49,12 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
|
||||||
|
|
||||||
myAlleles = new ArrayList<Allele>(alleles);
|
myAlleles = new ArrayList<Allele>(alleles);
|
||||||
|
|
||||||
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
|
Allele refAllele = alleles.get(0);
|
||||||
|
// sanity check: by construction, the first allele should ALWAYS be the reference allele
|
||||||
|
if (!refAllele.isReference())
|
||||||
|
throw new ReviewedStingException("BUG: First allele in list passed to PoolSNPGenotypeLikelihoods should be reference!");
|
||||||
|
|
||||||
|
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
|
||||||
|
|
||||||
if (myAlleles.size() < BaseUtils.BASES.length) {
|
if (myAlleles.size() < BaseUtils.BASES.length) {
|
||||||
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
|
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
|
||||||
|
|
|
||||||
|
|
@ -18,27 +18,27 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
||||||
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||||
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||||
final String REFSAMPLE_NAME = "NA12878";
|
final String REFSAMPLE_NAME = "NA12878";
|
||||||
final String MTINTERVALS = "MT";
|
final String MTINTERVALS = "MT:1-3000";
|
||||||
final String LSVINTERVALS = "20:40,000,000-41,000,000";
|
final String LSVINTERVALS = "20:40,000,000-41,000,000";
|
||||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||||
private void PC_MT_Test(String bam, String args, String name, String md5) {
|
private void PC_MT_Test(String bam, String args, String name, String md5) {
|
||||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
|
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
|
||||||
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
|
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void PC_LSV_Test(String args, String name, String model, String md5) {
|
private void PC_LSV_Test(String args, String name, String model, String md5) {
|
||||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
|
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
|
||||||
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
|
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
|
||||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||||
|
|
@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBOTH_GGA_Pools() {
|
public void testBOTH_GGA_Pools() {
|
||||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","d8cba4ec4267d7d766081fcead845d08");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testINDEL_GGA_Pools() {
|
public void testINDEL_GGA_Pools() {
|
||||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","8e9b7e89c439b430e95b146a7540c72e");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
|
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
|
||||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
|
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","96087fe9240e3656cc2a4e0ff0174d5b");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
|
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
|
||||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
|
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","6fdae7093831ecfc82a06dd707d62fe9");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMT_SNP_DISCOVERY_sp4() {
|
public void testMT_SNP_DISCOVERY_sp4() {
|
||||||
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
|
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMT_SNP_GGA_sp10() {
|
public void testMT_SNP_GGA_sp10() {
|
||||||
|
|
||||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
|
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -392,8 +392,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
final byte refByte = readPileupTestProvider.getRefByte();
|
final byte refByte = readPileupTestProvider.getRefByte();
|
||||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||||
|
|
||||||
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
|
|
||||||
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
|
|
||||||
|
|
||||||
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
||||||
final Set<String> laneIDs = new TreeSet<String>();
|
final Set<String> laneIDs = new TreeSet<String>();
|
||||||
|
|
@ -411,17 +409,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
for (String laneID : laneIDs)
|
for (String laneID : laneIDs)
|
||||||
noisyErrorModels.put(laneID, Q30ErrorModel);
|
noisyErrorModels.put(laneID, Q30ErrorModel);
|
||||||
|
|
||||||
|
// all first ref allele
|
||||||
|
allAlleles.add(Allele.create(refByte,true));
|
||||||
for (byte b: BaseUtils.BASES) {
|
for (byte b: BaseUtils.BASES) {
|
||||||
if (refByte == b)
|
if (refByte != b)
|
||||||
allAlleles.add(Allele.create(b,true));
|
|
||||||
else
|
|
||||||
allAlleles.add(Allele.create(b, false));
|
allAlleles.add(Allele.create(b, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final int refIdx = 0;
|
||||||
|
int altIdx = -1;
|
||||||
|
|
||||||
|
for (int k=0; k < allAlleles.size(); k++)
|
||||||
|
if (altByte == allAlleles.get(k).getBases()[0]) {
|
||||||
|
altIdx = k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PrintStream out = null;
|
PrintStream out = null;
|
||||||
if (SIMULATE_NOISY_PILEUP) {
|
if (SIMULATE_NOISY_PILEUP) {
|
||||||
try {
|
try {
|
||||||
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
|
out = new PrintStream(new File("GLUnitTest.table"));
|
||||||
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
|
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
|
||||||
}
|
}
|
||||||
catch (Exception e) {}
|
catch (Exception e) {}
|
||||||
|
|
|
||||||
|
|
@ -62,9 +62,9 @@ public class ArtificialReadPileupTestProvider {
|
||||||
List<String> sampleNames = new ArrayList<String>();
|
List<String> sampleNames = new ArrayList<String>();
|
||||||
private String sampleName(int i) { return sampleNames.get(i); }
|
private String sampleName(int i) { return sampleNames.get(i); }
|
||||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||||
public final int offset = 5;
|
public final int locStart = 5; // 1-based
|
||||||
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart); //1-based
|
||||||
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||||
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||||
|
|
||||||
|
|
@ -103,22 +103,22 @@ public class ArtificialReadPileupTestProvider {
|
||||||
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
||||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||||
|
|
||||||
|
String refStr = new String(new byte[]{referenceContext.getBase()});
|
||||||
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
||||||
Allele refAllele, altAllele;
|
Allele refAllele, altAllele;
|
||||||
if (eventLength == 0) {// SNP case
|
if (eventLength == 0) {// SNP case
|
||||||
refAllele =Allele.create(referenceContext.getBase(),true);
|
refAllele =Allele.create(refStr,true);
|
||||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||||
|
|
||||||
} else if (eventLength>0){
|
} else if (eventLength>0){
|
||||||
// insertion
|
// insertion
|
||||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
refAllele = Allele.create(refStr, true);
|
||||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
altAllele = Allele.create(refStr+altBases.substring(0,eventLength), false);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// deletion
|
// deletion
|
||||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
refAllele =Allele.create(refBases.substring(locStart-1,locStart+Math.abs(eventLength)-1),true);
|
||||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
altAllele = Allele.create(refBases.substring(locStart-1,locStart), false);
|
||||||
}
|
}
|
||||||
int stop = loc.getStart();
|
int stop = loc.getStart();
|
||||||
vcAlleles.add(refAllele);
|
vcAlleles.add(refAllele);
|
||||||
|
|
@ -153,18 +153,15 @@ public class ArtificialReadPileupTestProvider {
|
||||||
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
||||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
int readStart = contigStart;
|
int readStart = contigStart;
|
||||||
int offset = (contigStop-contigStart+1)/2;
|
|
||||||
int refAlleleLength = 0;
|
int refAlleleLength = vc.getReference().getBases().length;
|
||||||
int readCounter = 0;
|
int readCounter = 0;
|
||||||
int alleleCounter = 0;
|
int alleleCounter = 0;
|
||||||
for (Allele allele: vc.getAlleles()) {
|
for (Allele allele: vc.getAlleles()) {
|
||||||
if (allele.isReference())
|
|
||||||
refAlleleLength = allele.getBases().length;
|
|
||||||
|
|
||||||
int alleleLength = allele.getBases().length;
|
int alleleLength = allele.getBases().length;
|
||||||
|
|
||||||
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
||||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
byte[] readBases = trueHaplotype(allele, locStart, vc.getReference());
|
||||||
if (addErrors)
|
if (addErrors)
|
||||||
addBaseErrors(readBases, phredScaledErrorRate);
|
addBaseErrors(readBases, phredScaledErrorRate);
|
||||||
|
|
||||||
|
|
@ -176,20 +173,20 @@ public class ArtificialReadPileupTestProvider {
|
||||||
read.setReadBases(readBases);
|
read.setReadBases(readBases);
|
||||||
read.setReadName(artificialReadName+readCounter++);
|
read.setReadName(artificialReadName+readCounter++);
|
||||||
|
|
||||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
boolean isBeforeDeletion = alleleLength<refAlleleLength;
|
||||||
|
boolean isBeforeInsertion = alleleLength>refAlleleLength;
|
||||||
|
|
||||||
|
int eventLength = alleleLength - refAlleleLength;
|
||||||
if (allele.isReference())
|
if (allele.isReference())
|
||||||
read.setCigarString(readBases.length + "M");
|
read.setCigarString(readBases.length + "M");
|
||||||
else {
|
else {
|
||||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
|
||||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
|
||||||
if (isBeforeDeletion || isBeforeInsertion)
|
if (isBeforeDeletion || isBeforeInsertion)
|
||||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
read.setCigarString(locStart+"M"+ eventLength + (isBeforeDeletion?"D":"I") +
|
||||||
(readBases.length-offset)+"M");
|
(readBases.length-locStart)+"M");
|
||||||
else // SNP case
|
else // SNP case
|
||||||
read.setCigarString(readBases.length+"M");
|
read.setCigarString(readBases.length+"M");
|
||||||
}
|
}
|
||||||
|
|
||||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
|
||||||
read.setReadPairedFlag(false);
|
read.setReadPairedFlag(false);
|
||||||
read.setAlignmentStart(readStart);
|
read.setAlignmentStart(readStart);
|
||||||
read.setMappingQuality(artificialMappingQuality);
|
read.setMappingQuality(artificialMappingQuality);
|
||||||
|
|
@ -198,7 +195,7 @@ public class ArtificialReadPileupTestProvider {
|
||||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||||
|
|
||||||
|
|
||||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
pileupElements.add(new PileupElement(read,locStart-1,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength-1),eventLength));
|
||||||
}
|
}
|
||||||
alleleCounter++;
|
alleleCounter++;
|
||||||
}
|
}
|
||||||
|
|
@ -206,11 +203,22 @@ public class ArtificialReadPileupTestProvider {
|
||||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
/**
|
||||||
|
* create haplotype based on a particular allele
|
||||||
|
* @param allele Allele of interest. ASSUMED TO INCLUDE REF BASE AT startPosition!
|
||||||
|
* @param startPosition 1-based start position of allele
|
||||||
|
* @param refAllele REF allele
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private byte[] trueHaplotype(Allele allele, int startPosition, Allele refAllele) {
|
||||||
|
|
||||||
// create haplotype based on a particular allele
|
// create haplotype based on a particular allele
|
||||||
String prefix = refBases.substring(offset);
|
// startPosition is 1-based.
|
||||||
|
// so, if startPosition == 5, we need to include positions 1 to 4, or indices 0 to 3 of the string
|
||||||
|
String prefix = refBases.substring(0,startPosition-1);
|
||||||
String alleleBases = new String(allele.getBases());
|
String alleleBases = new String(allele.getBases());
|
||||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
// where to start postfix? We have (startPosition-1) prefix bases + refAllele.length bases before postfix
|
||||||
|
String postfix = refBases.substring(startPosition -1 + refAllele.getBases().length,refBases.length());
|
||||||
|
|
||||||
return (prefix+alleleBases+postfix).getBytes();
|
return (prefix+alleleBases+postfix).getBytes();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue