extended SimulateReadsForVariants to optionally use the AF field to indicate allele fraction of the simulated event, useful in cancer and other variable ploidy use cases
This commit is contained in:
parent
34ece31f4a
commit
7115cadbd8
|
|
@ -124,6 +124,11 @@ public class SimulateReadsForVariants extends RodWalker<Integer, Integer> {
|
|||
*/
|
||||
@Argument(fullName="readLength", shortName="RL", doc="Read lengths (bp)", required=false, minValue = 1, maxValue = Integer.MAX_VALUE)
|
||||
public int readLength = 101;
|
||||
/**
|
||||
* Use this argument to simulate events at a non-50/50 allele fraction represented in the VCF by AF (used for somatic event simulation)
|
||||
*/
|
||||
@Argument(fullName="useAFAsAlleleFraction", shortName="AF", doc="Use AF in VCF as event allele fraction ", required=false)
|
||||
public boolean useAFAsAlleleFraction = false;
|
||||
/**
|
||||
* The corresponding platform identifier will be specified in the simulated read group PL tag. This setting does not
|
||||
* affect the properties of the simulated reads.
|
||||
|
|
@ -237,7 +242,7 @@ public class SimulateReadsForVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( verbose ) logger.info(String.format("Generating reads for %s", vc));
|
||||
|
||||
generateReadsForVariant(vc, ref);
|
||||
generateReadsForVariant(vc, ref, useAFAsAlleleFraction);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -289,19 +294,21 @@ public class SimulateReadsForVariants extends RodWalker<Integer, Integer> {
|
|||
*
|
||||
* @param vc the (bi-allelic) variant context for which to generate artificial reads
|
||||
* @param ref the original reference context
|
||||
* @param useAFAsAlleleFraction use AF tag to indicate allele fraction
|
||||
*/
|
||||
private void generateReadsForVariant(final VariantContext vc, final ReferenceContext ref) {
|
||||
private void generateReadsForVariant(final VariantContext vc, final ReferenceContext ref, boolean useAFAsAlleleFraction) {
|
||||
|
||||
final int refLength = vc.getReference().getBases().length;
|
||||
final ArtificialHaplotype refHap = constructHaplotype(vc.getReference(), refLength, ref);
|
||||
final ArtificialHaplotype altHap = constructHaplotype(vc.getAlternateAllele(0), refLength, ref);
|
||||
|
||||
int gi = 0;
|
||||
double refAlleleFraction = (useAFAsAlleleFraction)?1-vc.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0.5):0.5f;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int myDepth = sampleDepth();
|
||||
for ( int d = 0; d < myDepth; d++ ) {
|
||||
|
||||
final ArtificialHaplotype haplotype = chooseRefHaplotype(g) ? refHap : altHap;
|
||||
final ArtificialHaplotype haplotype = chooseRefHaplotype(g, refAlleleFraction) ? refHap : altHap;
|
||||
final byte[] readBases = Arrays.copyOf(haplotype.bases, readLength);
|
||||
|
||||
addMachineErrors(readBases, errorRate);
|
||||
|
|
@ -314,12 +321,14 @@ public class SimulateReadsForVariants extends RodWalker<Integer, Integer> {
|
|||
* Decides whether or not to choose the reference haplotype, depending on the given genotype
|
||||
*
|
||||
* @param g the genotype of the given sample
|
||||
* @param pReferenceGivenHet probability of choosing reference for hets
|
||||
*
|
||||
* @return true if one should use the reference haplotype, false otherwise
|
||||
*/
|
||||
private boolean chooseRefHaplotype(final Genotype g) {
|
||||
private boolean chooseRefHaplotype(final Genotype g, final double pReferenceGivenHet) {
|
||||
final double refP;
|
||||
if ( g.isHomRef() ) refP = 1;
|
||||
else if ( g.isHet() ) refP = 0.5;
|
||||
else if ( g.isHet() ) refP = pReferenceGivenHet;
|
||||
else refP = 0.0;
|
||||
|
||||
return ran.nextDouble() < refP;
|
||||
|
|
|
|||
|
|
@ -92,4 +92,17 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("26db391f223ead74d786006a502029d8"));
|
||||
executeTest("testPlatformTag", spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAlleleFraction() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SimulateReadsForVariants --no_pg_tag --useAFAsAlleleFraction -DP 100 -R " + b37KGReference + " -V " + publicTestDir + "forAlleleFractionSimulation.vcf -o %s",
|
||||
1,
|
||||
Arrays.asList("3425c807525dff71310d1517e00a4f7e"));
|
||||
executeTest("testAlleleFraction", spec);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
##fileformat=VCFv4.1
|
||||
##phasing=none
|
||||
##INDIVIDUAL=TRUTH
|
||||
##SAMPLE=<ID=TRUTH,Individual="TRUTH",Description="bamsurgeon spike-in">
|
||||
##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Somatic mutation in primary">
|
||||
##INFO=<ID=AF,Number=1,Type=Float,Description="Variant Allele Frequency">
|
||||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
||||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR
|
||||
20 100000 . A T 100 PASS SOMATIC;AF=0.500 GT 0/1
|
||||
20 101000 . T C 100 PASS SOMATIC;AF=0.400 GT 0/1
|
||||
20 102000 . A C 100 PASS SOMATIC;AF=0.300 GT 0/1
|
||||
20 103000 . C T 100 PASS SOMATIC;AF=0.200 GT 0/1
|
||||
20 104000 . A C 100 PASS SOMATIC;AF=0.100 GT 0/1
|
||||
20 105000 . A C 100 PASS SOMATIC;AF=0.050 GT 0/1
|
||||
20 105500 . A G 100 PASS SOMATIC;AF=0.020 GT 0/1
|
||||
20 106000 . CAT C 100 PASS SOMATIC;AF=0.500 GT 0/1
|
||||
20 107000 . ACT A 100 PASS SOMATIC;AF=0.400 GT 0/1
|
||||
20 107994 . AAG A 100 PASS SOMATIC;AF=0.300 GT 0/1
|
||||
20 108999 . GCA G 100 PASS SOMATIC;AF=0.200 GT 0/1
|
||||
20 110009 . CAT C 100 PASS SOMATIC;AF=0.100 GT 0/1
|
||||
20 112000 . ACC A 100 PASS SOMATIC;AF=0.050 GT 0/1
|
||||
20 112500 . AGC A 100 PASS SOMATIC;AF=0.020 GT 0/1
|
||||
20 113000 . G GAAT 100 PASS SOMATIC;AF=0.500 GT 0/1
|
||||
20 113999 . G GTGA 100 PASS SOMATIC;AF=0.400 GT 0/1
|
||||
20 115000 . T TCCA 100 PASS SOMATIC;AF=0.300 GT 0/1
|
||||
20 116000 . G GTTT 100 PASS SOMATIC;AF=0.200 GT 0/1
|
||||
20 117000 . T TCCA 100 PASS SOMATIC;AF=0.100 GT 0/1
|
||||
20 118000 . C CTGT 100 PASS SOMATIC;AF=0.050 GT 0/1
|
||||
20 119000 . C CTGT 100 PASS SOMATIC;AF=0.020 GT 0/1
|
||||
Binary file not shown.
Loading…
Reference in New Issue