Sequenom to VCF now allows user to specify filters for QC, and they will appear in the filter field of the output VCF
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2577 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f96b2b211e
commit
424d1b57f7
|
|
@ -33,6 +33,12 @@ public class SequenomToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
public File popFile = null;
|
||||
@Argument(fullName="useb36ContigNames",shortName="b36contig",doc="Uses b36 contig names (1:1,000,000) rather than hg18 (chr1:1,000,000) for comparison with ref", required=false)
|
||||
public boolean useb36contigs=false;
|
||||
@Argument(fullName="maxHardy", doc="Maximum Hardy-Weinberg score to consider an assay valid", required=false)
|
||||
public double maxHardy = 10;
|
||||
@Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a proportion) to consider an assay valid", required=false)
|
||||
public double maxNoCall = 0.05;
|
||||
@Argument(fullName="maxHomNonref", doc="Maximum homozygous-nonreference rate (as a proportion) to consider an assay valid", required = false)
|
||||
public double maxHomNonref = 1.1;
|
||||
|
||||
private final int INIT_NUMBER_OF_POPULATIONS = 10;
|
||||
private final int DEFAULT_QUALITY = 20;
|
||||
|
|
@ -136,41 +142,52 @@ public class SequenomToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
sampleNumber++;
|
||||
}
|
||||
|
||||
double noCallProp = ( (double) numNoCalls )/( (double) sampleNames.size());
|
||||
double homNonRProp = ( (double) numHomNonrefCalls )/( (double) sampleNames.size() - numNoCalls);
|
||||
|
||||
record.setQual(DEFAULT_QUALITY);
|
||||
record.addInfoFields(generateInfoField(record, numNoCalls,numHomNonrefCalls,numNonrefAlleles,ref, varInfo));
|
||||
String hw = hardyWeinbergCalculation(ref,record);
|
||||
double hwScore = hw != null ? Double.valueOf(hw) : -0.0;
|
||||
record.addInfoFields(generateInfoField(record, numNoCalls,numHomNonrefCalls,numNonrefAlleles,ref, varInfo, hwScore));
|
||||
if ( hwScore > maxHardy ) {
|
||||
record.setFilterString("Hardy-Weinberg");
|
||||
} else if ( noCallProp > maxNoCall ) {
|
||||
record.setFilterString("No-calls");
|
||||
} else if ( homNonRProp > maxHomNonref) {
|
||||
record.setFilterString("HomNonref-calls");
|
||||
}
|
||||
|
||||
return record;
|
||||
|
||||
}
|
||||
|
||||
private String hardyWeinbergCalculation(ReferenceContext ref, VCFRecord rec) {
|
||||
if ( useSmartHardy ) {
|
||||
return smartHardy(ref, rec);
|
||||
} else {
|
||||
VCFVariationCall variant = new VCFVariationCall(ref.getBase(),ref.getLocus(),VCFVariationCall.VARIANT_TYPE.SNP);
|
||||
variant.setGenotypeCalls(rec.getGenotypes());
|
||||
return HWCalc.annotate(null, ref, null, variant);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String,String> generateInfoField(VCFRecord rec, int nocall, int homnonref, int allnonref,
|
||||
ReferenceContext ref, SequenomVariantInfo info) {
|
||||
ReferenceContext ref, SequenomVariantInfo info, double hwScore) {
|
||||
double propNoCall = ( ( double ) nocall / (double) nSamples );
|
||||
double propHomNR = ( (double) homnonref / (double) nSamples );
|
||||
String hardy;
|
||||
|
||||
VCFVariationCall variant = new VCFVariationCall(ref.getBase(),ref.getLocus(),VCFVariationCall.VARIANT_TYPE.SNP);
|
||||
variant.setGenotypeCalls(rec.getGenotypes());
|
||||
|
||||
if ( useSmartHardy ) {
|
||||
hardy = smartHardy(ref, rec);
|
||||
} else {
|
||||
hardy = HWCalc.annotate(null, ref, null, variant);
|
||||
}
|
||||
|
||||
|
||||
HashMap<String,String> infoMap = new HashMap<String,String>(1);
|
||||
putInfoStrings(infoMap,propNoCall,propHomNR,allnonref,hardy,info.getName());
|
||||
putInfoStrings(infoMap,propNoCall,propHomNR,allnonref,hwScore,info.getName());
|
||||
|
||||
return infoMap;
|
||||
}
|
||||
|
||||
private void putInfoStrings(HashMap<String,String> infoMap, double pnc, double phnr, int nra, String hw, String nm) {
|
||||
private void putInfoStrings(HashMap<String,String> infoMap, double pnc, double phnr, int nra, double hw, String nm) {
|
||||
|
||||
infoMap.put("snpID",nm);
|
||||
infoMap.put("noCallPct",String.format("%.2f",100.0*pnc));
|
||||
infoMap.put("homNonrefPct",String.format("%.2f",100.0*phnr));
|
||||
infoMap.put("nonrefAlleles",String.format("%d",nra));
|
||||
infoMap.put("HW",hw);
|
||||
infoMap.put("HW",String.format("%.2f",hw));
|
||||
|
||||
//return String.format("snpID=%s;nocall=%f;homNonref=%4f;numNonrefAlleles=%d;HW=%s",nm,pnc,phnr,nra,hw);
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import java.util.List;
|
|||
public class SequenomToVCFIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testSequenomToVCFWithoutSmartHardy() {
|
||||
String testMD5 = "c6c2055d304674c2f7014c50f5cc160e";
|
||||
String testMD5 = "a16ce402ce4492c8c0552073c0a8bdf5";
|
||||
String testPedFile = "/humgen/gsa-hpprojects/GATK/data/Validation_Data/Sequenom_Test_File.txt";
|
||||
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -L 1:1000000-2000000 "+
|
||||
"-T SequenomToVCF -b36contig -ns 10 -sPed "+testPedFile+" -vcf %s";
|
||||
|
|
|
|||
Loading…
Reference in New Issue