Fixing/cleaning up the vcf merge util
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3047 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cdec84aa8f
commit
c88a2a3027
|
|
@ -90,20 +90,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements Variation, Ite
|
|||
return mCurrentRecord.getNonRefAlleleFrequency();
|
||||
}
|
||||
|
||||
public boolean hasStrandBias() {
|
||||
assertNotNull();
|
||||
return this.mCurrentRecord.getInfoValues().containsKey(VCFRecord.STRAND_BIAS_KEY);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the strand bias of this variant
|
||||
*
|
||||
* @return StrandBias with the stored slod
|
||||
*/
|
||||
public double getStrandBias() {
|
||||
return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get(VCFRecord.STRAND_BIAS_KEY)) : 0.0;
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
public Variation.VARIANT_TYPE getType() {
|
||||
assertNotNull();
|
||||
|
|
|
|||
|
|
@ -69,14 +69,10 @@ public class VCFUtils {
|
|||
VCFParameters params = new VCFParameters();
|
||||
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
|
||||
|
||||
// keep track of the locus specific data so we can merge them intelligently
|
||||
int totalReadDepth = 0;
|
||||
// keep track of the data so we can merge them intelligently
|
||||
double maxConfidence = 0.0;
|
||||
double totalSLOD = 0.0;
|
||||
int SLODsSeen = 0;
|
||||
double totalFreq = 0.0;
|
||||
int freqsSeen = 0;
|
||||
String id = null;
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
List<String> filters = new ArrayList<String>();
|
||||
|
||||
for ( RodVCF rod : rods ) {
|
||||
|
|
@ -87,9 +83,6 @@ public class VCFUtils {
|
|||
if ( params.getPosition() < 1 )
|
||||
params.setLocations(rod.getLocation(), call.getReference());
|
||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
|
||||
int depth = call.getReadCount();
|
||||
if ( depth > 0 )
|
||||
totalReadDepth += call.getReadCount();
|
||||
}
|
||||
|
||||
// set the overall confidence to be the max entry we see
|
||||
|
|
@ -97,32 +90,16 @@ public class VCFUtils {
|
|||
if ( confidence > maxConfidence )
|
||||
maxConfidence = confidence;
|
||||
|
||||
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
|
||||
totalFreq += rod.getNonRefAlleleFrequency();
|
||||
freqsSeen++;
|
||||
}
|
||||
|
||||
if ( rod.hasStrandBias() ) {
|
||||
totalSLOD += rod.getStrandBias();
|
||||
SLODsSeen++;
|
||||
}
|
||||
|
||||
if ( rod.getID() != null )
|
||||
id = rod.getID();
|
||||
|
||||
if ( rod.isFiltered() )
|
||||
filters.add(rod.getFilterString());
|
||||
|
||||
// just take the last value we see for a given key
|
||||
infoFields.putAll(rod.getInfoValues());
|
||||
}
|
||||
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
infoFields.put(VCFRecord.DEPTH_KEY, String.format("%d", totalReadDepth));
|
||||
|
||||
// set the overall strand bias and allele frequency to be the average of all entries we've seen
|
||||
if ( SLODsSeen > 0 )
|
||||
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
|
||||
if ( freqsSeen > 0 )
|
||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
||||
|
||||
return new VCFRecord(params.getReferenceBases(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
|
|
@ -152,16 +129,10 @@ public class VCFUtils {
|
|||
}
|
||||
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
|
||||
|
||||
// calculate the genotype quality and the read depth
|
||||
record.setField(VCFGenotypeRecord.DEPTH_KEY, String.valueOf(gtype.getReadCount()));
|
||||
params.addFormatItem(VCFGenotypeRecord.DEPTH_KEY);
|
||||
double qual = Math.min(10.0 * gtype.getNegLog10PError(), VCFGenotypeRecord.MAX_QUAL_VALUE);
|
||||
if ( qual >= 0 )
|
||||
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%.2f", qual));
|
||||
else
|
||||
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%d", VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY));
|
||||
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
|
||||
for ( Map.Entry<String, String> entry : gtype.getFields().entrySet() ) {
|
||||
record.setField(entry.getKey(), entry.getValue());
|
||||
params.addFormatItem(entry.getKey());
|
||||
}
|
||||
|
||||
record.setVCFRecord(vcfrecord);
|
||||
return record;
|
||||
|
|
|
|||
|
|
@ -10,35 +10,35 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s";
|
||||
}
|
||||
|
||||
//@Test
|
||||
@Test
|
||||
public void testSimpleVenn() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
|
||||
Arrays.asList("cdd027a0d7bbfae2ba75480fcaa14356"));
|
||||
Arrays.asList("d9124d2b0fb5bec5bc50c26a16b4e900"));
|
||||
executeTest("testSimpleVenn", spec);
|
||||
}
|
||||
|
||||
//@Test
|
||||
@Test
|
||||
public void testSNPConcordance() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||
Arrays.asList("310bf0824fc407b2e40cbbaea234471e"));
|
||||
Arrays.asList("df1fbc744947f316f65f51a21368b0e4"));
|
||||
executeTest("testSNPConcordance", spec);
|
||||
}
|
||||
|
||||
//@Test
|
||||
@Test
|
||||
public void testNWayVenn() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
||||
Arrays.asList("179adafae6efdc879fc22442ab9e599f"));
|
||||
Arrays.asList("aa835ae5368b35f376b844d7f8ef2976"));
|
||||
executeTest("testNWayVenn", spec);
|
||||
}
|
||||
|
||||
//@Test
|
||||
@Test
|
||||
public void testMulti() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||
Arrays.asList("c8fe63633ef6ed2b6068958f06cddfe0"));
|
||||
Arrays.asList("c9ef68cc3b7dc08f1d2b49170e6560ab"));
|
||||
executeTest("testMulti", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue