-Misc improvements to VCF code

-Small fix to callset concordance


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2497 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-04 02:28:47 +00:00
parent 29c129aced
commit ed2fff13aa
6 changed files with 44 additions and 17 deletions

View File

@ -25,7 +25,8 @@ public class NWayVenn implements ConcordanceType {
TreeSet<String> concordantSamples = new TreeSet<String>(); TreeSet<String> concordantSamples = new TreeSet<String>();
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) { for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
concordantSamples.add(entry.getKey()); if ( !entry.getValue().isNoCall() )
concordantSamples.add(entry.getKey());
} }
StringBuffer tag = new StringBuffer(); StringBuffer tag = new StringBuffer();

View File

@ -34,7 +34,11 @@ public class SNPGenotypeConcordance implements ConcordanceType {
char refBase = ref.getBase(); char refBase = ref.getBase();
Genotype call1 = samplesToRecords.get(sample1); Genotype call1 = samplesToRecords.get(sample1);
if ( call1 != null && call1.isNoCall() )
call1 = null;
Genotype call2 = samplesToRecords.get(sample2); Genotype call2 = samplesToRecords.get(sample2);
if ( call2 != null && call2.isNoCall() )
call2 = null;
if ( call1 == null || call2 == null ) { if ( call1 == null || call2 == null ) {
if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) { if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) {

View File

@ -29,7 +29,11 @@ public class SimpleVenn implements ConcordanceType {
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) { public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
Genotype call1 = samplesToRecords.get(sample1); Genotype call1 = samplesToRecords.get(sample1);
if ( call1 != null && call1.isNoCall() )
call1 = null;
Genotype call2 = samplesToRecords.get(sample2); Genotype call2 = samplesToRecords.get(sample2);
if ( call2 != null && call2.isNoCall() )
call2 = null;
if ( call1 == null && call2 == null ) if ( call1 == null && call2 == null )
return null; return null;

View File

@ -247,19 +247,23 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) { public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
builder.append(toGenotypeString(altAlleles)); builder.append(toGenotypeString(altAlleles));
for ( String field : genotypeFormatStrings ) {
String value = mFields.get(field); if ( !isEmptyGenotype() ) {
if ( value == null && field.equals(OLD_DEPTH_KEY) ) for ( String field : genotypeFormatStrings ) {
String value = mFields.get(field);
if ( value == null && field.equals(OLD_DEPTH_KEY) )
value = mFields.get(DEPTH_KEY); value = mFields.get(DEPTH_KEY);
if ( value == null ) if ( value == null )
continue; continue;
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
if (value.equals("")) if (value.equals(""))
builder.append(getMissingFieldValue(field)); builder.append(getMissingFieldValue(field));
else else
builder.append(value); builder.append(value);
}
} }
return builder.toString(); return builder.toString();
} }

View File

@ -139,6 +139,8 @@ public class VCFUtils {
int SLODsSeen = 0; int SLODsSeen = 0;
double totalFreq = 0.0; double totalFreq = 0.0;
int freqsSeen = 0; int freqsSeen = 0;
String id = null;
String filter = null;
for ( RodVCF rod : rods ) { for ( RodVCF rod : rods ) {
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords(); List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
@ -158,7 +160,7 @@ public class VCFUtils {
if ( confidence > maxConfidence ) if ( confidence > maxConfidence )
maxConfidence = confidence; maxConfidence = confidence;
if ( rod.hasNonRefAlleleFrequency() ) { if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
totalFreq += rod.getNonRefAlleleFrequency(); totalFreq += rod.getNonRefAlleleFrequency();
freqsSeen++; freqsSeen++;
} }
@ -167,6 +169,12 @@ public class VCFUtils {
totalSLOD += rod.getStrandBias(); totalSLOD += rod.getStrandBias();
SLODsSeen++; SLODsSeen++;
} }
if ( rod.getID() != null )
id = rod.getID();
if ( rod.hasFilteringCodes() )
filter = rod.getFilterString();
} }
Map<String, String> infoFields = new HashMap<String, String>(); Map<String, String> infoFields = new HashMap<String, String>();
@ -178,16 +186,14 @@ public class VCFUtils {
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen))); infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
if ( freqsSeen > 0 ) if ( freqsSeen > 0 )
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen))); infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
// TODO -- "." and "0" are wrong -- need to use values from the records
return new VCFRecord(params.getReferenceBase(), return new VCFRecord(params.getReferenceBase(),
params.getContig(), params.getContig(),
params.getPosition(), params.getPosition(),
".", (id != null ? id : "."),
params.getAlternateBases(), params.getAlternateBases(),
maxConfidence, maxConfidence,
"0", (filter != null ? filter : "."),
infoFields, infoFields,
params.getFormatString(), params.getFormatString(),
params.getGenotypesRecords()); params.getGenotypesRecords());

View File

@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testNWayVenn() { public void testNWayVenn() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1, baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba")); Arrays.asList("86d2342fabc8c0916a6d42a29f750ea2"));
executeTest("testNWayVenn", spec); executeTest("testNWayVenn", spec);
} }
@ -41,4 +41,12 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893")); Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893"));
executeTest("testMulti", spec); executeTest("testMulti", spec);
} }
/**
 * Runs the walker with the NWayVenn concordance type (-CT NWayVenn) where the same
 * input, complexExample.vcf, is bound as both set1 and set2.
 *
 * NOTE(review): the hex string is presumably the expected checksum of the walker output
 * and the literal 1 the expected number of outputs -- confirm against WalkerTestSpec.
 */
@Test
public void testComplex() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1,
Arrays.asList("8b72e557c0dd111738eaa69e9003fb3f"));
executeTest("testComplex", spec);
}
} }