- Misc improvements to VCF code
- Small fix to callset concordance

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2497 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
29c129aced
commit
ed2fff13aa
|
|
@ -25,7 +25,8 @@ public class NWayVenn implements ConcordanceType {
|
||||||
|
|
||||||
TreeSet<String> concordantSamples = new TreeSet<String>();
|
TreeSet<String> concordantSamples = new TreeSet<String>();
|
||||||
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
|
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
|
||||||
concordantSamples.add(entry.getKey());
|
if ( !entry.getValue().isNoCall() )
|
||||||
|
concordantSamples.add(entry.getKey());
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuffer tag = new StringBuffer();
|
StringBuffer tag = new StringBuffer();
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,11 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
||||||
char refBase = ref.getBase();
|
char refBase = ref.getBase();
|
||||||
|
|
||||||
Genotype call1 = samplesToRecords.get(sample1);
|
Genotype call1 = samplesToRecords.get(sample1);
|
||||||
|
if ( call1 != null && call1.isNoCall() )
|
||||||
|
call1 = null;
|
||||||
Genotype call2 = samplesToRecords.get(sample2);
|
Genotype call2 = samplesToRecords.get(sample2);
|
||||||
|
if ( call2 != null && call2.isNoCall() )
|
||||||
|
call2 = null;
|
||||||
|
|
||||||
if ( call1 == null || call2 == null ) {
|
if ( call1 == null || call2 == null ) {
|
||||||
if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) {
|
if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) {
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,11 @@ public class SimpleVenn implements ConcordanceType {
|
||||||
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||||
|
|
||||||
Genotype call1 = samplesToRecords.get(sample1);
|
Genotype call1 = samplesToRecords.get(sample1);
|
||||||
|
if ( call1 != null && call1.isNoCall() )
|
||||||
|
call1 = null;
|
||||||
Genotype call2 = samplesToRecords.get(sample2);
|
Genotype call2 = samplesToRecords.get(sample2);
|
||||||
|
if ( call2 != null && call2.isNoCall() )
|
||||||
|
call2 = null;
|
||||||
|
|
||||||
if ( call1 == null && call2 == null )
|
if ( call1 == null && call2 == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -247,19 +247,23 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
||||||
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
|
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
builder.append(toGenotypeString(altAlleles));
|
builder.append(toGenotypeString(altAlleles));
|
||||||
for ( String field : genotypeFormatStrings ) {
|
|
||||||
String value = mFields.get(field);
|
if ( !isEmptyGenotype() ) {
|
||||||
if ( value == null && field.equals(OLD_DEPTH_KEY) )
|
for ( String field : genotypeFormatStrings ) {
|
||||||
|
String value = mFields.get(field);
|
||||||
|
if ( value == null && field.equals(OLD_DEPTH_KEY) )
|
||||||
value = mFields.get(DEPTH_KEY);
|
value = mFields.get(DEPTH_KEY);
|
||||||
if ( value == null )
|
if ( value == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||||
if (value.equals(""))
|
if (value.equals(""))
|
||||||
builder.append(getMissingFieldValue(field));
|
builder.append(getMissingFieldValue(field));
|
||||||
else
|
else
|
||||||
builder.append(value);
|
builder.append(value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -139,6 +139,8 @@ public class VCFUtils {
|
||||||
int SLODsSeen = 0;
|
int SLODsSeen = 0;
|
||||||
double totalFreq = 0.0;
|
double totalFreq = 0.0;
|
||||||
int freqsSeen = 0;
|
int freqsSeen = 0;
|
||||||
|
String id = null;
|
||||||
|
String filter = null;
|
||||||
|
|
||||||
for ( RodVCF rod : rods ) {
|
for ( RodVCF rod : rods ) {
|
||||||
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
||||||
|
|
@ -158,7 +160,7 @@ public class VCFUtils {
|
||||||
if ( confidence > maxConfidence )
|
if ( confidence > maxConfidence )
|
||||||
maxConfidence = confidence;
|
maxConfidence = confidence;
|
||||||
|
|
||||||
if ( rod.hasNonRefAlleleFrequency() ) {
|
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
|
||||||
totalFreq += rod.getNonRefAlleleFrequency();
|
totalFreq += rod.getNonRefAlleleFrequency();
|
||||||
freqsSeen++;
|
freqsSeen++;
|
||||||
}
|
}
|
||||||
|
|
@ -167,6 +169,12 @@ public class VCFUtils {
|
||||||
totalSLOD += rod.getStrandBias();
|
totalSLOD += rod.getStrandBias();
|
||||||
SLODsSeen++;
|
SLODsSeen++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( rod.getID() != null )
|
||||||
|
id = rod.getID();
|
||||||
|
|
||||||
|
if ( rod.hasFilteringCodes() )
|
||||||
|
filter = rod.getFilterString();
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, String> infoFields = new HashMap<String, String>();
|
Map<String, String> infoFields = new HashMap<String, String>();
|
||||||
|
|
@ -178,16 +186,14 @@ public class VCFUtils {
|
||||||
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
|
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
|
||||||
if ( freqsSeen > 0 )
|
if ( freqsSeen > 0 )
|
||||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
||||||
|
|
||||||
// TODO -- "." and "0" are wrong -- need to use values from the records
|
|
||||||
|
|
||||||
return new VCFRecord(params.getReferenceBase(),
|
return new VCFRecord(params.getReferenceBase(),
|
||||||
params.getContig(),
|
params.getContig(),
|
||||||
params.getPosition(),
|
params.getPosition(),
|
||||||
".",
|
(id != null ? id : "."),
|
||||||
params.getAlternateBases(),
|
params.getAlternateBases(),
|
||||||
maxConfidence,
|
maxConfidence,
|
||||||
"0",
|
(filter != null ? filter : "."),
|
||||||
infoFields,
|
infoFields,
|
||||||
params.getFormatString(),
|
params.getFormatString(),
|
||||||
params.getGenotypesRecords());
|
params.getGenotypesRecords());
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
public void testNWayVenn() {
|
public void testNWayVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
||||||
Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba"));
|
Arrays.asList("86d2342fabc8c0916a6d42a29f750ea2"));
|
||||||
executeTest("testNWayVenn", spec);
|
executeTest("testNWayVenn", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -41,4 +41,12 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893"));
|
Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893"));
|
||||||
executeTest("testMulti", spec);
|
executeTest("testMulti", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplex() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1,
|
||||||
|
Arrays.asList("8b72e557c0dd111738eaa69e9003fb3f"));
|
||||||
|
executeTest("testComplex", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue