Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNVs - other variants considered as part of phased MNV annotation #624

Open
wants to merge 2 commits into
base: release-5.2.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ public CellBaseDataResult<ProteinVariantAnnotation> getVariantAnnotation(String
if (!aaAlternate.equals("STOP") && !aaReference.equals("STOP")) {
TranscriptQuery query = new TranscriptQuery();
query.setTranscriptsId(Collections.singletonList(ensemblTranscriptId));
query.setDataRelease(dataRelease);
proteinVariantAnnotation.setSubstitutionScores(getSubstitutionScores(query, position, aaAlternate).getResults());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,28 @@ public static SequenceOntologyTerm newSequenceOntologyTerm(String name) throws S
return new SequenceOntologyTerm(ConsequenceTypeMappings.getSoAccessionString(name), name);
}


public static String buildVariantId(Variant variant) {
if (variant == null) {
return null;
}
return buildVariantId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate());
}

public static String buildVariantIds(List<Variant> variants) {
StringBuilder variantIds = new StringBuilder();
for (Variant variant : variants) {
if (variant == null) {
continue;
}
variantIds.append(buildVariantId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate()));
}
if (variantIds == null) {
return null;
}
return variantIds.toString();
}

public static String buildVariantId(String chromosome, int start, String reference, String alternate) {
StringBuilder stringBuilder = new StringBuilder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,8 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray, int dataRelease
consequenceType3.setSequenceOntologyTerms(soTerms);

// Flag these transcripts as already updated for this variant
flagTranscriptAnnotationUpdated(variant2, consequenceType1.getEnsemblTranscriptId());
flagTranscriptAnnotationUpdated(variant2, consequenceType1.getEnsemblTranscriptId(),
Arrays.asList(variant0, variant1));

variant2DisplayCTNeedsUpdate = true;

Expand Down Expand Up @@ -921,8 +922,8 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray, int dataRelease
consequenceType2.setSequenceOntologyTerms(soTerms);

// Flag these transcripts as already updated for this variant
flagTranscriptAnnotationUpdated(variant0, consequenceType1.getEnsemblTranscriptId());
flagTranscriptAnnotationUpdated(variant1, consequenceType1.getEnsemblTranscriptId());
flagTranscriptAnnotationUpdated(variant0, consequenceType1.getEnsemblTranscriptId(), Arrays.asList((variant1)));
flagTranscriptAnnotationUpdated(variant1, consequenceType1.getEnsemblTranscriptId(), Arrays.asList((variant0)));

variant0DisplayCTNeedsUpdate = true;
variant1DisplayCTNeedsUpdate = true;
Expand All @@ -947,24 +948,25 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray, int dataRelease
}
}

private void flagTranscriptAnnotationUpdated(Variant variant, String ensemblTranscriptId) {
private void flagTranscriptAnnotationUpdated(Variant variant, String ensemblTranscriptId, List<Variant> phasedVariants) {
Map<String, AdditionalAttribute> additionalAttributesMap = variant.getAnnotation().getAdditionalAttributes();
if (additionalAttributesMap == null) {
additionalAttributesMap = new HashMap<>();
AdditionalAttribute additionalAttribute = new AdditionalAttribute();
Map<String, String> transcriptsSet = new HashMap<>();
transcriptsSet.put(ensemblTranscriptId, null);
transcriptsSet.put(ensemblTranscriptId, VariantAnnotationUtils.buildVariantIds(phasedVariants));
additionalAttribute.setAttribute(transcriptsSet);
additionalAttributesMap.put("phasedTranscripts", additionalAttribute);
variant.getAnnotation().setAdditionalAttributes(additionalAttributesMap);
} else if (additionalAttributesMap.get("phasedTranscripts") == null) {
AdditionalAttribute additionalAttribute = new AdditionalAttribute();
Map<String, String> transcriptsSet = new HashMap<>();
transcriptsSet.put(ensemblTranscriptId, null);
transcriptsSet.put(ensemblTranscriptId, VariantAnnotationUtils.buildVariantIds(phasedVariants));
additionalAttribute.setAttribute(transcriptsSet);
additionalAttributesMap.put("phasedTranscripts", additionalAttribute);
} else {
additionalAttributesMap.get("phasedTranscripts").getAttribute().put(ensemblTranscriptId, null);
additionalAttributesMap.get("phasedTranscripts").getAttribute().put(ensemblTranscriptId,
VariantAnnotationUtils.buildVariantIds(phasedVariants));
}
}

Expand Down Expand Up @@ -1297,23 +1299,24 @@ private ConsequenceTypeCalculator getConsequenceTypeCalculator(Variant variant)
}
}

private boolean[] getRegulatoryRegionOverlaps(Variant variant) throws QueryException, IllegalAccessException, CellBaseException {
private boolean[] getRegulatoryRegionOverlaps(Variant variant, int dataRelease)
throws QueryException, IllegalAccessException, CellBaseException {
// 0: overlaps any regulatory region type
// 1: overlaps transcription factor binding site
boolean[] overlapsRegulatoryRegion = {false, false};

// Variant type checked in expected order of frequency of occurrence to minimize number of checks
// Most queries will be SNVs - it's worth implementing an special case for them
if (VariantType.SNV.equals(variant.getType())) {
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart());
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart(), dataRelease);
} else if (VariantType.INDEL.equals(variant.getType()) && StringUtils.isBlank(variant.getReference())) {
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart() - 1, variant.getEnd());
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart() - 1, variant.getEnd(), dataRelease);
// Short deletions and symbolic variants except breakends
} else if (!VariantType.BREAKEND.equals(variant.getType())) {
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart(), variant.getEnd());
return getRegulatoryRegionOverlaps(variant.getChromosome(), variant.getStart(), variant.getEnd(), dataRelease);
// Breakend "variants" only annotate features overlapping the exact positions
} else {
overlapsRegulatoryRegion = getRegulatoryRegionOverlaps(variant.getChromosome(), Math.max(1, variant.getStart()));
overlapsRegulatoryRegion = getRegulatoryRegionOverlaps(variant.getChromosome(), Math.max(1, variant.getStart()), dataRelease);
// If already found one overlapping regulatory region there's no need to keep checking
if (overlapsRegulatoryRegion[0]) {
return overlapsRegulatoryRegion;
Expand All @@ -1322,15 +1325,15 @@ private boolean[] getRegulatoryRegionOverlaps(Variant variant) throws QueryExcep
if (variant.getSv() != null && variant.getSv().getBreakend() != null
&& variant.getSv().getBreakend().getMate() != null) {
return getRegulatoryRegionOverlaps(variant.getSv().getBreakend().getMate().getChromosome(),
Math.max(1, variant.getSv().getBreakend().getMate().getPosition()));
Math.max(1, variant.getSv().getBreakend().getMate().getPosition()), dataRelease);
} else {
return overlapsRegulatoryRegion;
}
}
}
}

private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer position)
private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer position, int dataRelease)
throws QueryException, IllegalAccessException, CellBaseException {
// 0: overlaps any regulatory region type
// 1: overlaps transcription factor binding site
Expand All @@ -1339,6 +1342,7 @@ private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer positio
RegulationQuery query = new RegulationQuery();
query.setIncludes(Collections.singletonList(REGULATORY_REGION_FEATURE_TYPE_ATTRIBUTE));
query.setRegions(Collections.singletonList(new Region(chromosome, position)));
query.setDataRelease(dataRelease);
CellBaseDataResult<RegulatoryFeature> cellBaseDataResult = regulationManager.search(query);

if (cellBaseDataResult.getNumResults() > 0) {
Expand All @@ -1357,7 +1361,7 @@ private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer positio
return overlapsRegulatoryRegion;
}

private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer start, Integer end)
private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer start, Integer end, int dataRelease)
throws QueryException, IllegalAccessException, CellBaseException {
// 0: overlaps any regulatory region type
// 1: overlaps transcription factor binding site
Expand All @@ -1369,7 +1373,7 @@ private boolean[] getRegulatoryRegionOverlaps(String chromosome, Integer start,
query.setLimit(1);
query.setRegions(Collections.singletonList(new Region(chromosome, start, end)));
query.setFeatureTypes(Collections.singletonList(TF_BINDING_SITE));

query.setDataRelease(dataRelease);
CellBaseDataResult<RegulatoryFeature> cellBaseDataResult = regulationManager.search(query);

// Overlaps transcription factor binding site - it's therefore a regulatory variant
Expand Down Expand Up @@ -1407,7 +1411,7 @@ private List<ConsequenceType> getConsequenceTypeList(Variant variant, List<Gene>
throws QueryException, IllegalAccessException, CellBaseException {
boolean[] overlapsRegulatoryRegion = {false, false};
if (regulatoryAnnotation) {
overlapsRegulatoryRegion = getRegulatoryRegionOverlaps(variant);
overlapsRegulatoryRegion = getRegulatoryRegionOverlaps(variant, dataRelease);
}
ConsequenceTypeCalculator consequenceTypeCalculator = getConsequenceTypeCalculator(variant);
List<ConsequenceType> consequenceTypeList = consequenceTypeCalculator.run(variant, geneList,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager;
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.result.QueryResult;

import java.io.IOException;
import java.nio.file.Path;
Expand Down Expand Up @@ -1995,4 +1998,150 @@ public void testSpliceAnnotation() throws Exception {
assertEquals(1, cellBaseDataResult.getNumMatches());
}

private void initGrch38() throws Exception {

clearDB(CELLBASE_DBNAME);

createDataRelease();
dataRelease = 1;

jsonObjectMapper = new ObjectMapper();
jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);



Path path = Paths.get(getClass()
.getResource("/variant-annotation/grch38/gene.test.json.gz").toURI());
loadRunner.load(path, "gene", dataRelease);
updateDataRelease(dataRelease, "gene", Collections.emptyList());
path = Paths.get(getClass()
.getResource("/variant-annotation/grch38/genome_sequence.test.json.gz").toURI());
loadRunner.load(path, "genome_sequence", dataRelease);
updateDataRelease(dataRelease, "genome_sequence", Collections.emptyList());
path = Paths.get(getClass()
.getResource("/variant-annotation/grch38/clinical_variants.full.test.json.gz").toURI());
loadRunner.load(path, "clinical_variants", dataRelease);
updateDataRelease(dataRelease, "clinical_variants", Collections.emptyList());

// Create empty collection
createEmptyCollection("regulatory_region", dataRelease);
updateDataRelease(dataRelease, "regulatory_region", Collections.emptyList());

// Create empty collection
createEmptyCollection("protein", dataRelease);
updateDataRelease(dataRelease, "protein", Collections.emptyList());

// Create empty collection
createEmptyCollection("missense_variation_functional_score", dataRelease);
updateDataRelease(dataRelease, "missense_variation_functional_score", Collections.emptyList());

// Create empty collection
createEmptyCollection("protein_functional_prediction", dataRelease);
updateDataRelease(dataRelease, "protein_functional_prediction", Collections.emptyList());

// Create empty collection
createEmptyCollection("refseq", dataRelease);
updateDataRelease(dataRelease, "refseq", Collections.emptyList());

// Create empty collection
createEmptyCollection("conservation", dataRelease);
updateDataRelease(dataRelease, "conservation", Collections.emptyList());

// Create empty collection
createEmptyCollection("variation_functional_score", dataRelease);
updateDataRelease(dataRelease, "variation_functional_score", Collections.emptyList());

// Create empty collection
createEmptyCollection("splice_score", dataRelease);
updateDataRelease(dataRelease, "splice_score", Collections.emptyList());

variantAnnotationCalculator = new VariantAnnotationCalculator("hsapiens", "GRCh37", dataRelease,
cellBaseManagerFactory);
}

private static final String PHASE_DATA_URL_SEPARATOR = "\\+";
private static final String VARIANT_STRING_FORMAT = "\\+";

@Test
public void testMNVAdditionalProperties() throws Exception {

initGrch38();

// chr19:13025339:C:A+1|1+999 - synonymous
// chr19:13025341:G:T+1|1+999 - synonymous

Variant variant1 = parseVariant("chr19:13025339:C:A+1|1+999");
Variant variant2 = parseVariant("chr19:13025341:G:T+1|1+999");

List<Variant> variantList = new ArrayList<>();
variantList.add(variant1);
variantList.add(variant2);

QueryOptions queryOptions = new QueryOptions("useCache", false);
queryOptions.put("include", "consequenceType, reference, alternate, clinical");
queryOptions.put("normalize", true);
queryOptions.put("skipDecompose", false);
queryOptions.put("checkAminoAcidChange", false);
queryOptions.put("imprecise", true);
queryOptions.put("phased", true);
queryOptions.put("dataRelease", 1);

List<CellBaseDataResult<VariantAnnotation>> queryResult = variantAnnotationCalculator.getAnnotationByVariantList(variantList,
queryOptions);

assertEquals(2, queryResult.size());

VariantAnnotation v1 = queryResult.get(0).getResults().get(0);
VariantAnnotation v2 = queryResult.get(1).getResults().get(0);

assertEquals("missense_variant", v1.getDisplayConsequenceType());
assertEquals("missense_variant", v2.getDisplayConsequenceType());

Map<String, String> additionalAttributes1 = (Map<String, String>) v1.getAdditionalAttributes().get("phasedTranscripts").get("attribute");
Map<String, String> additionalAttributes2 = (Map<String, String>) v1.getAdditionalAttributes().get("phasedTranscripts").get("attribute");

//System.out.println(additionalAttributes1);

assertEquals(12, additionalAttributes1.size());
assertEquals(12, additionalAttributes2.size());
}

private Variant parseVariant(String variantString) {
String[] variantStringPartArray = variantString.split(PHASE_DATA_URL_SEPARATOR);

VariantBuilder variantBuilder;
if (variantStringPartArray.length > 0) {
variantBuilder = new VariantBuilder(variantStringPartArray[0]);
// Either 1 or 3 parts expected variant+GT+PS
if (variantStringPartArray.length == 3) {
List<String> formatList = new ArrayList<>(2);
// If phase set tag is not provided not phase data is added at all to the Variant object
if (!variantStringPartArray[2].isEmpty()) {
formatList.add(AnnotationBasedPhasedQueryManager.PHASE_SET_TAG);
List<String> sampleData = new ArrayList<>(2);
sampleData.add(variantStringPartArray[2]);
// Genotype field might be empty - just PS would be added to Variant object in that case
if (!variantStringPartArray[1].isEmpty()) {
formatList.add(AnnotationBasedPhasedQueryManager.GENOTYPE_TAG);
sampleData.add(variantStringPartArray[1]);
}
variantBuilder.setSampleDataKeys(formatList);
SampleEntry sampleEntry = new SampleEntry();
sampleEntry.setData(sampleData);
variantBuilder.setSamples(Collections.singletonList(sampleEntry));
}
} else if (variantStringPartArray.length > 3) {
throw new IllegalArgumentException("Malformed variant string " + variantString + ". "
+ "variantString+GT+PS expected, where variantString needs 3 or 4 fields separated by ':'. "
+ "Format: \"" + VARIANT_STRING_FORMAT + "\"");
}
} else {
throw new IllegalArgumentException("Malformed variant string " + variantString + ". "
+ "variantString+GT+PS expected, where variantString needs 3 or 4 fields separated by ':'. "
+ "Format: \"" + VARIANT_STRING_FORMAT + "\"");
}

return variantBuilder.build();
}
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.