Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCRUM-4176 Fix loading of TranscriptGeneAssociations when transcript parent attribute not curie #1669

Merged
merged 3 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,6 @@ private boolean runLoad(
List<Long> associationIdsAdded,
BackendBulkDataProvider dataProvider, String assemblyId) {

Map<String, String> geneIdCurieMap = gff3Service.getIdCurieMap(gffData);

ProcessDisplayHelper ph = new ProcessDisplayHelper();
ph.addDisplayHandler(loadProcessDisplayService);
ph.startProcess("GFF CDS update for " + dataProvider.name(), gffData.size());
Expand Down Expand Up @@ -129,7 +127,7 @@ private boolean runLoad(
if (assemblyId != null) {
countType = "Locations";
try {
gff3Service.loadCDSLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId, geneIdCurieMap);
gff3Service.loadCDSLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId);
history.incrementCompleted(countType);
} catch (ObjectUpdateException e) {
history.incrementFailed(countType);
Expand All @@ -142,7 +140,7 @@ private boolean runLoad(
}
countType = "Associations";
try {
gff3Service.loadCDSParentChildAssociations(gff3EntryPair, associationIdsAdded, dataProvider, geneIdCurieMap);
gff3Service.loadCDSParentChildAssociations(gff3EntryPair, associationIdsAdded, dataProvider);
history.incrementCompleted(countType);
} catch (ObjectUpdateException e) {
history.incrementFailed(countType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,6 @@ private boolean runLoad(
List<Long> associationIdsAdded,
BackendBulkDataProvider dataProvider, String assemblyId) {

Map<String, String> geneIdCurieMap = gff3Service.getIdCurieMap(gffData);

ProcessDisplayHelper ph = new ProcessDisplayHelper();
ph.addDisplayHandler(loadProcessDisplayService);
ph.startProcess("GFF Exon update for " + dataProvider.name(), gffData.size());
Expand Down Expand Up @@ -131,7 +129,7 @@ private boolean runLoad(
if (assemblyId != null) {
countType = "Locations";
try {
gff3Service.loadExonLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId, geneIdCurieMap);
gff3Service.loadExonLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId);
history.incrementCompleted(countType);
} catch (ObjectUpdateException e) {
history.incrementFailed(countType);
Expand All @@ -145,7 +143,7 @@ private boolean runLoad(

countType = "Associations";
try {
gff3Service.loadExonParentChildAssociations(gff3EntryPair, associationIdsAdded, dataProvider, geneIdCurieMap);
gff3Service.loadExonParentChildAssociations(gff3EntryPair, associationIdsAdded, dataProvider);
history.incrementCompleted(countType);
} catch (ObjectUpdateException e) {
history.incrementFailed(countType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType());

List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider);
Map<String, String> geneIdCurieMap = gff3Service.getGeneIdCurieMap(gffData, dataProvider);

gffData.clear();

Expand All @@ -73,7 +74,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
addException(bulkLoadFileHistory, new ObjectUpdateExceptionData(null, "GFF Header does not contain assembly", null));
}

boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, entityIdsAdded, locationIdsAdded, associationIdsAdded, dataProvider, assemblyId);
boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, geneIdCurieMap, entityIdsAdded, locationIdsAdded, associationIdsAdded, dataProvider, assemblyId);

if (success) {
runCleanup(transcriptService, bulkLoadFileHistory, dataProvider.name(), transcriptService.getIdsByDataProvider(dataProvider), entityIdsAdded, "GFF transcript");
Expand All @@ -93,14 +94,13 @@ private boolean runLoad(
BulkLoadFileHistory history,
List<String> gffHeaderData,
List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData,
Map<String, String> geneIdCurieMap,
List<Long> entityIdsAdded,
List<Long> locationIdsAdded,
List<Long> associationIdsAdded,
BackendBulkDataProvider dataProvider,
String assemblyId) {

Map<String, String> geneIdCurieMap = gff3Service.getIdCurieMap(gffData);

ProcessDisplayHelper ph = new ProcessDisplayHelper();
ph.addDisplayHandler(loadProcessDisplayService);
ph.startProcess("GFF Transcript update for " + dataProvider.name(), gffData.size());
Expand Down Expand Up @@ -128,7 +128,7 @@ private boolean runLoad(
if (assemblyId != null) {
countType = "Locations";
try {
gff3Service.loadTranscriptLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId, geneIdCurieMap);
gff3Service.loadTranscriptLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId);
history.incrementCompleted(countType);
} catch (ObjectUpdateException e) {
history.incrementFailed(countType);
Expand Down Expand Up @@ -162,9 +162,10 @@ public APIResponse runLoadApi(String dataProviderName, String assemblyName, List
List<Long> idsAdded = new ArrayList<>();
BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName);
List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider);
Map<String, String> geneIdCurieMap = gff3Service.getGeneIdCurieMap(gffData, dataProvider);
BulkLoadFileHistory history = new BulkLoadFileHistory();
history = bulkLoadFileHistoryDAO.persist(history);
runLoad(history, null, preProcessedTranscriptGffData, idsAdded, idsAdded, idsAdded, dataProvider, assemblyName);
runLoad(history, null, preProcessedTranscriptGffData, geneIdCurieMap, idsAdded, idsAdded, idsAdded, dataProvider, assemblyName);
history.finishLoad();

return new LoadHistoryResponce(history);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptExonAssociationService;
import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGeneAssociationService;
import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGenomicLocationAssociationService;
import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper;
import org.alliancegenome.curation_api.services.helpers.gff3.Gff3UniqueIdHelper;
import org.alliancegenome.curation_api.services.ontology.NcbiTaxonTermService;
import org.alliancegenome.curation_api.services.validation.dto.Gff3DtoValidator;
Expand Down Expand Up @@ -58,7 +59,7 @@ public class Gff3Service {
@Inject Gff3DtoValidator gff3DtoValidator;

@Transactional
public void loadExonLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map<String, String> geneIdCurieMap) throws ValidationException {
public void loadExonLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) throws ValidationException {
Gff3DTO gffEntry = gffEntryPair.getKey();

if (StringUtils.isBlank(assemblyId)) {
Expand All @@ -85,7 +86,7 @@ public void loadExonLocationAssociations(ImmutablePair<Gff3DTO, Map<String, Stri
}

@Transactional
public void loadCDSLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map<String, String> geneIdCurieMap) throws ValidationException {
public void loadCDSLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) throws ValidationException {
Gff3DTO gffEntry = gffEntryPair.getKey();
Map<String, String> attributes = gffEntryPair.getValue();
if (StringUtils.isBlank(assemblyId)) {
Expand All @@ -112,7 +113,7 @@ public void loadCDSLocationAssociations(ImmutablePair<Gff3DTO, Map<String, Strin
}

@Transactional
public void loadTranscriptLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map<String, String> geneIdCurieMap) throws ValidationException {
public void loadTranscriptLocationAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) throws ValidationException {
Gff3DTO gffEntry = gffEntryPair.getKey();
Map<String, String> attributes = gffEntryPair.getValue();
if (StringUtils.isBlank(assemblyId)) {
Expand Down Expand Up @@ -140,7 +141,7 @@ public void loadTranscriptLocationAssociations(ImmutablePair<Gff3DTO, Map<String
}

@Transactional
public void loadExonParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, Map<String, String> geneIdCurieMap) throws ValidationException {
public void loadExonParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider) throws ValidationException {
Gff3DTO gffEntry = gffEntryPair.getKey();

if (!StringUtils.equals(gffEntry.getType(), "exon") && !StringUtils.equals(gffEntry.getType(), "noncoding_exon")) {
Expand All @@ -164,7 +165,7 @@ public void loadExonParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, S
}

@Transactional
public void loadCDSParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider, Map<String, String> geneIdCurieMap) throws ValidationException {
public void loadCDSParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair, List<Long> idsAdded, BackendBulkDataProvider dataProvider) throws ValidationException {
Gff3DTO gffEntry = gffEntryPair.getKey();
Map<String, String> attributes = gffEntryPair.getValue();

Expand Down Expand Up @@ -211,13 +212,15 @@ public void loadGeneParentChildAssociations(ImmutablePair<Gff3DTO, Map<String, S
}
}

public Map<String, String> getIdCurieMap(List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData) {
public Map<String, String> getGeneIdCurieMap(List<Gff3DTO> gffData, BackendBulkDataProvider dataProvider) {
Map<String, String> geneIdCurieMap = new HashMap<>();

for (ImmutablePair<Gff3DTO, Map<String, String>> gffEntryPair : gffData) {
Map<String, String> attributes = gffEntryPair.getValue();
if (attributes.containsKey("ID") && attributes.containsKey("gene_id")) {
geneIdCurieMap.put(attributes.get("ID"), attributes.get("gene_id"));
for (Gff3DTO gffEntry : gffData) {
if (gffEntry.getType().contains("gene")) {
Map<String, String> attributes = Gff3AttributesHelper.getAttributes(gffEntry, dataProvider);
if (attributes.containsKey("gene_id") && attributes.containsKey("ID")) {
geneIdCurieMap.put(attributes.get("ID"), attributes.get("gene_id"));
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,28 @@ public static List<ImmutablePair<Gff3DTO, Map<String, String>>> getTranscriptGff

private static void processGffEntry(Gff3DTO originalGffEntry, List<ImmutablePair<Gff3DTO, Map<String, String>>> retGffData, BackendBulkDataProvider dataProvider) {
Map<String, String> attributes = getAttributes(originalGffEntry, dataProvider);
if (attributes.containsKey("Parent") && attributes.get("Parent").indexOf(",") > -1) {
for (String parent : attributes.get("Parent").split(",")) {
HashMap<String, String> attributesCopy = new HashMap<>();
attributesCopy.putAll(attributes);
String[] parentIdParts = parent.split(":");
if (parentIdParts.length == 1) {
parent = dataProvider.name() + ':' + parentIdParts[0];
if (attributes.containsKey("Parent")) {
if (attributes.get("Parent").indexOf(",") > -1) {
for (String parent : attributes.get("Parent").split(",")) {
if (!parent.endsWith("_transposable_element")) {
HashMap<String, String> attributesCopy = new HashMap<>();
attributesCopy.putAll(attributes);
String[] parentIdParts = parent.split(":");
if (parentIdParts.length == 1) {
parent = dataProvider.name() + ':' + parentIdParts[0];
}
attributesCopy.put("Parent", parent);
retGffData.add(new ImmutablePair<>(originalGffEntry, attributesCopy));
}
}
} else {
if (!attributes.get("Parent").endsWith("_transposable_element")) {
retGffData.add(new ImmutablePair<>(originalGffEntry, attributes));
}
attributesCopy.put("Parent", parent);
retGffData.add(new ImmutablePair<>(originalGffEntry, attributesCopy));
}
} else {
retGffData.add(new ImmutablePair<>(originalGffEntry, attributes));
}

}

}
Loading