Skip to content

Commit d4258a0

Browse files
Merge pull request #451 from Breeding-Insight/bug/BI-2573
BI-2573 Germplasm file import assigns pedigree incorrectly
2 parents 14acf7a + 8076cd1 commit d4258a0

File tree

4 files changed

+216
-14
lines changed

4 files changed

+216
-14
lines changed

src/main/java/org/breedinginsight/brapps/importer/services/processors/GermplasmProcessor.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ public GermplasmProcessor(BrAPIGermplasmService brAPIGermplasmService, DSLContex
115115

116116
public void getExistingBrapiData(List<BrAPIImport> importRows, Program program) throws ApiException {
117117

118+
// BI-2573 - sort by entry no here so ordering is consistent everywhere in processor
119+
importRows.sort((left, right) -> {
120+
if (left.getGermplasm().getEntryNo() == null || right.getGermplasm().getEntryNo() == null) {
121+
return 0;
122+
} else {
123+
Integer leftEntryNo = Integer.parseInt(left.getGermplasm().getEntryNo());
124+
Integer rightEntryNo = Integer.parseInt(right.getGermplasm().getEntryNo());
125+
return leftEntryNo.compareTo(rightEntryNo);
126+
}
127+
});
128+
118129
// Get all of our objects specified in the data file by their unique attributes
119130
Map<String, Boolean> germplasmAccessionNumbers = new HashMap<>();
120131
for (int i = 0; i < importRows.size(); i++) {
@@ -265,16 +276,7 @@ public Map<String, ImportPreviewStatistics> process(ImportUpload upload, List<Br
265276
Map<String, Integer> entryNumberCounts = new HashMap<>();
266277
List<String> userProvidedEntryNumbers = new ArrayList<>();
267278
ValidationErrors validationErrors = new ValidationErrors();
268-
// Sort importRows by entry number (if present).
269-
importRows.sort((left, right) -> {
270-
if (left.getGermplasm().getEntryNo() == null || right.getGermplasm().getEntryNo() == null) {
271-
return 0;
272-
} else {
273-
Integer leftEntryNo = Integer.parseInt(left.getGermplasm().getEntryNo());
274-
Integer rightEntryNo = Integer.parseInt(right.getGermplasm().getEntryNo());
275-
return leftEntryNo.compareTo(rightEntryNo);
276-
}
277-
});
279+
278280
for (int i = 0; i < importRows.size(); i++) {
279281
log.debug("processing germplasm row: " + (i+1));
280282
BrAPIImport brapiImport = importRows.get(i);

src/test/java/org/breedinginsight/brapps/importer/GermplasmFileImportTest.java

Lines changed: 196 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,19 @@
3030
import org.breedinginsight.utilities.Utilities;
3131
import org.jooq.DSLContext;
3232
import org.junit.jupiter.api.*;
33+
import org.junit.jupiter.params.ParameterizedTest;
34+
import org.junit.jupiter.params.provider.ValueSource;
35+
import tech.tablesaw.api.Row;
3336
import tech.tablesaw.api.Table;
3437

3538
import javax.inject.Inject;
3639
import java.io.File;
3740
import java.time.OffsetDateTime;
38-
import java.util.ArrayList;
39-
import java.util.HashMap;
40-
import java.util.List;
41-
import java.util.Map;
41+
import java.util.*;
4242

4343
import static io.micronaut.http.HttpRequest.GET;
4444
import static io.micronaut.http.HttpRequest.POST;
45+
import static org.apache.commons.lang3.StringUtils.isNotBlank;
4546
import static org.junit.jupiter.api.Assertions.*;
4647

4748
@MicronautTest
@@ -733,6 +734,197 @@ public void selfReferenceParentError() {
733734
assertEquals(GermplasmProcessor.circularDependency, result.getAsJsonObject("progress").get("message").getAsString());
734735
}
735736

737+
/**
738+
* Test preview and commit data when entry numbers are in ascending order in file. This is the normal case and here
739+
* to catch any possible regressions
740+
*
741+
* @param commit controls whether import is preview or commit
742+
*/
743+
@ParameterizedTest
744+
@ValueSource(booleans = {false, true})
745+
@SneakyThrows
746+
public void entryNoAscending(boolean commit) {
747+
String pathname = "src/test/resources/files/germplasm_import/entry_no_asc.csv";
748+
Table fileData = Table.read().file(pathname);
749+
String listName = "EntryNoAsc";
750+
String listDescription = "Entry numbers in ascending order with pedigree";
751+
752+
JsonObject result = importGermplasm(pathname, listName, listDescription, commit);
753+
assertEquals(200, result.getAsJsonObject("progress").get("statuscode").getAsInt());
754+
755+
// preview table is sorted by entry number
756+
fileData = fileData.sortAscendingOn("Entry No");
757+
758+
JsonArray previewRows = result.get("preview").getAsJsonObject().get("rows").getAsJsonArray();
759+
checkEntryNoFields(fileData, previewRows, commit, listName, listDescription, "EntryNoAscGerm 2", "EntryNoAscGerm 3");
760+
}
761+
762+
/**
763+
* Prior to BI-2573 this file would result in a false positive circular dependency error. The reason is that when
764+
* entry no order did not match germplasm file order, pedigree information was assigned to the wrong germplasm
765+
* record due to inconsistencies in sorting during processing.
766+
*
767+
* Test preview and commit data when entry numbers are in descending order in file
768+
*
769+
* @param commit controls whether import is preview or commit
770+
*/
771+
@ParameterizedTest
772+
@ValueSource(booleans = {false, true})
773+
@SneakyThrows
774+
public void entryNoDescending(boolean commit) {
775+
String pathname = "src/test/resources/files/germplasm_import/entry_no_desc.csv";
776+
Table fileData = Table.read().file(pathname);
777+
String listName = "EntryNoDesc";
778+
String listDescription = "Entry numbers in descending order with pedigree";
779+
780+
JsonObject result = importGermplasm(pathname, listName, listDescription, commit);
781+
assertEquals(200, result.getAsJsonObject("progress").get("statuscode").getAsInt());
782+
783+
// preview table is sorted by entry number
784+
fileData = fileData.sortAscendingOn("Entry No");
785+
786+
JsonArray previewRows = result.get("preview").getAsJsonObject().get("rows").getAsJsonArray();
787+
checkEntryNoFields(fileData, previewRows, commit, listName, listDescription, "EntryNoDescGerm 2", "EntryNoDescGerm 1");
788+
}
789+
790+
/**
791+
* Shared method to perform entry number order tests for preview and commit
792+
*
793+
* @param fileData raw file data
794+
* @param previewRows processed preview rows
795+
* @param commit whether checking preview or commit fields
796+
* @param listName name of list when committing data
797+
* @param listDescription list description when committing data
798+
* @param motherName name of mother for pedigree check (from file)
799+
* @param fatherName name of father for pedigree check (from file)
800+
*/
801+
private void checkEntryNoFields(Table fileData, JsonArray previewRows, boolean commit,
802+
String listName, String listDescription,
803+
String motherName, String fatherName) {
804+
List<String> germplasmNames = new ArrayList<>();
805+
806+
for (int i = 0; i < previewRows.size(); i++) {
807+
JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject");
808+
germplasmNames.add(germplasm.get("germplasmName").getAsString());
809+
checkBasicResponse(germplasm, fileData, i);
810+
811+
if (!commit) {
812+
// preview checks
813+
checkEntryNoPreviewFields(fileData, previewRows, i, motherName, fatherName);
814+
} else {
815+
// commit checks
816+
checkEntryNoCommitFields(fileData, previewRows, i, motherName, fatherName);
817+
}
818+
}
819+
820+
if (commit) {
821+
// Check the germplasm list
822+
checkGermplasmList(Germplasm.constructGermplasmListName(listName, validProgram), listDescription, germplasmNames);
823+
}
824+
}
825+
826+
/**
827+
* Check important field in preview data beyond basic info
828+
* - entry number assignment
829+
* - pedigree assignment
830+
*
831+
* @param fileData raw file data
832+
* @param previewRows processed preview rows
833+
*/
834+
private void checkEntryNoPreviewFields(Table fileData, JsonArray previewRows, int i, String motherName, String fatherName) {
835+
JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject");
836+
837+
// Check preview specific items
838+
// Germplasm name (display name)
839+
assertEquals(fileData.getString(i, "Germplasm Name"), germplasm.get("germplasmName").getAsString());
840+
JsonObject additionalInfo = germplasm.getAsJsonObject("additionalInfo");
841+
842+
// check entry number assignment
843+
assertEquals(fileData.getString(i, "Entry No"), additionalInfo.get("importEntryNumber").getAsString(), "Wrong entry number");
844+
// check pedigree entry number assignment
845+
String fileFemaleEntryNo = fileData.getString(i, "Female Parent Entry No");
846+
if (isNotBlank(fileFemaleEntryNo)) {
847+
assertEquals(fileFemaleEntryNo, additionalInfo.get("femaleParentEntryNo").getAsString(), "Wrong female parent entry number");
848+
}
849+
String fileMaleEntryNo = fileData.getString(i, "Male Parent Entry No");
850+
if (isNotBlank(fileMaleEntryNo)) {
851+
assertEquals(fileMaleEntryNo, additionalInfo.get("maleParentEntryNo").getAsString(), "Wrong male parent entry number");
852+
}
853+
854+
// check preview pedigree values
855+
// only care about entry nos for this test case, not using GIDs
856+
if (isNotBlank(fileFemaleEntryNo) && isNotBlank(fileMaleEntryNo)) {
857+
String pedigree = germplasm.get("pedigree").getAsString();
858+
String[] pedigreeParts = pedigree.split("/");
859+
String mother = pedigreeParts[0];
860+
String father = pedigreeParts[1];
861+
assertEquals(motherName, mother, "Wrong mother");
862+
assertEquals(fatherName, father, "Wrong father");
863+
}
864+
}
865+
866+
/**
867+
* Check properties of processed and committed germplasm objects match what would be expected based on file contents.
868+
* Of particular importance are:
869+
* - germplasm are in entry no order
870+
* - gids are assigned in entry no order
871+
* - correct pedigree was assigned to germplasm based on file specification
872+
*
873+
* @param fileData raw file data
874+
* @param previewRows processed preview rows
875+
* @param i row index
876+
*/
877+
private void checkEntryNoCommitFields(Table fileData, JsonArray previewRows, int i, String motherName, String fatherName) {
878+
JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject");
879+
// Check commit specific items
880+
// Germplasm name (display name)
881+
String expectedGermplasmName = String.format("%s [%s-%s]", fileData.getString(i, "Germplasm Name"), validProgram.getKey(), germplasm.get("accessionNumber").getAsString());
882+
assertEquals(expectedGermplasmName, germplasm.get("germplasmName").getAsString());
883+
// Created Date
884+
JsonObject additionalInfo = germplasm.getAsJsonObject("additionalInfo");
885+
assertTrue(additionalInfo.has(BrAPIAdditionalInfoFields.CREATED_DATE), "createdDate is missing");
886+
// Accession Number
887+
assertTrue(germplasm.has("accessionNumber"), "accessionNumber missing");
888+
889+
// check that gids are assigned in entry no order
890+
if (i > 0) {
891+
JsonObject previousGermplasm = previewRows.get(i-1).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject");
892+
int lastEntryNo = previousGermplasm.getAsJsonObject("additionalInfo").get("importEntryNumber").getAsInt();
893+
int lastGid = previousGermplasm.get("accessionNumber").getAsInt();
894+
int currentEntryNo = additionalInfo.get("importEntryNumber").getAsInt();
895+
int currentGid = germplasm.get("accessionNumber").getAsInt();
896+
assertEquals(1, currentEntryNo-lastEntryNo, "Expected entry number to be monotonically increasing");
897+
assertEquals(1, currentGid-lastGid, "Expected GID to be monotonically increasing");
898+
}
899+
900+
// pedigree check just for single germplasm in file with pedigree info
901+
String fileFemaleEntryNo = fileData.getString(i, "Female Parent Entry No");
902+
String fileMaleEntryNo = fileData.getString(i, "Male Parent Entry No");
903+
904+
// only care about entry nos for this test case, not using GIDs
905+
if (isNotBlank(fileFemaleEntryNo) && isNotBlank(fileMaleEntryNo)) {
906+
String pedigree = germplasm.get("pedigree").getAsString();
907+
String[] pedigreeParts = pedigree.split("/");
908+
String mother = pedigreeParts[0];
909+
String father = pedigreeParts[1];
910+
String regexMatcher = "^(.*\\b) \\[([A-Z]{2,6})-(\\d+)\\]$";
911+
assertTrue(mother.matches(String.format(regexMatcher, motherName)), "Wrong mother");
912+
assertTrue(father.matches(String.format(regexMatcher, fatherName)), "Wrong father");
913+
}
914+
915+
// External Reference germplasm
916+
JsonArray externalReferences = germplasm.getAsJsonArray("externalReferences");
917+
boolean referenceFound = false;
918+
for (JsonElement reference: externalReferences) {
919+
String referenceSource = reference.getAsJsonObject().get("referenceSource").getAsString();
920+
if (referenceSource.equals(BRAPI_REFERENCE_SOURCE)) {
921+
referenceFound = true;
922+
break;
923+
}
924+
}
925+
assertTrue(referenceFound, "Germplasm UUID reference not found");
926+
}
927+
736928
private JsonObject importGermplasm(String pathname, String listName, String listDescription, Boolean commit) throws InterruptedException {
737929
File file = new File(pathname);
738930

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
GID,Germplasm Name,Breeding Method,Source,Female Parent GID,Male Parent GID,Entry No,Female Parent Entry No,Male Parent Entry No,External UID,Synonyms
2+
,EntryNoAscGerm 1,BCR,Test,,,1,2,3,1,
3+
,EntryNoAscGerm 2,BCR,Test,,,2,,,2,
4+
,EntryNoAscGerm 3,BCR,Test,,,3,,,3,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
GID,Germplasm Name,Breeding Method,Source,Female Parent GID,Male Parent GID,Entry No,Female Parent Entry No,Male Parent Entry No,External UID,Synonyms
2+
,EntryNoDescGerm 1,BCR,Test,,,3,,,3,
3+
,EntryNoDescGerm 2,BCR,Test,,,2,,,2,
4+
,EntryNoDescGerm 3,BCR,Test,,,1,2,3,1,

0 commit comments

Comments
 (0)