Skip to content

Commit 3018cc0

Browse files
First implementation of foreign key constraints across resources
1 parent b4ea07b commit 3018cc0

File tree

19 files changed

+574
-44
lines changed

19 files changed

+574
-44
lines changed

src/main/java/io/frictionlessdata/datapackage/Package.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -754,7 +754,6 @@ private void setJson(ObjectNode jsonNodeSource) throws Exception {
754754
if(this.strictValidation){
755755
this.jsonObject = null;
756756
this.resources.clear();
757-
758757
throw dpe;
759758
}else{
760759
if (dpe instanceof DataPackageValidationException)

src/main/java/io/frictionlessdata/datapackage/fk/PackageForeignKey.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,4 +69,16 @@ public void validate() throws Exception {
6969

7070
}
7171
}
72+
73+
public ForeignKey getForeignKey() {
74+
return fk;
75+
}
76+
77+
public Package getDatapackage() {
78+
return datapackage;
79+
}
80+
81+
public Resource<?> getResource() {
82+
return resource;
83+
}
7284
}

src/main/java/io/frictionlessdata/datapackage/resource/AbstractResource.java

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import io.frictionlessdata.datapackage.exceptions.DataPackageValidationException;
1414
import io.frictionlessdata.datapackage.fk.PackageForeignKey;
1515
import io.frictionlessdata.tableschema.Table;
16+
import io.frictionlessdata.tableschema.exception.ForeignKeyException;
1617
import io.frictionlessdata.tableschema.exception.TypeInferringException;
1718
import io.frictionlessdata.tableschema.field.Field;
1819
import io.frictionlessdata.tableschema.fk.ForeignKey;
@@ -34,6 +35,7 @@
3435
import java.nio.file.Files;
3536
import java.nio.file.Path;
3637
import java.util.*;
38+
import java.util.stream.Collectors;
3739

3840
/**
3941
* Abstract base implementation of a Resource.
@@ -247,6 +249,7 @@ public List<Table> getTables() throws Exception {
247249

248250
public void checkRelations(Package pkg) {
249251
if (null != schema) {
252+
List<PackageForeignKey> fks = new ArrayList<>();
250253
for (ForeignKey fk : schema.getForeignKeys()) {
251254
String resourceName = fk.getReference().getResource();
252255
Resource referencedResource;
@@ -261,12 +264,56 @@ public void checkRelations(Package pkg) {
261264
}
262265
try {
263266
PackageForeignKey pFK = new PackageForeignKey(fk, this, pkg);
267+
fks.add(pFK);
264268
pFK.validate();
265269
} catch (Exception e) {
266270
throw new DataPackageValidationException("Foreign key validation failed: " + resourceName, e);
267271
}
268272
}
269273
}
274+
275+
try {
276+
Map<PackageForeignKey, List<Object>> map = new HashMap<>();
277+
for (PackageForeignKey fk : fks) {
278+
String refResourceName = fk.getForeignKey().getReference().getResource();
279+
Resource refResource = pkg.getResource(refResourceName);
280+
List<Object> data = refResource.getData(true, false, true, false);
281+
map.put(fk, data);
282+
}
283+
List<Object> data = this.getData(true, false, true, false);
284+
for (Object d : data) {
285+
Map<String, Object> row = (Map<String, Object>) d;
286+
for (String key : row.keySet()) {
287+
for (PackageForeignKey fk : map.keySet()) {
288+
if (fk.getForeignKey().getFieldNames().contains(key)) {
289+
List<Object>refData = (List<Object>) map.get(fk);
290+
Map<String, String> fieldMapping = fk.getForeignKey().getFieldMapping();
291+
String refFieldName = fieldMapping.get(key);
292+
Object fkVal = row.get(key);
293+
boolean found = false;
294+
295+
for (Object refRow : refData) {
296+
Map<String, Object> refRowMap = (Map<String, Object>) refRow;
297+
Object refVal = refRowMap.get(refFieldName);
298+
if (Objects.equals(fkVal, refVal)) {
299+
found = true;
300+
break;
301+
}
302+
}
303+
if (!found) {
304+
throw new ForeignKeyException("Foreign key validation failed: " + fk.getForeignKey().getFieldNames() + " -> " + fk.getForeignKey().getReference().getFieldNames() + ": '" + fkVal + "' not found in resource '"+fk.getForeignKey().getReference().getResource()+"'");
305+
}
306+
}
307+
}
308+
}
309+
310+
}
311+
312+
System.out.println("Data: "+data);
313+
314+
} catch (Exception e) {
315+
throw new DataPackageValidationException("Error reading data with relations: " + e.getMessage(), e);
316+
}
270317
}
271318
}
272319

src/main/java/io/frictionlessdata/datapackage/resource/Resource.java

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package io.frictionlessdata.datapackage.resource;
22

3+
import com.fasterxml.jackson.annotation.JacksonInject;
4+
import com.fasterxml.jackson.annotation.JsonCreator;
35
import com.fasterxml.jackson.annotation.JsonIgnore;
46
import com.fasterxml.jackson.databind.JsonNode;
57
import com.fasterxml.jackson.databind.node.ArrayNode;
@@ -302,7 +304,11 @@ public interface Resource<T> extends BaseInterface {
302304
* @throws DataPackageException for invalid data
303305
* @throws Exception if other operation fails.
304306
*/
305-
static AbstractResource build(ObjectNode resourceJson, Object basePath, boolean isArchivePackage) throws IOException, DataPackageException, Exception {
307+
308+
static AbstractResource build(
309+
ObjectNode resourceJson,
310+
Object basePath,
311+
boolean isArchivePackage) throws IOException, DataPackageException, Exception {
306312
String name = textValueOrNull(resourceJson, JSONBase.JSON_KEY_NAME);
307313
Object path = resourceJson.get(JSONBase.JSON_KEY_PATH);
308314
Object data = resourceJson.get(JSONBase.JSON_KEY_DATA);
@@ -337,8 +343,11 @@ static AbstractResource build(ObjectNode resourceJson, Object basePath, boolean
337343
resource = new JSONDataResource(name, data.toString());
338344
} else if (format.equals(Resource.FORMAT_JSON))
339345
resource = new JSONDataResource(name, data.toString());
340-
else if (format.equals(Resource.FORMAT_CSV))
341-
resource = new CSVDataResource(name, data.toString());
346+
else if (format.equals(Resource.FORMAT_CSV)) {
347+
// data is in inline CSV format like "data": "A,B,C\n1,2,3\n4,5,6"
348+
String dataString = ((TextNode)data).textValue().replaceAll("\\\\n", "\n");
349+
resource = new CSVDataResource(name, dataString);
350+
}
342351
} else {
343352
throw new DataPackageValidationException(
344353
"Invalid Resource. The path property or the data and format properties cannot be null.");

src/test/java/io/frictionlessdata/datapackage/ForeignKeysTest.java

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,39 @@
11
package io.frictionlessdata.datapackage;
22

3+
import io.frictionlessdata.datapackage.exceptions.DataPackageValidationException;
34
import io.frictionlessdata.datapackage.resource.Resource;
5+
import io.frictionlessdata.tableschema.exception.ForeignKeyException;
6+
import io.frictionlessdata.tableschema.exception.TableValidationException;
7+
import org.junit.jupiter.api.Assertions;
48
import org.junit.jupiter.api.DisplayName;
59
import org.junit.jupiter.api.Test;
610

711
import java.nio.file.Path;
812
import java.util.List;
913

14+
import static org.junit.jupiter.api.Assertions.assertThrows;
15+
1016
public class ForeignKeysTest {
1117

1218
@Test
13-
@DisplayName("Test that foreign keys are validated correctly")
14-
void testValidationURLAsSchemaReference() throws Exception{
15-
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/foreign-keys.json");
19+
@DisplayName("Test that foreign keys are validated correctly, good case")
20+
void testForeignKeysGoodCase() throws Exception{
21+
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/foreign_keys_valid.json");
22+
Package pkg = new Package(resourcePath, true);
23+
pkg.getResource("teams");
24+
}
25+
26+
@Test
27+
@DisplayName("Test that foreign keys are validated correctly, bad case")
28+
void testForeignKeysBadCase() throws Exception{
29+
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/foreign_keys_invalid.json");
1630
Package pkg = new Package(resourcePath, true);
1731
Resource teams = pkg.getResource("teams");
18-
teams.checkRelations(pkg);
19-
List data = teams.getData(true);
20-
System.out.println("Data: " + data);
32+
33+
DataPackageValidationException ex = assertThrows(DataPackageValidationException.class,
34+
() -> teams.checkRelations(pkg));
35+
Throwable cause = ex.getCause();
36+
Assertions.assertInstanceOf(ForeignKeyException.class, cause);
37+
Assertions.assertEquals("Foreign key validation failed: [city] -> [name]: 'Munich' not found in resource 'cities'", cause.getMessage());
2138
}
2239
}

src/test/java/io/frictionlessdata/datapackage/PackageTest.java

Lines changed: 59 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import io.frictionlessdata.datapackage.exceptions.DataPackageValidationException;
99
import io.frictionlessdata.datapackage.resource.*;
1010
import io.frictionlessdata.tableschema.exception.ConstraintsException;
11+
import io.frictionlessdata.tableschema.exception.TableValidationException;
1112
import io.frictionlessdata.tableschema.exception.ValidationException;
1213
import io.frictionlessdata.tableschema.field.DateField;
1314
import io.frictionlessdata.tableschema.schema.Schema;
@@ -24,12 +25,9 @@
2425
import java.nio.file.Path;
2526
import java.nio.file.Paths;
2627
import java.nio.file.attribute.FileAttribute;
27-
import java.security.DigestInputStream;
2828
import java.security.MessageDigest;
29-
import java.security.NoSuchAlgorithmException;
3029
import java.time.ZonedDateTime;
3130
import java.util.*;
32-
import java.util.stream.Collectors;
3331

3432
import static io.frictionlessdata.datapackage.Profile.*;
3533
import static io.frictionlessdata.datapackage.TestUtil.getBasePath;
@@ -982,39 +980,66 @@ void validateDataPackage() throws Exception {
982980
}
983981

984982
@Test
985-
@Disabled
986-
@DisplayName("Datapackage with same data in different formats")
983+
@DisplayName("Datapackage with same data in different formats, lenient validation")
987984
void validateDataPackageDifferentFormats() throws Exception {
988-
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/different-data-formats/datapackage.json");
985+
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/different-data-formats_incl_invalid/datapackage.json");
986+
Package dp = new Package(resourcePath, false);
987+
List<Object[]> teamsWithHeaders = dp.getResource("teams_with_headers_csv_file_with_schema").getData(false, false, true, false);
988+
List<Object[]> teamsWithHeadersCsvFileNoSchema = dp.getResource("teams_with_headers_csv_file_no_schema").getData(false, false, true, false);
989+
List<Object[]> teamsNoHeadersCsvFileNoSchema = dp.getResource("teams_no_headers_csv_file_no_schema").getData(false, false, true, false);
990+
List<Object[]> teamsNoHeadersCsvInlineNoSchema = dp.getResource("teams_no_headers_inline_csv_no_schema").getData(false, false, true, false);
991+
992+
List<Object[]> teamsArraysInline = dp.getResource("teams_arrays_inline_with_headers_with_schema").getData(false, false, true, false);
993+
List<Object[]> teamsObjectsInline = dp.getResource("teams_objects_inline_with_schema").getData(false, false, true, false);
994+
List<Object[]> teamsArrays = dp.getResource("teams_arrays_file_with_headers_with_schema").getData(false, false, true, false);
995+
List<Object[]> teamsObjects = dp.getResource("teams_objects_file_with_schema").getData(false, false, true, false);
996+
List<Object[]> teamsArraysInlineNoSchema = dp.getResource("teams_arrays_inline_with_headers_no_schema").getData(false, false, true, false);
997+
998+
// ensure tables without headers throw errors on reading if a Schema is set
999+
TableValidationException ex = assertThrows(TableValidationException.class,
1000+
() -> dp.getResource("teams_arrays_no_headers_inline_with_schema").getData(false, false, true, false));
1001+
Assertions.assertEquals("Field 'id' not found in table headers or table has no headers.", ex.getMessage());
1002+
1003+
TableValidationException ex2 = assertThrows(TableValidationException.class,
1004+
() -> dp.getResource("teams_no_headers_inline_csv_with_schema").getData(false, false, true, false));
1005+
Assertions.assertEquals("Field 'id' not found in table headers or table has no headers.", ex2.getMessage());
1006+
1007+
TableValidationException ex3 = assertThrows(TableValidationException.class,
1008+
() -> dp.getResource("teams_no_headers_csv_file_with_schema").getData(false, false, true, false));
1009+
Assertions.assertEquals("Field 'id' not found in table headers or table has no headers.", ex3.getMessage());
1010+
1011+
Assertions.assertArrayEquals(getFullTeamsData().toArray(), teamsWithHeaders.toArray());
1012+
Assertions.assertArrayEquals(getFullTeamsData().toArray(), teamsArraysInline.toArray());
1013+
Assertions.assertArrayEquals(getFullTeamsData().toArray(), teamsObjectsInline.toArray());
1014+
Assertions.assertArrayEquals(getFullTeamsData().toArray(), teamsArrays.toArray());
1015+
Assertions.assertArrayEquals(getFullTeamsData().toArray(), teamsObjects.toArray());
1016+
1017+
// those without Schema lose the type information. With header row means all data is there
1018+
Assertions.assertArrayEquals(getFullTeamsDataString().toArray(), teamsWithHeadersCsvFileNoSchema.toArray());
1019+
Assertions.assertArrayEquals(getFullTeamsDataString().toArray(), teamsArraysInlineNoSchema.toArray());
1020+
1021+
// those without a header row and with no Schema will lose the first row of data (skipped as a header row). Seems wrong but that's what the python port does
1022+
Assertions.assertArrayEquals(getTeamsDataStringMissingFirstRow().toArray(), teamsNoHeadersCsvFileNoSchema.toArray());
1023+
Assertions.assertArrayEquals(getTeamsDataStringMissingFirstRow().toArray(), teamsNoHeadersCsvInlineNoSchema.toArray());
1024+
}
1025+
1026+
@Test
1027+
@DisplayName("Datapackage with same data in different valid formats, strict validation")
1028+
void validateDataPackageDifferentFormatsStrict() throws Exception {
1029+
Path resourcePath = TestUtil.getResourcePath("/fixtures/datapackages/different-valid-data-formats/datapackage.json");
9891030
Package dp = new Package(resourcePath, true);
1031+
9901032
List<Object[]> teamsWithHeaders = dp.getResource("teams_with_headers_csv_file").getData(false, false, true, false);
991-
List<Object[]> teamsNoHeaders = dp.getResource("teams_arrays_no_headers_inline").getData(false, false, true, false);
992-
List<Object[]> teamsNoHeadersCsv = dp.getResource("teams_no_headers_inline_csv").getData(false, false, true, false);
993-
List<Object[]> teamsNoHeadersFile = dp.getResource("teams_no_headers_csv_file").getData(false, false, true, false);
9941033
List<Object[]> teamsArraysInline = dp.getResource("teams_arrays_inline").getData(false, false, true, false);
9951034
List<Object[]> teamsObjectsInline = dp.getResource("teams_objects_inline").getData(false, false, true, false);
9961035
List<Object[]> teamsArrays = dp.getResource("teams_arrays_file").getData(false, false, true, false);
9971036
List<Object[]> teamsObjects = dp.getResource("teams_objects_file").getData(false, false, true, false);
9981037

999-
// Assert the validation messages
1000-
System.out.println(teamsWithHeaders.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1001-
System.out.println(teamsNoHeaders.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1002-
System.out.println(teamsNoHeadersCsv.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1003-
System.out.println(teamsNoHeadersFile.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1004-
System.out.println(teamsArraysInline.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1005-
System.out.println(teamsObjectsInline.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1006-
System.out.println(teamsArrays.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1007-
System.out.println(teamsObjects.stream().map(Arrays::toString).collect(Collectors.joining("\n")));
1008-
10091038
Assertions.assertArrayEquals(teamsWithHeaders.toArray(), getFullTeamsData().toArray());
10101039
Assertions.assertArrayEquals(teamsArraysInline.toArray(), getFullTeamsData().toArray());
10111040
Assertions.assertArrayEquals(teamsObjectsInline.toArray(), getFullTeamsData().toArray());
10121041
Assertions.assertArrayEquals(teamsArrays.toArray(), getFullTeamsData().toArray());
10131042
Assertions.assertArrayEquals(teamsObjects.toArray(), getFullTeamsData().toArray());
1014-
1015-
// those without a header row will lose the first row of data. Seems wrong but that's what the python port does
1016-
Assertions.assertArrayEquals(teamsNoHeaders.toArray(), getTeamsDataMissingFirstRow().toArray());
1017-
Assertions.assertArrayEquals(teamsNoHeadersFile.toArray(), getTeamsDataMissingFirstRow().toArray());
10181043
}
10191044

10201045
private static List<Object[]> getFullTeamsData() {
@@ -1025,10 +1050,18 @@ private static List<Object[]> getFullTeamsData() {
10251050
return expectedData;
10261051
}
10271052

1028-
private static List<Object[]> getTeamsDataMissingFirstRow() {
1053+
private static List<Object[]> getFullTeamsDataString() {
10291054
List<Object[]> expectedData = new ArrayList<>();
1030-
expectedData.add(new Object[]{BigInteger.valueOf(2), "Real", "Madrid"});
1031-
expectedData.add(new Object[]{BigInteger.valueOf(3), "Bayern", "Munich"});
1055+
expectedData.add(new Object[]{"1", "Arsenal", "London"});
1056+
expectedData.add(new Object[]{"2", "Real", "Madrid"});
1057+
expectedData.add(new Object[]{"3", "Bayern", "Munich"});
1058+
return expectedData;
1059+
}
1060+
1061+
private static List<Object[]> getTeamsDataStringMissingFirstRow() {
1062+
List<Object[]> expectedData = new ArrayList<>();
1063+
expectedData.add(new Object[]{"2", "Real", "Madrid"});
1064+
expectedData.add(new Object[]{"3", "Bayern", "Munich"});
10321065
return expectedData;
10331066
}
10341067

src/test/java/io/frictionlessdata/datapackage/resource/ResourceTest.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package io.frictionlessdata.datapackage.resource;
22

3+
import com.fasterxml.jackson.core.type.TypeReference;
4+
import com.fasterxml.jackson.databind.InjectableValues;
35
import com.fasterxml.jackson.databind.node.ArrayNode;
46
import com.fasterxml.jackson.databind.node.ObjectNode;
57
import io.frictionlessdata.datapackage.Package;
@@ -262,6 +264,9 @@ public void testBuildAndIterateDataFromCsvFormat() throws Exception{
262264
Iterator<String[]> iter = resource.objectArrayIterator();
263265
int expectedDataIndex = 0;
264266

267+
// check that data was read
268+
Assertions.assertTrue(iter.hasNext());
269+
265270
// Assert data.
266271
while(iter.hasNext()){
267272
String[] record = iter.next();
@@ -412,7 +417,7 @@ public void testBuildAndIterateDataFromJSONFormat() throws Exception{
412417
String dataString = getFileContents("/fixtures/resource/valid_json_array_resource.json");
413418
Resource resource = Resource.build((ObjectNode) JsonUtil.getInstance().createNode(dataString), getBasePath(), false);
414419

415-
// Expected data.
420+
// Expected data.
416421
List<String[]> expectedData = this.getExpectedPopulationData();
417422

418423
// Get Iterator.

src/test/resources/fixtures/datapackages/different-data-formats/data/teams_arrays.json renamed to src/test/resources/fixtures/datapackages/different-data-formats_incl_invalid/data/teams_arrays.json

File renamed without changes.

src/test/resources/fixtures/datapackages/different-data-formats/data/teams_no_headers.csv renamed to src/test/resources/fixtures/datapackages/different-data-formats_incl_invalid/data/teams_no_headers.csv

File renamed without changes.

src/test/resources/fixtures/datapackages/different-data-formats/data/teams_objects.json renamed to src/test/resources/fixtures/datapackages/different-data-formats_incl_invalid/data/teams_objects.json

File renamed without changes.

0 commit comments

Comments
 (0)