Skip to content

Commit

Permalink
feat: add tests for verifying standard parquet map schema
Browse files Browse the repository at this point in the history
- add tests
- refactor implementation of the original method into smaller
modular methods

[raystack#137]
  • Loading branch information
Meghajit committed May 9, 2022
1 parent e1d080c commit 581ef5e
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,31 @@ public static boolean checkIsLegacySimpleGroupMap(SimpleGroup simpleGroup, Strin
* @return true, if the map structure follows the spec and false otherwise.
*/
public static boolean checkIsStandardSimpleGroupMap(SimpleGroup simpleGroup, String fieldName) {
if (simpleGroup.getType().getType(fieldName) instanceof GroupType) {
GroupType mapType = simpleGroup.getType().getType(fieldName).asGroupType();
if (mapType.asGroupType().getType("key_value") instanceof GroupType) {
GroupType nestedKeyValueMessageType = mapType.asGroupType().getType("key_value").asGroupType();
return (mapType.getRepetition().equals(OPTIONAL)
|| mapType.isRepetition(REQUIRED))
&& mapType.getLogicalTypeAnnotation().equals(LogicalTypeAnnotation.mapType())
&& mapType.getFieldCount() == 1
&& mapType.containsField("key_value")
&& nestedKeyValueMessageType.isRepetition(REPEATED)
&& nestedKeyValueMessageType.asGroupType().containsField("key")
&& nestedKeyValueMessageType.asGroupType().getType("key").isRepetition(REQUIRED);
return applyMapFieldValidations(simpleGroup, fieldName)
&& applyNestedKeyValueFieldValidations(simpleGroup, fieldName);
}

/**
 * Validates the outer (map-level) field of a candidate standard Parquet map.
 *
 * <p>The field named {@code fieldName} passes when it is a {@link GroupType} whose repetition is
 * {@code OPTIONAL} or {@code REQUIRED}, whose logical type annotation equals
 * {@code LogicalTypeAnnotation.mapType()} and which has exactly one child field.
 *
 * @param simpleGroup the group whose schema holds the field to inspect
 * @param fieldName   name of the field to look up in the schema of {@code simpleGroup}
 * @return true if every map-level condition holds, false otherwise
 */
private static boolean applyMapFieldValidations(SimpleGroup simpleGroup, String fieldName) {
    Type mapType = simpleGroup.getType().getType(fieldName);
    if (!(mapType instanceof GroupType)) {
        return false;
    }
    GroupType mapGroupType = mapType.asGroupType();
    return (mapGroupType.isRepetition(OPTIONAL) || mapGroupType.isRepetition(REQUIRED))
            // A group built without an annotation reports null here; comparing from the
            // constant side yields false instead of throwing a NullPointerException.
            && LogicalTypeAnnotation.mapType().equals(mapGroupType.getLogicalTypeAnnotation())
            && mapGroupType.getFieldCount() == 1;
}

private static boolean applyNestedKeyValueFieldValidations(SimpleGroup simpleGroup, String fieldName) {
GroupType mapGroupType = simpleGroup.getType().getType(fieldName).asGroupType();
if (mapGroupType.containsField("key_value")) {
Type nestedKeyValueType = mapGroupType.getType("key_value");
if (nestedKeyValueType instanceof GroupType) {
GroupType nestedKeyValueGroupType = nestedKeyValueType.asGroupType();
return nestedKeyValueGroupType.isRepetition(REPEATED)
&& nestedKeyValueGroupType.containsField("key")
&& nestedKeyValueGroupType.getType("key").isRepetition(REQUIRED);
}
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@

import org.apache.parquet.example.data.simple.SimpleGroup;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.junit.Test;

import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.Types.buildMessage;
import static org.apache.parquet.schema.Types.optionalGroup;
import static org.apache.parquet.schema.Types.optionalMap;
import static org.apache.parquet.schema.Types.repeatedGroup;
import static org.apache.parquet.schema.Types.requiredGroup;
import static org.junit.Assert.assertFalse;
Expand Down Expand Up @@ -116,7 +120,7 @@ public void checkIsLegacySimpleGroupMapShouldReturnFalseWhenMapFieldSchemaDoesNo
}

@Test
public void checkIsLegacySimpleGroupMapShouldReturnTrueWhenMapFieldConformsToTheLegacySchema() {
public void checkIsLegacySimpleGroupMapShouldOnlyReturnTrueWhenMapFieldConformsToTheLegacySchema() {
GroupType mapSchema = repeatedGroup()
.optional(INT32).named("key")
.optional(FLOAT).named("value")
Expand All @@ -128,4 +132,157 @@ public void checkIsLegacySimpleGroupMapShouldReturnTrueWhenMapFieldConformsToThe

assertTrue(SimpleGroupValidation.checkIsLegacySimpleGroupMap(simpleGroup, "sample_map_field"));
}

/** A field declared as a primitive (INT32) instead of a group must not be reported as a map. */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenMapFieldIsNotOfTypeGroupType() {
    MessageType schemaWithPrimitiveField = buildMessage()
            .required(INT32).named("sample_map_field")
            .named("TestMessage");
    SimpleGroup group = new SimpleGroup(schemaWithPrimitiveField);

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertFalse(isStandardMap);
}

/**
 * A map group declared as {@code REPEATED} must be rejected.
 *
 * <p>The fixture carries the MAP annotation and a single repeated {@code key_value} group with
 * a required {@code key}, so the repetition of the outer group is the only condition it breaks.
 */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenMapFieldIsOfRepeatedType() {
    GroupType keyValueType = repeatedGroup()
            .required(INT32).named("key")
            .optional(INT32).named("value")
            .named("key_value");

    GroupType mapType = repeatedGroup().as(LogicalTypeAnnotation.mapType())
            .addField(keyValueType)
            .named("sample_map_field");

    MessageType parquetSchema = buildMessage()
            .addField(mapType)
            .named("TestMessage");
    SimpleGroup simpleGroup = new SimpleGroup(parquetSchema);

    assertFalse(SimpleGroupValidation.checkIsStandardSimpleGroupMap(simpleGroup, "sample_map_field"));
}

/**
 * A group annotated with something other than MAP (here: ENUM) must be rejected.
 *
 * <p>The nested {@code key_value} group is repeated and has a required {@code key}, so the
 * annotation on the outer group is the only condition the fixture breaks.
 */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenMapFieldDoesNotContainCorrectLogicalTypeAnnotation() {
    GroupType keyValueType = repeatedGroup()
            .required(INT32).named("key")
            .optional(INT32).named("value")
            .named("key_value");

    GroupType mapType = requiredGroup().as(LogicalTypeAnnotation.enumType())
            .addField(keyValueType)
            .named("sample_map_field");

    MessageType parquetSchema = buildMessage()
            .addField(mapType)
            .named("TestMessage");
    SimpleGroup simpleGroup = new SimpleGroup(parquetSchema);

    assertFalse(SimpleGroupValidation.checkIsStandardSimpleGroupMap(simpleGroup, "sample_map_field"));
}

/**
 * A MAP-annotated group with more than one child field must be rejected.
 *
 * <p>The {@code key_value} child is a repeated group with a required {@code key}; the
 * additional {@code extra_field} sibling is the only condition the fixture breaks.
 */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenMapFieldDoesNotContainCorrectNumberOfNestedFields() {
    GroupType keyValueType = repeatedGroup()
            .required(INT32).named("key")
            .optional(INT32).named("value")
            .named("key_value");

    GroupType mapType = requiredGroup().as(LogicalTypeAnnotation.mapType())
            .addField(keyValueType)
            .required(INT32).named("extra_field")
            .named("sample_map_field");

    MessageType parquetSchema = buildMessage()
            .addField(mapType)
            .named("TestMessage");
    SimpleGroup simpleGroup = new SimpleGroup(parquetSchema);

    assertFalse(SimpleGroupValidation.checkIsStandardSimpleGroupMap(simpleGroup, "sample_map_field"));
}

/** The sole child of the MAP-annotated group is named {@code extra_field}, not {@code key_value}. */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenMapFieldDoesNotContainNestedKeyValueField() {
    // Schema is assembled in one expression: message -> map group -> single misnamed child.
    MessageType schema = buildMessage()
            .addField(requiredGroup().as(LogicalTypeAnnotation.mapType())
                    .required(INT32).named("extra_field")
                    .named("sample_map_field"))
            .named("TestMessage");
    SimpleGroup group = new SimpleGroup(schema);

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertFalse(isStandardMap);
}

/** {@code key_value} exists under the MAP-annotated group but is a primitive INT32, not a group. */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenNestedKeyValueFieldIsNotAGroupType() {
    // Schema is assembled in one expression: message -> map group -> primitive key_value.
    MessageType schema = buildMessage()
            .addField(requiredGroup().as(LogicalTypeAnnotation.mapType())
                    .required(INT32).named("key_value")
                    .named("sample_map_field"))
            .named("TestMessage");
    SimpleGroup group = new SimpleGroup(schema);

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertFalse(isStandardMap);
}

/**
 * A {@code key_value} group that is not {@code REPEATED} must be rejected.
 *
 * <p>The nested group contains a required {@code key}, so its {@code OPTIONAL} repetition is
 * the only condition the fixture breaks.
 */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenNestedKeyValueFieldIsNotRepeated() {
    GroupType keyValueType = optionalGroup()
            .required(INT32).named("key")
            .required(INT32).named("value")
            .named("key_value");

    GroupType mapType = requiredGroup().as(LogicalTypeAnnotation.mapType())
            .addField(keyValueType)
            .named("sample_map_field");

    MessageType parquetSchema = buildMessage()
            .addField(mapType)
            .named("TestMessage");
    SimpleGroup simpleGroup = new SimpleGroup(parquetSchema);

    assertFalse(SimpleGroupValidation.checkIsStandardSimpleGroupMap(simpleGroup, "sample_map_field"));
}

/** The repeated {@code key_value} group holds {@code some_key}/{@code some_value} but no field named {@code key}. */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenNestedKeyValueGroupDoesNotContainKeyField() {
    GroupType entriesWithoutKey = repeatedGroup()
            .required(INT32).named("some_key")
            .required(INT32).named("some_value")
            .named("key_value");
    GroupType mapField = requiredGroup().as(LogicalTypeAnnotation.mapType())
            .addField(entriesWithoutKey)
            .named("sample_map_field");
    SimpleGroup group = new SimpleGroup(buildMessage().addField(mapField).named("TestMessage"));

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertFalse(isStandardMap);
}

/** The repeated {@code key_value} group has a {@code key} field, but it is declared optional. */
@Test
public void checkIsStandardSimpleGroupMapShouldReturnFalseWhenNestedKeyValueGroupDoesNotHaveKeyAsRequired() {
    GroupType entriesWithOptionalKey = repeatedGroup()
            .optional(INT32).named("key")
            .required(INT32).named("some_value")
            .named("key_value");
    GroupType mapField = requiredGroup().as(LogicalTypeAnnotation.mapType())
            .addField(entriesWithOptionalKey)
            .named("sample_map_field");
    SimpleGroup group = new SimpleGroup(buildMessage().addField(mapField).named("TestMessage"));

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertFalse(isStandardMap);
}

/** A map produced by the {@code optionalMap()} builder with string key and optional string value is accepted. */
@Test
public void checkIsStandardSimpleGroupMapShouldOnlyReturnTrueWhenMapFieldConformsToTheStandardSpec() {
    GroupType stringToStringMap = optionalMap()
            .key(PrimitiveType.PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
            .optionalValue(PrimitiveType.PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
            .named("sample_map_field");
    SimpleGroup group = new SimpleGroup(buildMessage().addField(stringToStringMap).named("TestMessage"));

    boolean isStandardMap = SimpleGroupValidation.checkIsStandardSimpleGroupMap(group, "sample_map_field");

    assertTrue(isStandardMap);
}
}

0 comments on commit 581ef5e

Please sign in to comment.