From 954d9421d2370a05dd8718b00ca57af2d998eba5 Mon Sep 17 00:00:00 2001 From: Nicolas Trinquier Date: Wed, 14 Nov 2018 14:06:02 +0100 Subject: [PATCH 1/2] Allow merging more restrictive field in less restrictive field --- .../org/apache/parquet/schema/GroupType.java | 3 -- .../apache/parquet/schema/PrimitiveType.java | 3 +- .../java/org/apache/parquet/schema/Type.java | 23 ++++++++++++ .../parquet/schema/TestMessageType.java | 17 +++++---- .../org/apache/parquet/schema/TestType.java | 36 +++++++++++++++++++ 5 files changed, 69 insertions(+), 13 deletions(-) create mode 100644 parquet-column/src/test/java/org/apache/parquet/schema/TestType.java diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index 64e7062959..52184e1044 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -400,9 +400,6 @@ List mergeFields(GroupType toMerge, boolean strict) { Type merged; if (toMerge.containsField(type.getName())) { Type fieldToMerge = toMerge.getType(type.getName()); - if (fieldToMerge.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { - throw new IncompatibleSchemaModificationException("repetition constraint is more restrictive: can not merge type " + fieldToMerge + " into " + type); - } if (type.getLogicalTypeAnnotation() != null && !type.getLogicalTypeAnnotation().equals(fieldToMerge.getLogicalTypeAnnotation())) { throw new IncompatibleSchemaModificationException("cannot merge logical type " + fieldToMerge.getLogicalTypeAnnotation() + " into " + type.getLogicalTypeAnnotation()); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 9ab53b2b03..b59fdec15d 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -750,7 +750,8 @@ protected Type union(Type toMerge, boolean strict) { } } - Types.PrimitiveBuilder builder = Types.primitive(primitive, toMerge.getRepetition()); + Repetition repetition = Repetition.leastRestrictive(this.getRepetition(), toMerge.getRepetition()); + Types.PrimitiveBuilder builder = Types.primitive(primitive, repetition); if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitive) { builder.length(length); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index d046957374..dd13ec112a 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -20,6 +20,7 @@ import static org.apache.parquet.Preconditions.checkNotNull; +import java.util.Arrays; import java.util.List; import org.apache.parquet.io.InvalidRecordException; @@ -111,6 +112,28 @@ public boolean isMoreRestrictiveThan(Repetition other) { */ abstract public boolean isMoreRestrictiveThan(Repetition other); + + /** + * @param repetitions repetitions to traverse + * @return the least restrictive repetition of all repetitions provided + */ + public static Repetition leastRestrictive(Repetition... repetitions) { + boolean hasOptional = false; + + for (Repetition repetition : repetitions) { + if (repetition == REPEATED) { + return REPEATED; + } else if (repetition == OPTIONAL) { + hasOptional = true; + } + } + + if (hasOptional) { + return OPTIONAL; + } + + return REQUIRED; + } } private final String name; diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java index e511d4252f..ee7663c31a 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java @@ -20,9 +20,6 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; import static org.apache.parquet.schema.OriginalType.LIST; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; @@ -30,6 +27,9 @@ import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; import static org.apache.parquet.schema.Type.Repetition.REPEATED; import static org.apache.parquet.schema.Type.Repetition.REQUIRED; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import org.junit.Test; @@ -89,12 +89,11 @@ public void testMergeSchema() { MessageType t4 = new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "a")); - try { - t3.union(t4); - fail("moving from optional to required"); - } catch (IncompatibleSchemaModificationException e) { - assertEquals("repetition constraint is more restrictive: can not merge type required binary a into optional binary a", e.getMessage()); - } + assertEquals( + t3.union(t4), + new MessageType("root1", + new PrimitiveType(OPTIONAL, BINARY, "a")) + ); assertEquals( t4.union(t3), diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestType.java new file mode 100644 index 0000000000..5f1c87e168 --- /dev/null +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestType.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.schema; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestType { + @Test + public void test() { + Type.Repetition REQUIRED = Type.Repetition.REQUIRED; + Type.Repetition OPTIONAL = Type.Repetition.OPTIONAL; + Type.Repetition REPEATED = Type.Repetition.REPEATED; + + assertEquals(REPEATED, Type.Repetition.leastRestrictive(REQUIRED, OPTIONAL, REPEATED, REQUIRED, OPTIONAL, REPEATED)); + assertEquals(OPTIONAL, Type.Repetition.leastRestrictive(REQUIRED, OPTIONAL, REQUIRED, OPTIONAL)); + assertEquals(REQUIRED, Type.Repetition.leastRestrictive(REQUIRED, REQUIRED)); + } +} From 51ef1c63134362c54c7829eab7ad51f0a0fde70e Mon Sep 17 00:00:00 2001 From: Nicolas Trinquier Date: Mon, 19 Nov 2018 14:34:57 +0100 Subject: [PATCH 2/2] Make class and function names more explicit --- .../parquet/schema/{TestType.java => TestRepetitionType.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename parquet-column/src/test/java/org/apache/parquet/schema/{TestType.java => TestRepetitionType.java} (94%) diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java similarity index 94% rename from parquet-column/src/test/java/org/apache/parquet/schema/TestType.java rename to parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java index 5f1c87e168..524b03c66c 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestType.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestRepetitionType.java @@ -22,9 +22,9 @@ import static org.junit.Assert.assertEquals; -public class TestType { +public class TestRepetitionType { @Test - public void test() { + public void testLeastRestrictiveRepetition() { Type.Repetition REQUIRED = Type.Repetition.REQUIRED; Type.Repetition OPTIONAL = Type.Repetition.OPTIONAL; Type.Repetition REPEATED = Type.Repetition.REPEATED;