From 3b0310d6d4ddefc802d2c92f504dad636f1c7166 Mon Sep 17 00:00:00 2001 From: Masayuki Takahashi Date: Mon, 11 Feb 2019 18:56:43 +0900 Subject: [PATCH 1/3] PARQUET-1527: [parquet-tools] cat command throw java.lang.ClassCastException --- .../tools/read/SimpleRecordConverter.java | 1 + .../tools/read/TestSimpleRecordConverter.java | 147 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java index 7a1c81d6f8..4e6129043e 100644 --- a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java +++ b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java @@ -71,6 +71,7 @@ public Optional visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotat } }).orElse(new SimplePrimitiveConverter(field.getName())); } + return new SimplePrimitiveConverter(field.getName()); } GroupType groupType = field.asGroupType(); diff --git a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java new file mode 100644 index 0000000000..a2d7cb6822 --- /dev/null +++ b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.tools.read; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.ParquetProperties; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; + + +public class TestSimpleRecordConverter { + + private static final String INT32_FIELD = "int32_field"; + private static final String INT64_FIELD = "int64_field"; + private static final String FLOAT_FIELD = "float_field"; + private static final String DOUBLE_FIELD = "double_field"; + private static final String BINARY_FIELD = "binary_field"; + private static final String FIXED_LEN_BYTE_ARRAY_FIELD = "flba_field"; + + + private File testFile; + + @Test + public void testConverter() throws IOException { + ParquetReader reader = + ParquetReader.builder(new SimpleReadSupport(), new Path(this.testFile.getAbsolutePath())).build(); + for (SimpleRecord record = reader.read(); record != null; record = reader.read()) { + for (SimpleRecord.NameValue value : record.getValues()) { + switch(value.getName()) { + case INT32_FIELD: + Assert.assertEquals(32, value.getValue()); + break; + case INT64_FIELD: + Assert.assertEquals(64L, value.getValue()); + break; + case FLOAT_FIELD: + Assert.assertEquals(1.0f, value.getValue()); + break; + case DOUBLE_FIELD: + Assert.assertEquals(2.0d, value.getValue()); + break; + case BINARY_FIELD: + Assert.assertArrayEquals("foobar".getBytes(), (byte[])value.getValue()); + break; + case FIXED_LEN_BYTE_ARRAY_FIELD: + Assert.assertArrayEquals(new byte[]{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }, (byte[])value.getValue()); + break; + } + } + } + } + + @Before + public void setUp() throws IOException { + this.testFile = createTempFile(); + MessageType schema = createSchema(); + write(schema, testFile); + } + + @After + public void tearDown() { + this.testFile.delete(); + } + + private MessageType createSchema() { + return new MessageType("schema", + new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, INT32_FIELD), + new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, INT64_FIELD), + new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FLOAT, FLOAT_FIELD), + new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, DOUBLE_FIELD), + new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, BINARY_FIELD), + new PrimitiveType(Type.Repetition.REQUIRED, + PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 12, FIXED_LEN_BYTE_ARRAY_FIELD) + ); + } + + private void write(MessageType schema, File f) throws IOException { + Path fsPpath = new Path(f.getPath()); + Configuration conf = new Configuration(); + SimpleGroupFactory fact = new SimpleGroupFactory(schema); + GroupWriteSupport.setSchema(schema, conf); + + ParquetWriter writer = new ParquetWriter( + fsPpath, + new GroupWriteSupport(), + CompressionCodecName.UNCOMPRESSED, + 1024, + 1024, + 512, + true, + false, + ParquetProperties.WriterVersion.PARQUET_2_0, + conf); + try { + writer.write(fact.newGroup() + .append(INT32_FIELD, 32) + .append(INT64_FIELD, 64L) + .append(FLOAT_FIELD, 1.0f) + .append(DOUBLE_FIELD, 2.0d) + .append(BINARY_FIELD, Binary.fromString("foobar")) + .append(FIXED_LEN_BYTE_ARRAY_FIELD, + Binary.fromConstantByteArray(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }))); + } finally { + writer.close(); + } + } + + private File createTempFile() throws IOException { + File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tmp.deleteOnExit(); + tmp.delete(); + return tmp; + } +} From 83d0a6b12f83606f81b119a27b47b607b2e1dea7 Mon Sep 17 00:00:00 2001 From: Masayuki Takahashi Date: Mon, 11 Feb 2019 20:52:26 +0900 Subject: [PATCH 2/3] PARQUET-1527: [parquet-tools] cat command throw java.lang.ClassCastException - Use TemporaryFolder instead of File.createTempFile --- .../tools/read/TestSimpleRecordConverter.java | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java index a2d7cb6822..6ac0648ddc 100644 --- a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java +++ b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java @@ -32,15 +32,12 @@ import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.*; +import org.junit.rules.TemporaryFolder; import java.io.File; import java.io.IOException; - public class TestSimpleRecordConverter { private static final String INT32_FIELD = "int32_field"; @@ -50,13 +47,13 @@ public class TestSimpleRecordConverter { private static final String BINARY_FIELD = "binary_field"; private static final String FIXED_LEN_BYTE_ARRAY_FIELD = "flba_field"; - - private File testFile; + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); @Test public void testConverter() throws IOException { ParquetReader reader = - ParquetReader.builder(new SimpleReadSupport(), new Path(this.testFile.getAbsolutePath())).build(); + ParquetReader.builder(new SimpleReadSupport(), new Path(testFile().getAbsolutePath())).build(); for (SimpleRecord record = reader.read(); record != null; record = reader.read()) { for (SimpleRecord.NameValue value : record.getValues()) { switch(value.getName()) { @@ -85,14 +82,8 @@ public void testConverter() throws IOException { @Before public void setUp() throws IOException { - this.testFile = createTempFile(); MessageType schema = createSchema(); - write(schema, testFile); - } - - @After - public void tearDown() { - this.testFile.delete(); + write(schema); } private MessageType createSchema() { @@ -107,8 +98,8 @@ private MessageType createSchema() { ); } - private void write(MessageType schema, File f) throws IOException { - Path fsPpath = new Path(f.getPath()); + private void write(MessageType schema) throws IOException { + Path fsPpath = new Path(testFile().getPath()); Configuration conf = new Configuration(); SimpleGroupFactory fact = new SimpleGroupFactory(schema); GroupWriteSupport.setSchema(schema, conf); @@ -138,10 +129,7 @@ private void write(MessageType schema, File f) throws IOException { } } - private File createTempFile() throws IOException { - File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); - tmp.deleteOnExit(); - tmp.delete(); - return tmp; + private File testFile() { + return new File(this.tempFolder.getRoot(), getClass().getSimpleName() + ".parquet"); } } From fd86f7fcc54b7b86bfe6bc17db6996092687327d Mon Sep 17 00:00:00 2001 From: Masayuki Takahashi Date: Mon, 11 Feb 2019 21:05:46 +0900 Subject: [PATCH 3/3] PARQUET-1527: [parquet-tools] cat command throw java.lang.ClassCastException - Fix some issues on the tests --- .../tools/read/TestSimpleRecordConverter.java | 90 ++++++++++--------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java index 6ac0648ddc..69a339cdc3 100644 --- a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java +++ b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleRecordConverter.java @@ -32,7 +32,10 @@ import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; -import org.junit.*; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.io.File; @@ -52,29 +55,31 @@ public class TestSimpleRecordConverter { @Test public void testConverter() throws IOException { - ParquetReader reader = - ParquetReader.builder(new SimpleReadSupport(), new Path(testFile().getAbsolutePath())).build(); - for (SimpleRecord record = reader.read(); record != null; record = reader.read()) { - for (SimpleRecord.NameValue value : record.getValues()) { - switch(value.getName()) { - case INT32_FIELD: - Assert.assertEquals(32, value.getValue()); - break; - case INT64_FIELD: - Assert.assertEquals(64L, value.getValue()); - break; - case FLOAT_FIELD: - Assert.assertEquals(1.0f, value.getValue()); - break; - case DOUBLE_FIELD: - Assert.assertEquals(2.0d, value.getValue()); - break; - case BINARY_FIELD: - Assert.assertArrayEquals("foobar".getBytes(), (byte[])value.getValue()); - break; - case FIXED_LEN_BYTE_ARRAY_FIELD: - Assert.assertArrayEquals(new byte[]{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }, (byte[])value.getValue()); - break; + try ( + ParquetReader reader = + ParquetReader.builder(new SimpleReadSupport(), new Path(testFile().getAbsolutePath())).build()) { + for (SimpleRecord record = reader.read(); record != null; record = reader.read()) { + for (SimpleRecord.NameValue value : record.getValues()) { + switch(value.getName()) { + case INT32_FIELD: + Assert.assertEquals(32, value.getValue()); + break; + case INT64_FIELD: + Assert.assertEquals(64L, value.getValue()); + break; + case FLOAT_FIELD: + Assert.assertEquals(1.0f, value.getValue()); + break; + case DOUBLE_FIELD: + Assert.assertEquals(2.0d, value.getValue()); + break; + case BINARY_FIELD: + Assert.assertArrayEquals("foobar".getBytes(), (byte[])value.getValue()); + break; + case FIXED_LEN_BYTE_ARRAY_FIELD: + Assert.assertArrayEquals(new byte[]{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }, (byte[])value.getValue()); + break; + } } } } @@ -82,11 +87,10 @@ public void testConverter() throws IOException { @Before public void setUp() throws IOException { - MessageType schema = createSchema(); - write(schema); + createTestParquetFile(); } - private MessageType createSchema() { + private static MessageType createSchema() { return new MessageType("schema", new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, INT32_FIELD), new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, INT64_FIELD), @@ -98,24 +102,26 @@ private MessageType createSchema() { ); } - private void write(MessageType schema) throws IOException { - Path fsPpath = new Path(testFile().getPath()); + private void createTestParquetFile() throws IOException { + Path fsPath = new Path(testFile().getPath()); Configuration conf = new Configuration(); + + MessageType schema = createSchema(); SimpleGroupFactory fact = new SimpleGroupFactory(schema); GroupWriteSupport.setSchema(schema, conf); - ParquetWriter writer = new ParquetWriter( - fsPpath, - new GroupWriteSupport(), - CompressionCodecName.UNCOMPRESSED, - 1024, - 1024, - 512, - true, - false, - ParquetProperties.WriterVersion.PARQUET_2_0, - conf); - try { + try ( + ParquetWriter writer = new ParquetWriter<>( + fsPath, + new GroupWriteSupport(), + CompressionCodecName.UNCOMPRESSED, + 1024, + 1024, + 512, + true, + false, + ParquetProperties.WriterVersion.PARQUET_2_0, + conf)) { writer.write(fact.newGroup() .append(INT32_FIELD, 32) .append(INT64_FIELD, 64L) @@ -124,8 +130,6 @@ private void write(MessageType schema) throws IOException { .append(BINARY_FIELD, Binary.fromString("foobar")) .append(FIXED_LEN_BYTE_ARRAY_FIELD, Binary.fromConstantByteArray(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }))); - } finally { - writer.close(); } }