Skip to content

Commit 34f16fb

Browse files
feat: add additional parameters to CsvOptions and ParquetOptions (#3370)
* feat: add additional parameters to CsvOptions and ParquetOptions
* fix lint
* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Addressed review comments

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent ceb270c commit 34f16fb

File tree

4 files changed

+76
-6
lines changed

4 files changed

+76
-6
lines changed

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@
2626
*/
2727
public final class CsvOptions extends FormatOptions {
2828

29-
private static final long serialVersionUID = 2193570529308612708L;
29+
private static final long serialVersionUID = 2193570529308612709L;
3030

3131
private final Boolean allowJaggedRows;
3232
private final Boolean allowQuotedNewLines;
3333
private final String encoding;
3434
private final String fieldDelimiter;
35+
private final String nullMarker;
3536
private final String quote;
3637
private final Long skipLeadingRows;
3738
private final Boolean preserveAsciiControlCharacters;
@@ -42,6 +43,7 @@ public static final class Builder {
4243
private Boolean allowQuotedNewLines;
4344
private String encoding;
4445
private String fieldDelimiter;
46+
private String nullMarker;
4547
private String quote;
4648
private Long skipLeadingRows;
4749
private Boolean preserveAsciiControlCharacters;
@@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
5355
this.allowQuotedNewLines = csvOptions.allowQuotedNewLines;
5456
this.encoding = csvOptions.encoding;
5557
this.fieldDelimiter = csvOptions.fieldDelimiter;
58+
this.nullMarker = csvOptions.nullMarker;
5659
this.quote = csvOptions.quote;
5760
this.skipLeadingRows = csvOptions.skipLeadingRows;
5861
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
@@ -110,6 +113,18 @@ public Builder setFieldDelimiter(String fieldDelimiter) {
110113
return this;
111114
}
112115

116+
/**
117+
* [Optional] Specifies a string that represents a null value in a CSV file. For example, if you
118+
* specify "\N", BigQuery interprets "\N" as a null value when querying a CSV file. The
119+
* default value is the empty string. If you set this property to a custom value, BigQuery
120+
* throws an error if an empty string is present for all data types except for STRING and BYTE.
121+
* For STRING and BYTE columns, BigQuery interprets the empty string as an empty value.
*
* @param nullMarker the null-marker string to store on this builder
* @return this builder, so that calls can be chained
122+
*/
123+
public Builder setNullMarker(String nullMarker) {
124+
this.nullMarker = nullMarker;
125+
return this;
126+
}
127+
113128
/**
114129
* Sets the value that is used to quote data sections in a CSV file. BigQuery converts the
115130
* string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split
@@ -154,6 +169,7 @@ private CsvOptions(Builder builder) {
154169
this.allowQuotedNewLines = builder.allowQuotedNewLines;
155170
this.encoding = builder.encoding;
156171
this.fieldDelimiter = builder.fieldDelimiter;
172+
this.nullMarker = builder.nullMarker;
157173
this.quote = builder.quote;
158174
this.skipLeadingRows = builder.skipLeadingRows;
159175
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
@@ -192,6 +208,11 @@ public String getFieldDelimiter() {
192208
return fieldDelimiter;
193209
}
194210

211+
/**
* Returns the string that represents a null value in a CSV file, i.e. the value copied from the
* builder's {@code nullMarker} field when this object was constructed.
*
* <p>NOTE(review): appears to be {@code null} when no marker was set (fromPb only sets it for a
* non-null API value) — confirm.
*/
212+
public String getNullMarker() {
213+
return nullMarker;
214+
}
215+
195216
/** Returns the value that is used to quote data sections in a CSV file. */
196217
public String getQuote() {
197218
return quote;
@@ -226,6 +247,7 @@ public String toString() {
226247
.add("allowQuotedNewLines", allowQuotedNewLines)
227248
.add("encoding", encoding)
228249
.add("fieldDelimiter", fieldDelimiter)
250+
.add("nullMarker", nullMarker)
229251
.add("quote", quote)
230252
.add("skipLeadingRows", skipLeadingRows)
231253
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
@@ -240,6 +262,7 @@ public int hashCode() {
240262
allowQuotedNewLines,
241263
encoding,
242264
fieldDelimiter,
265+
nullMarker,
243266
quote,
244267
skipLeadingRows,
245268
preserveAsciiControlCharacters);
@@ -258,6 +281,7 @@ com.google.api.services.bigquery.model.CsvOptions toPb() {
258281
csvOptions.setAllowQuotedNewlines(allowQuotedNewLines);
259282
csvOptions.setEncoding(encoding);
260283
csvOptions.setFieldDelimiter(fieldDelimiter);
284+
csvOptions.setNullMarker(nullMarker);
261285
csvOptions.setQuote(quote);
262286
csvOptions.setSkipLeadingRows(skipLeadingRows);
263287
csvOptions.setPreserveAsciiControlCharacters(preserveAsciiControlCharacters);
@@ -283,6 +307,9 @@ static CsvOptions fromPb(com.google.api.services.bigquery.model.CsvOptions csvOp
283307
if (csvOptions.getFieldDelimiter() != null) {
284308
builder.setFieldDelimiter(csvOptions.getFieldDelimiter());
285309
}
310+
if (csvOptions.getNullMarker() != null) {
311+
builder.setNullMarker(csvOptions.getNullMarker());
312+
}
286313
if (csvOptions.getQuote() != null) {
287314
builder.setQuote(csvOptions.getQuote());
288315
}

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121

2222
public class ParquetOptions extends FormatOptions {
2323

24-
private static final long serialVersionUID = 1992L;
24+
private static final long serialVersionUID = 1993L;
2525

2626
private final Boolean enableListInference;
2727
private final Boolean enumAsString;
28+
private final String mapTargetType;
2829

2930
public Boolean getEnableListInference() {
3031
return enableListInference;
@@ -34,16 +35,23 @@ public Boolean getEnumAsString() {
3435
return enumAsString;
3536
}
3637

38+
/**
* Returns how the Parquet map is represented, i.e. the string copied from the builder's {@code
* mapTargetType} field when this object was constructed.
*
* <p>NOTE(review): appears to be {@code null} when never set (toPb skips it when null) — confirm.
*/
39+
public String getMapTargetType() {
40+
return mapTargetType;
41+
}
42+
3743
/** A builder for {@code ParquetOptions} objects. */
3844
public static final class Builder {
3945
private Boolean enableListInference;
4046
private Boolean enumAsString;
47+
private String mapTargetType;
4148

4249
private Builder() {}
4350

4451
/**
* Creates a builder whose {@code enableListInference}, {@code enumAsString} and {@code
* mapTargetType} fields are copied from the given options object.
*
* @param parquetOptions the options whose field values seed this builder
*/
private Builder(ParquetOptions parquetOptions) {
4552
this.enableListInference = parquetOptions.enableListInference;
4653
this.enumAsString = parquetOptions.enumAsString;
54+
this.mapTargetType = parquetOptions.mapTargetType;
4755
}
4856

4957
public Builder setEnableListInference(Boolean enableListInference) {
@@ -56,6 +64,17 @@ public Builder setEnumAsString(Boolean enumAsString) {
5664
return this;
5765
}
5866

67+
/**
68+
* [Optional] Indicates how to represent a Parquet map if present.
69+
*
* @param mapTargetType the map target type string to store on this builder, for example {@code
*     "ARRAY_OF_STRUCT"}
* @return this builder, so that calls can be chained
70+
* @see <a href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#maptargettype">
71+
* MapTargetType</a>
72+
*/
73+
public Builder setMapTargetType(String mapTargetType) {
74+
this.mapTargetType = mapTargetType;
75+
return this;
76+
}
77+
5978
/**
* Creates a {@code ParquetOptions} object from the current state of this builder.
*
* @return a new {@code ParquetOptions} constructed from this builder
*/
public ParquetOptions build() {
6079
return new ParquetOptions(this);
6180
}
@@ -69,19 +88,21 @@ public Builder toBuilder() {
6988
super(FormatOptions.PARQUET);
7089
enableListInference = builder.enableListInference;
7190
enumAsString = builder.enumAsString;
91+
mapTargetType = builder.mapTargetType;
7292
}
7393

7494
/**
* Returns a string representation built with {@code MoreObjects.toStringHelper}, listing the
* {@code enableListInference}, {@code enumAsString} and {@code mapTargetType} fields.
*/
@Override
7595
public String toString() {
7696
return MoreObjects.toStringHelper(this)
7797
.add("enableListInference", enableListInference)
7898
.add("enumAsString", enumAsString)
99+
.add("mapTargetType", mapTargetType)
79100
.toString();
80101
}
81102

82103
@Override
83104
public final int hashCode() {
84-
return Objects.hash(enableListInference, enumAsString);
105+
return Objects.hash(enableListInference, enumAsString, mapTargetType);
85106
}
86107

87108
@Override
@@ -93,7 +114,9 @@ public final boolean equals(Object obj) {
93114
return false;
94115
}
95116
ParquetOptions other = (ParquetOptions) obj;
96-
return enableListInference == other.enableListInference && enumAsString == other.enumAsString;
117+
return enableListInference == other.enableListInference
118+
&& enumAsString == other.enumAsString
119+
&& Objects.equals(mapTargetType, ((ParquetOptions) obj).getMapTargetType());
97120
}
98121

99122
/** Returns a builder for a {@link ParquetOptions} object. */
@@ -110,6 +133,9 @@ static ParquetOptions fromPb(
110133
if (parquetOptions.getEnumAsString() != null) {
111134
builder.setEnumAsString(parquetOptions.getEnumAsString());
112135
}
136+
if (parquetOptions.getMapTargetType() != null) {
137+
builder.setMapTargetType(parquetOptions.getMapTargetType());
138+
}
113139
return builder.build();
114140
}
115141

@@ -122,6 +148,9 @@ com.google.api.services.bigquery.model.ParquetOptions toPb() {
122148
if (enumAsString != null) {
123149
parquetOptions.setEnumAsString(enumAsString);
124150
}
151+
if (mapTargetType != null) {
152+
parquetOptions.setMapTargetType(mapTargetType);
153+
}
125154
return parquetOptions;
126155
}
127156
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public class CsvOptionsTest {
2828
private static final Boolean ALLOW_QUOTED_NEWLINE = true;
2929
private static final Charset ENCODING = StandardCharsets.UTF_8;
3030
private static final String FIELD_DELIMITER = ",";
31+
private static final String NULL_MARKER = "\\N";
3132
private static final String QUOTE = "\"";
3233
private static final long SKIP_LEADING_ROWS = 42L;
3334

@@ -38,6 +39,7 @@ public class CsvOptionsTest {
3839
.setAllowQuotedNewLines(ALLOW_QUOTED_NEWLINE)
3940
.setEncoding(ENCODING)
4041
.setFieldDelimiter(FIELD_DELIMITER)
42+
.setNullMarker(NULL_MARKER)
4143
.setQuote(QUOTE)
4244
.setSkipLeadingRows(SKIP_LEADING_ROWS)
4345
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
@@ -65,6 +67,7 @@ public void testBuilder() {
6567
assertEquals(ALLOW_QUOTED_NEWLINE, CSV_OPTIONS.allowQuotedNewLines());
6668
assertEquals(ENCODING.name(), CSV_OPTIONS.getEncoding());
6769
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
70+
assertEquals(NULL_MARKER, CSV_OPTIONS.getNullMarker());
6871
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
6972
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
7073
assertEquals(
@@ -84,6 +87,7 @@ private void compareCsvOptions(CsvOptions expected, CsvOptions value) {
8487
assertEquals(expected.allowQuotedNewLines(), value.allowQuotedNewLines());
8588
assertEquals(expected.getEncoding(), value.getEncoding());
8689
assertEquals(expected.getFieldDelimiter(), value.getFieldDelimiter());
90+
assertEquals(expected.getNullMarker(), value.getNullMarker());
8791
assertEquals(expected.getQuote(), value.getQuote());
8892
assertEquals(expected.getSkipLeadingRows(), value.getSkipLeadingRows());
8993
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,23 @@
2424
public class ParquetOptionsTest {
2525

2626
private static final ParquetOptions OPTIONS =
27-
ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build();
27+
ParquetOptions.newBuilder()
28+
.setEnableListInference(true)
29+
.setEnumAsString(true)
30+
.setMapTargetType("ARRAY_OF_STRUCT")
31+
.build();
2832

2933
@Test
3034
public void testToBuilder() {
3135
compareParquetOptions(OPTIONS, OPTIONS.toBuilder().build());
3236
ParquetOptions parquetOptions = OPTIONS.toBuilder().setEnableListInference(true).build();
3337
assertEquals(true, parquetOptions.getEnableListInference());
34-
parquetOptions = parquetOptions.toBuilder().setEnumAsString(true).build();
38+
parquetOptions =
39+
parquetOptions
40+
.toBuilder()
41+
.setEnumAsString(true)
42+
.setMapTargetType("ARRAY_OF_STRUCT")
43+
.build();
3544
compareParquetOptions(OPTIONS, parquetOptions);
3645
}
3746

@@ -47,6 +56,7 @@ public void testBuilder() {
4756
assertEquals(FormatOptions.PARQUET, OPTIONS.getType());
4857
assertEquals(true, OPTIONS.getEnableListInference());
4958
assertEquals(true, OPTIONS.getEnumAsString());
59+
assertEquals("ARRAY_OF_STRUCT", OPTIONS.getMapTargetType());
5060
}
5161

5262
@Test

0 commit comments

Comments
 (0)