Joe Tsai 2d80e9b3ab encoding/prototext: adjust handling of invalid UTF-8
The following changes are made:
* Permit invalid UTF-8 in proto2. This goes against specified behavior,
but matches functional behavior in wire marshaling (not just in Go,
but in the other major language implementations as well).
* The Format function is specified as ignoring errors since its intended
purpose is to surface information to the human user even if it's not
exactly parsable back into a message. As such, add an unexported
allowInvalidUTF8 option that is specially used by Format.
* Add an EmitASCII option that forces the formatting of
strings and bytes to always be encoded as ASCII.
This ensures that the entire output is always ASCII as well.

Note that we do not replicate this behavior for protojson since:
* The JSON format fundamentally has a stricter and well-specified
grammar for exactly what is valid/invalid, while the text format
has not had a well-specified grammar for the longest time,
leading to all sorts of weird usages due to Hyrum's law.
* This is to ease migration from the legacy implementation,
which did permit invalid UTF-8 in proto2.
* The EmitASCII option relies on the ability to always escape
Unicode characters using ASCII escape sequences, but this is not
possible in JSON since the grammar only has an escape sequence defined
for Unicode characters \u0000 to \uffff, inclusive.
However, Unicode v12.0.0 defines characters up to \U0010FFFF,
which is beyond what the JSON grammar provides escape sequences for.

Change-Id: I2b524a904e9ec59f9ed5500e299613bc27c31a14
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/233077
Reviewed-by: Herbie Ong <herbie@google.com>
2020-05-13 05:25:02 +00:00

116 lines
2.5 KiB
Protocol Buffer

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test Protobuf definitions with proto3 syntax.
syntax = "proto3";
package pb3;
option go_package = "google.golang.org/protobuf/internal/testprotos/textpb3";
// Scalars holds one singular field for each proto3 scalar type.
message Scalars {
  // Boolean and integer scalars, declared in ascending field-number order.
  bool s_bool = 1;
  int32 s_int32 = 2;
  int64 s_int64 = 3;
  uint32 s_uint32 = 4;
  uint64 s_uint64 = 5;
  sint32 s_sint32 = 6;
  sint64 s_sint64 = 7;
  fixed32 s_fixed32 = 8;
  fixed64 s_fixed64 = 9;
  sfixed32 s_sfixed32 = 10;
  sfixed64 s_sfixed64 = 11;

  // The remaining fields are declared out of field-number order on purpose
  // (20, 21, 14, 13). Textproto marshaling is expected to emit fields in the
  // order they are declared here, regardless of field number, and these
  // fields exist to test that assumption. Leave their order and numbers as
  // they are.
  float s_float = 20;
  double s_double = 21;
  bytes s_bytes = 14;
  string s_string = 13;
}
// Repeats holds repeated fields covering boolean, integer, floating-point,
// string, and bytes element types.
message Repeats {
  // Boolean and integer element types.
  repeated bool rpt_bool = 1;
  repeated int32 rpt_int32 = 2;
  repeated int64 rpt_int64 = 3;
  repeated uint32 rpt_uint32 = 4;
  repeated uint64 rpt_uint64 = 5;

  // Floating-point element types.
  repeated float rpt_float = 6;
  repeated double rpt_double = 7;

  // Length-delimited element types.
  repeated string rpt_string = 8;
  repeated bytes rpt_bytes = 9;
}
// Proto3Optional declares every field with the proto3 `optional` label,
// which gives each field explicit presence. It covers scalar, enum, and
// message field types.
message Proto3Optional {
  // Optional scalar fields.
  optional bool opt_bool = 1;
  optional int32 opt_int32 = 2;
  optional int64 opt_int64 = 3;
  optional uint32 opt_uint32 = 4;
  optional uint64 opt_uint64 = 5;
  optional float opt_float = 6;
  optional double opt_double = 7;
  optional string opt_string = 8;
  optional bytes opt_bytes = 9;

  // Optional field of the top-level Enum type declared in this file.
  optional Enum opt_enum = 10;

  // Optional field of the Nested message type declared in this file.
  optional Nested opt_message = 11;
}
// Enum is the top-level enum type used by fields throughout this file.
// Its values are not contiguous: 0, 1, 2, and then 10.
enum Enum {
  // Zero value; proto3 uses it as the default for Enum fields.
  ZERO = 0;
  ONE = 1;
  TWO = 2;
  // Leaves a gap after TWO (numbers 3 through 9 are unused).
  TEN = 10;
}
// Enums holds one field of the top-level Enum type and one field of an enum
// type nested inside this message.
message Enums {
  // NestedEnum is scoped to Enums. Its numeric values match those of the
  // top-level Enum (0, 1, 2, 10) but use different names.
  enum NestedEnum {
    CERO = 0;
    UNO = 1;
    DOS = 2;
    DIEZ = 10;
  }

  // Field typed with the top-level Enum.
  Enum s_enum = 1;

  // Field typed with the nested enum. Field number 2 is not used here.
  NestedEnum s_nested_enum = 3;
}
// Nests wraps a single submessage field of type Nested.
message Nests {
  // The submessage. Note that it uses field number 2; number 1 is not used.
  Nested s_nested = 2;
}
// Nested is the message type that other messages in this file embed as a
// submessage. It is self-referential, so values can nest to arbitrary depth.
message Nested {
  // String payload carried at this level.
  string s_string = 1;

  // Another Nested one level down.
  Nested s_nested = 2;
}
// Oneofs holds a single oneof group whose members are an enum, a string,
// and a message.
message Oneofs {
  // At most one of the members below is set at a time.
  oneof union {
    // Enum-typed member.
    Enum oneof_enum = 1;
    // String-typed member.
    string oneof_string = 2;
    // Message-typed member.
    Nested oneof_nested = 3;
  }
}
// Maps holds map fields with a mix of key types (int32, bool, uint64, string)
// and value types (string, uint32, enum, message).
message Maps {
  // Maps with scalar or enum values.
  map<int32, string> int32_to_str = 1;
  map<bool, uint32> bool_to_uint32 = 2;
  map<uint64, Enum> uint64_to_enum = 3;

  // Maps with message values; Oneofs values themselves contain a oneof.
  map<string, Nested> str_to_nested = 4;
  map<string, Oneofs> str_to_oneofs = 5;
}
// JSONNames is a message for testing the json_name field option.
message JSONNames {
  // Sets an explicit JSON name ("foo_bar") that differs from the field name.
  string s_string = 1 [json_name = "foo_bar"];
}