Skip to content

Commit 53fe9b7

Browse files
change: Support protobuf4 (#3985)
* Support protobuf4 * formatting... * Ignore pylint * Add .proto file for ref * Add pyi file * more formatting.. * Raise lower bound of protobuf * Support protobuf 3 AND 4 * Update lower bound to ensure compatibility. Loosen PyYAML
1 parent c0aee6e commit 53fe9b7

File tree

3 files changed

+215
-730
lines changed

3 files changed

+215
-730
lines changed

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ def read_requirements(filename):
5252
"cloudpickle==2.2.1",
5353
"google-pasta",
5454
"numpy>=1.9.0,<2.0",
55-
"protobuf>=3.1,<4.0",
55+
"protobuf>=3.12,<5.0",
5656
"smdebug_rulesconfig==1.0.1",
57-
"importlib-metadata>=1.4.0,<5.0",
57+
"importlib-metadata>=1.4.0,<7.0",
5858
"packaging>=20.0",
5959
"pandas",
6060
"pathos",
6161
"schema",
62-
"PyYAML==6.0",
62+
"PyYAML~=6.0",
6363
"jsonschema",
6464
"platformdirs",
6565
"tblib==1.7.0",

src/sagemaker/amazon/record.proto

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
syntax = "proto2";
2+
3+
package aialgs.data;
4+
5+
option java_package = "com.amazonaws.aialgorithms.proto";
6+
option java_outer_classname = "RecordProtos";
7+
8+
// A sparse or dense rank-R tensor that stores data as floats (float32).
9+
message Float32Tensor {
10+
// Each value in the vector. If keys is empty this is treated as a
11+
// dense vector.
12+
repeated float values = 1 [packed = true];
13+
14+
// If not empty then the vector is treated as sparse with
15+
// each key specifying the location of the value in the sparse vector.
16+
repeated uint64 keys = 2 [packed = true];
17+
18+
// Optional shape which will allow the vector to represent a matrix.
19+
// e.g. if shape = [ 10, 20 ] then floor(keys[i] / 20) will give the row
20+
// and keys[i] % 20 will give the column.
21+
// This also supports n-dimensional tensors.
22+
// NB. this must be specified if the tensor is sparse.
23+
repeated uint64 shape = 3 [packed = true];
24+
}
25+
26+
// A sparse or dense rank-R tensor that stores data as doubles (float64).
27+
message Float64Tensor {
28+
// Each value in the vector. If keys is empty this is treated as a
29+
// dense vector.
30+
repeated double values = 1 [packed = true];
31+
32+
// If not empty then the vector is treated as sparse with
33+
// each key specifying the location of the value in the sparse vector.
34+
repeated uint64 keys = 2 [packed = true];
35+
36+
// Optional shape which will allow the vector to represent a matrix.
37+
// e.g. if shape = [ 10, 20 ] then floor(keys[i] / 20) will give the row
38+
// and keys[i] % 20 will give the column.
39+
// This also supports n-dimensional tensors.
40+
// NB. this must be specified if the tensor is sparse.
41+
repeated uint64 shape = 3 [packed = true];
42+
}
43+
44+
// A sparse or dense rank-R tensor that stores data as 32-bit ints (int32).
45+
message Int32Tensor {
46+
// Each value in the vector. If keys is empty this is treated as a
47+
// dense vector.
48+
repeated int32 values = 1 [packed = true];
49+
50+
// If not empty then the vector is treated as sparse with
51+
// each key specifying the location of the value in the sparse vector.
52+
repeated uint64 keys = 2 [packed = true];
53+
54+
// Optional shape which will allow the vector to represent a matrix.
55+
// e.g. if shape = [ 10, 20 ] then floor(keys[i] / 20) will give the row
56+
// and keys[i] % 20 will give the column.
57+
// This also supports n-dimensional tensors.
58+
// NB. this must be specified if the tensor is sparse.
59+
repeated uint64 shape = 3 [packed = true];
60+
}
61+
62+
// Support for storing binary data for parsing in other ways (such as JPEG/etc).
63+
// This is an example of another type of value and may not immediately be supported.
64+
message Bytes {
65+
repeated bytes value = 1;
66+
67+
// Stores the content type of the data if known.
68+
// This will allow the possibility of using decoders for common formats
69+
// in the future.
70+
optional string content_type = 2;
71+
}
72+
73+
message Value {
74+
oneof value {
75+
// The numbering assumes the possible use of:
76+
// - float16, float128
77+
// - int8, int16, int32
78+
Float32Tensor float32_tensor = 2;
79+
Float64Tensor float64_tensor = 3;
80+
Int32Tensor int32_tensor = 7;
81+
Bytes bytes = 9;
82+
}
83+
}
84+
85+
message Record {
86+
// Map from the name of the feature to the value.
87+
//
88+
// For vectors and libsvm-like datasets,
89+
// a single feature with the name `values`
90+
// should be specified.
91+
map<string, Value> features = 1;
92+
93+
// Optional set of labels for this record.
94+
// Similar to features field above, the key used for
95+
// generic scalar / vector labels should be 'values'
96+
map<string, Value> label = 2;
97+
98+
// Unique identifier for this record in the dataset.
99+
//
100+
// Whilst not necessary, this allows better
101+
// debugging where there are data issues.
102+
//
103+
// This is not used by the algorithm directly.
104+
optional string uid = 3;
105+
106+
// Textual metadata describing the record.
107+
//
108+
// This may include JSON-serialized information
109+
// about the source of the record.
110+
//
111+
// This is not used by the algorithm directly.
112+
optional string metadata = 4;
113+
114+
// Optional serialized JSON object that allows per-record
115+
// hyper-parameters/configuration/other information to be set.
116+
//
117+
// The meaning/interpretation of this field is defined by
118+
// the algorithm author and may not be supported.
119+
//
120+
// This is used to pass additional inference configuration
121+
// when batch inference is used (e.g. types of scores to return).
122+
optional string configuration = 5;
123+
}

0 commit comments

Comments
 (0)