75
75
76
76
from codecs import (utf_8_decode as _utf_8_decode ,
77
77
utf_8_encode as _utf_8_encode )
78
+ from collections import abc as _abc
78
79
79
80
from bson .binary import (Binary , UuidRepresentation , ALL_UUID_SUBTYPES ,
80
81
OLD_UUID_SUBTYPE ,
92
93
from bson .max_key import MaxKey
93
94
from bson .min_key import MinKey
94
95
from bson .objectid import ObjectId
95
- from bson .py3compat import (abc ,
96
- b ,
97
- PY3 ,
98
- iteritems ,
99
- text_type ,
100
- string_type ,
101
- reraise )
102
96
from bson .regex import Regex
103
97
from bson .son import SON , RE_TYPE
104
98
from bson .timestamp import Timestamp
147
141
_UNPACK_TIMESTAMP_FROM = struct .Struct ("<II" ).unpack_from
148
142
149
143
150
- if PY3 :
151
- _OBJEND = 0
152
- # Only used to generate the _ELEMENT_GETTER dict
153
- def _maybe_ord (element_type ):
154
- return ord (element_type )
155
- # Only used in _raise_unkown_type below
156
- def _elt_to_hex (element_type ):
157
- return chr (element_type ).encode ()
158
- _supported_buffer_types = (bytes , bytearray )
159
- else :
160
- _OBJEND = b"\x00 "
161
- def _maybe_ord (element_type ):
162
- return element_type
163
- def _elt_to_hex (element_type ):
164
- return element_type
165
- _supported_buffer_types = (bytes ,)
166
-
167
-
168
-
169
- if platform .python_implementation () == 'Jython' :
170
- # This is why we can't have nice things.
171
- # https://bugs.jython.org/issue2788
172
- def get_data_and_view (data ):
173
- if isinstance (data , _supported_buffer_types ):
174
- return data , data
175
- data = memoryview (data ).tobytes ()
176
- return data , data
177
- else :
178
- def get_data_and_view (data ):
179
- if isinstance (data , _supported_buffer_types ):
180
- return data , memoryview (data )
181
- view = memoryview (data )
182
- return view .tobytes (), view
144
def get_data_and_view(data):
    """Return a ``(data, view)`` pair for a bytes-like object.

    ``bytes`` and ``bytearray`` inputs are returned unchanged, paired with a
    ``memoryview`` over them. Any other object supporting the buffer
    protocol is wrapped in a ``memoryview`` first and copied to ``bytes``
    with ``tobytes()``; the pair is then ``(copy, view_of_original)``.

    Raises ``TypeError`` (from ``memoryview``) when `data` does not support
    the buffer protocol.
    """
    if isinstance(data, (bytes, bytearray)):
        # Already indexable as ints; no copy needed.
        return data, memoryview(data)
    view = memoryview(data)
    return view.tobytes(), view
183
149
184
150
185
151
def _raise_unknown_type(element_type, element_name):
    """Unknown type helper.

    Always raises InvalidBSON, naming the offending type byte and the field
    it was found on.

    `element_type` is expected to be the type byte as an int in
    ``range(256)``, the same form as the keys of ``_ELEMENT_GETTER``.
    """
    # bytes([n]) shows the raw type byte itself. chr(n).encode() would
    # UTF-8 encode it, turning any value >= 0x80 into a two-byte sequence
    # and reporting a byte that never appeared in the document.
    raise InvalidBSON("Detected unknown BSON type %r for fieldname '%s'. Are "
                      "you using the latest driver version?" % (
                          bytes([element_type]), element_name))
190
156
191
157
192
158
def _get_int (data , view , position , dummy0 , dummy1 , dummy2 ):
@@ -213,7 +179,7 @@ def _get_string(data, view, position, obj_end, opts, dummy):
213
179
if length < 1 or obj_end - position < length :
214
180
raise InvalidBSON ("invalid string length" )
215
181
end = position + length - 1
216
- if data [end ] != _OBJEND :
182
+ if data [end ] != 0 :
217
183
raise InvalidBSON ("invalid end of string" )
218
184
return _utf_8_decode (view [position :end ],
219
185
opts .unicode_decode_error_handler , True )[0 ], end + 1
@@ -226,7 +192,7 @@ def _get_object_size(data, position, obj_end):
226
192
except struct .error as exc :
227
193
raise InvalidBSON (str (exc ))
228
194
end = position + obj_size - 1
229
- if data [end ] != _OBJEND :
195
+ if data [end ] != 0 :
230
196
raise InvalidBSON ("bad eoo" )
231
197
if end >= obj_end :
232
198
raise InvalidBSON ("invalid object length" )
@@ -256,7 +222,7 @@ def _get_array(data, view, position, obj_end, opts, element_name):
256
222
"""Decode a BSON array to python list."""
257
223
size = _UNPACK_INT_FROM (data , position )[0 ]
258
224
end = position + size - 1
259
- if data [end ] != _OBJEND :
225
+ if data [end ] != 0 :
260
226
raise InvalidBSON ("bad eoo" )
261
227
262
228
position += 4
@@ -321,8 +287,8 @@ def _get_binary(data, view, position, obj_end, opts, dummy1):
321
287
uuid_representation = UuidRepresentation .PYTHON_LEGACY
322
288
return binary_value .as_uuid (uuid_representation ), end
323
289
324
- # Python3 special case. Decode subtype 0 to 'bytes'.
325
- if PY3 and subtype == 0 :
290
+ # Decode subtype 0 to 'bytes'.
291
+ if subtype == 0 :
326
292
value = data [position :end ]
327
293
else :
328
294
value = Binary (data [position :end ], subtype )
@@ -410,27 +376,27 @@ def _get_decimal128(data, view, position, dummy0, dummy1, dummy2):
410
376
# - obj_end: int, end of object to decode in 'data' if variable-length type
411
377
# - opts: a CodecOptions
412
378
# Maps a BSON element type byte, as an int, to its decoder. Each decoder is
# called with six positional arguments and returns a (value, position)
# pair. The lambdas cover types that carry no payload: they return a fixed
# value and hand back the position (third argument) unchanged.
_ELEMENT_GETTER = {
    ord(BSONNUM): _get_float,
    ord(BSONSTR): _get_string,
    ord(BSONOBJ): _get_object,
    ord(BSONARR): _get_array,
    ord(BSONBIN): _get_binary,
    ord(BSONUND): lambda u, v, w, x, y, z: (None, w),  # Deprecated undefined
    ord(BSONOID): _get_oid,
    ord(BSONBOO): _get_boolean,
    ord(BSONDAT): _get_date,
    ord(BSONNUL): lambda u, v, w, x, y, z: (None, w),
    ord(BSONRGX): _get_regex,
    ord(BSONREF): _get_ref,  # Deprecated DBPointer
    ord(BSONCOD): _get_code,
    ord(BSONSYM): _get_string,  # Deprecated symbol
    ord(BSONCWS): _get_code_w_scope,
    ord(BSONINT): _get_int,
    ord(BSONTIM): _get_timestamp,
    ord(BSONLON): _get_int64,
    ord(BSONDEC): _get_decimal128,
    ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w),
    ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w)}
434
400
435
401
436
402
if _USE_C :
@@ -488,7 +454,7 @@ def _bson_to_dict(data, opts):
488
454
except Exception :
489
455
# Change exception type to InvalidBSON but preserve traceback.
490
456
_ , exc_value , exc_tb = sys .exc_info ()
491
- reraise ( InvalidBSON , exc_value , exc_tb )
457
+ raise InvalidBSON ( str ( exc_value )). with_traceback ( exc_tb )
492
458
if _USE_C :
493
459
_bson_to_dict = _cbson ._bson_to_dict
494
460
@@ -498,7 +464,7 @@ def _bson_to_dict(data, opts):
498
464
# Pre-bound struct packers (all little-endian).
_PACK_LENGTH_SUBTYPE = struct.Struct("<iB").pack  # int32 + unsigned byte
_PACK_LONG = struct.Struct("<q").pack             # int64
_PACK_TIMESTAMP = struct.Struct("<II").pack       # two uint32
# Pre-encoded, NUL-terminated names "0" through "999", built once at import.
_LIST_NAMES = tuple((str(i) + "\x00").encode('utf8') for i in range(1000))
502
468
503
469
504
470
def gen_list_name ():
@@ -513,7 +479,7 @@ def gen_list_name():
513
479
514
480
counter = itertools .count (1000 )
515
481
while True :
516
- yield b (str (next (counter ))) + b "\x00 "
482
+ yield (str (next (counter )) + "\x00 " ). encode ( 'utf8' )
517
483
518
484
519
485
def _make_c_string_check (string ):
@@ -548,46 +514,32 @@ def _make_c_string(string):
548
514
return _utf_8_encode (string )[0 ] + b"\x00 "
549
515
550
516
551
- if PY3 :
552
- def _make_name (string ):
553
- """Make a 'C' string suitable for a BSON key."""
554
- # Keys can only be text in python 3.
555
- if "\x00 " in string :
556
- raise InvalidDocument ("BSON keys / regex patterns must not "
557
- "contain a NUL character" )
558
- return _utf_8_encode (string )[0 ] + b"\x00 "
559
- else :
560
- # Keys can be unicode or bytes in python 2.
561
- _make_name = _make_c_string_check
517
def _make_name(string):
    """Make a 'C' string suitable for a BSON key.

    Returns `string` UTF-8 encoded with a single trailing NUL byte.
    Raises InvalidDocument if `string` itself contains a NUL character.
    """
    # Keys can only be text, so the NUL check is a str containment test.
    if "\x00" in string:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NUL character")
    return _utf_8_encode(string)[0] + b"\x00"
562
524
563
525
564
526
def _encode_float(name, value, dummy0, dummy1):
    """Encode a float.

    Returns the type byte 0x01, then `name` as given, then `value` packed
    by ``_PACK_FLOAT``. The two trailing parameters are unused.
    """
    return b"\x01" + name + _PACK_FLOAT(value)
567
529
568
530
569
- if PY3 :
570
- def _encode_bytes (name , value , dummy0 , dummy1 ):
571
- """Encode a python bytes."""
572
- # Python3 special case. Store 'bytes' as BSON binary subtype 0.
573
- return b"\x05 " + name + _PACK_INT (len (value )) + b"\x00 " + value
574
- else :
575
- def _encode_bytes (name , value , dummy0 , dummy1 ):
576
- """Encode a python str (python 2.x)."""
577
- try :
578
- _utf_8_decode (value , None , True )
579
- except UnicodeError :
580
- raise InvalidStringData ("strings in documents must be valid "
581
- "UTF-8: %r" % (value ,))
582
- return b"\x02 " + name + _PACK_INT (len (value ) + 1 ) + value + b"\x00 "
531
def _encode_bytes(name, value, dummy0, dummy1):
    """Encode a python bytes.

    Returns the type byte 0x05, `name` as given, the packed length of
    `value`, a subtype byte of 0, then `value` itself. The two trailing
    parameters are unused.
    """
    # Store 'bytes' as BSON binary subtype 0.
    return b"\x05" + name + _PACK_INT(len(value)) + b"\x00" + value
583
535
584
536
585
537
def _encode_mapping(name, value, check_keys, opts):
    """Encode a mapping type.

    Returns the type byte 0x03 and `name`, followed by the document bytes.
    `check_keys` and `opts` are passed through to ``_element_to_bson`` for
    each item.
    """
    if _raw_document_class(value):
        # Use the object's own `raw` bytes verbatim instead of re-encoding.
        return b'\x03' + name + value.raw
    data = b"".join([_element_to_bson(key, val, check_keys, opts)
                     for key, val in value.items()])
    # The length prefix counts itself (4 bytes) and the trailing NUL (1).
    return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00"
592
544
593
545
@@ -603,7 +555,7 @@ def _encode_dbref(name, value, check_keys, opts):
603
555
if value .database is not None :
604
556
buf += _name_value_to_bson (
605
557
b"$db\x00 " , value .database , check_keys , opts )
606
- for key , val in iteritems ( value ._DBRef__kwargs ):
558
+ for key , val in value ._DBRef__kwargs . items ( ):
607
559
buf += _element_to_bson (key , val , check_keys , opts )
608
560
609
561
buf += b"\x00 "
@@ -751,8 +703,7 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2):
751
703
float : _encode_float ,
752
704
int : _encode_int ,
753
705
list : _encode_list ,
754
- # unicode in py2, str in py3
755
- text_type : _encode_text ,
706
+ str : _encode_text ,
756
707
tuple : _encode_list ,
757
708
type (None ): _encode_none ,
758
709
uuid .UUID : _encode_uuid ,
@@ -770,7 +721,7 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2):
770
721
UUIDLegacy : _encode_binary ,
771
722
Decimal128 : _encode_decimal128 ,
772
723
# Special case. This will never be looked up directly.
773
- abc .Mapping : _encode_mapping ,
724
+ _abc .Mapping : _encode_mapping ,
774
725
}
775
726
776
727
@@ -786,9 +737,6 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2):
786
737
255 : _encode_minkey ,
787
738
}
788
739
789
- if not PY3 :
790
- _ENCODERS [long ] = _encode_long
791
-
792
740
793
741
_BUILT_IN_TYPES = tuple (t for t in _ENCODERS )
794
742
@@ -848,7 +796,7 @@ def _name_value_to_bson(name, value, check_keys, opts,
848
796
849
797
def _element_to_bson (key , value , check_keys , opts ):
850
798
"""Encode a single key, value pair."""
851
- if not isinstance (key , string_type ):
799
+ if not isinstance (key , str ):
852
800
raise InvalidDocument ("documents must have only string keys, "
853
801
"key was %r" % (key ,))
854
802
if check_keys :
@@ -870,7 +818,7 @@ def _dict_to_bson(doc, check_keys, opts, top_level=True):
870
818
if top_level and "_id" in doc :
871
819
elements .append (_name_value_to_bson (b"_id\x00 " , doc ["_id" ],
872
820
check_keys , opts ))
873
- for ( key , value ) in iteritems ( doc ):
821
+ for key , value in doc . items ( ):
874
822
if not top_level or key != "_id" :
875
823
elements .append (_element_to_bson (key , value ,
876
824
check_keys , opts ))
@@ -1017,7 +965,7 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
1017
965
if data_len - position < obj_size :
1018
966
raise InvalidBSON ("invalid object size" )
1019
967
obj_end = position + obj_size - 1
1020
- if data [obj_end ] != _OBJEND :
968
+ if data [obj_end ] != 0 :
1021
969
raise InvalidBSON ("bad eoo" )
1022
970
if use_raw :
1023
971
docs .append (
@@ -1036,7 +984,7 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
1036
984
except Exception :
1037
985
# Change exception type to InvalidBSON but preserve traceback.
1038
986
_ , exc_value , exc_tb = sys .exc_info ()
1039
- reraise ( InvalidBSON , exc_value , exc_tb )
987
+ raise InvalidBSON ( str ( exc_value )). with_traceback ( exc_tb )
1040
988
1041
989
1042
990
if _USE_C :
@@ -1051,7 +999,7 @@ def _decode_selective(rawdoc, fields, codec_options):
1051
999
else :
1052
1000
# Else, use the specified document_class.
1053
1001
doc = codec_options .document_class ()
1054
- for key , value in iteritems ( rawdoc ):
1002
+ for key , value in rawdoc . items ( ):
1055
1003
if key in fields :
1056
1004
if fields [key ] == 1 :
1057
1005
doc [key ] = _bson_to_dict (rawdoc .raw , codec_options )[key ]
0 commit comments