@@ -401,6 +401,7 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
401
401
402
402
npy_intp dims [] = {tm -> len };
403
403
404
+ // initialize all to False
404
405
final_src_match = PyArray_ZEROS (1 , dims , NPY_BOOL , 0 );
405
406
if (final_src_match == NULL ) {
406
407
goto error ;
@@ -772,7 +773,7 @@ AK_TM_transfer_object(TriMapObject* tm,
772
773
// NOTE: could use PyArray_Scalar instead of PyArray_GETITEM if we wanted to store scalars instead of Python objects; however, that is pretty uncommon for object arrays to store PyArray_Scalars
773
774
bool f_is_obj = PyArray_TYPE (array_from ) == NPY_OBJECT ;
774
775
775
- // the passed in object array is assumed to be contiguous and have NULL (not None) in each position
776
+ // the passed in object array is contiguous and has NULL (not None) in each position
776
777
PyObject * * array_to_data = (PyObject * * )PyArray_DATA (array_to );
777
778
PyObject * pyo ;
778
779
void * f ;
@@ -811,7 +812,7 @@ AK_TM_transfer_object(TriMapObject* tm,
811
812
Py_INCREF (pyo ); // one more than we need
812
813
* t ++ = pyo ;
813
814
}
814
- Py_DECREF (pyo ); // remove the extra one
815
+ Py_DECREF (pyo ); // remove the extra ref
815
816
}
816
817
else { // from_dst, dst is an array
817
818
dst_pos = 0 ;
@@ -834,6 +835,92 @@ AK_TM_transfer_object(TriMapObject* tm,
834
835
return 0 ;
835
836
}
836
837
838
+ // Returns -1 on error. Specialized transfer from any type of an array to an object array. For usage with merge, Will only transfer if the destination is not NULL.
839
+ static inline int
840
+ AK_TM_transfer_object_if_null (TriMapObject * tm ,
841
+ bool from_src ,
842
+ PyArrayObject * array_from ,
843
+ PyArrayObject * array_to
844
+ ) {
845
+ Py_ssize_t one_count = from_src ? tm -> src_one_count : tm -> dst_one_count ;
846
+ TriMapOne * one_pairs = from_src ? tm -> src_one : tm -> dst_one ;
847
+
848
+ // NOTE: could use PyArray_Scalar instead of PyArray_GETITEM if we wanted to store scalars instead of Python objects; however, that is pretty uncommon for object arrays to store PyArray_Scalars
849
+ bool f_is_obj = PyArray_TYPE (array_from ) == NPY_OBJECT ;
850
+
851
+ // the passed in object array is contiguous and have NULL (not None) in each position
852
+ PyObject * * array_to_data = (PyObject * * )PyArray_DATA (array_to );
853
+ PyObject * pyo ;
854
+ void * f ;
855
+ TriMapOne * o = one_pairs ;
856
+ TriMapOne * o_end = o + one_count ;
857
+ for (; o < o_end ; o ++ ) {
858
+ if (array_to_data [o -> to ] == NULL ) {
859
+ f = PyArray_GETPTR1 (array_from , o -> from );
860
+ if (f_is_obj ) {
861
+ pyo = * (PyObject * * )f ;
862
+ Py_INCREF (pyo );
863
+ }
864
+ else { // will convert any value to an object
865
+ pyo = PyArray_GETITEM (array_from , f );
866
+ }
867
+ array_to_data [o -> to ] = pyo ;
868
+ }
869
+ }
870
+ PyObject * * t ;
871
+ PyObject * * t_end ;
872
+ npy_intp dst_pos ;
873
+ npy_int64 f_pos ;
874
+ PyArrayObject * dst ;
875
+ for (Py_ssize_t i = 0 ; i < tm -> many_count ; i ++ ) {
876
+ t = array_to_data + tm -> many_to [i ].start ;
877
+ t_end = array_to_data + tm -> many_to [i ].stop ;
878
+
879
+ if (from_src ) {
880
+ while (t < t_end ) {
881
+ if (* t == NULL ) {
882
+ f = PyArray_GETPTR1 (array_from , tm -> many_from [i ].src );
883
+ if (f_is_obj ) {
884
+ pyo = * (PyObject * * )f ;
885
+ Py_INCREF (pyo );
886
+ }
887
+ else {
888
+ pyo = PyArray_GETITEM (array_from , f ); // given a new ref
889
+ }
890
+ * t ++ = pyo ;
891
+ }
892
+ else {
893
+ t ++ ;
894
+ }
895
+ }
896
+ }
897
+ else { // from_dst, dst is an array
898
+ dst_pos = 0 ;
899
+ dst = tm -> many_from [i ].dst ;
900
+ while (t < t_end ) {
901
+ if (* t == NULL ) {
902
+ f_pos = * (npy_int64 * )PyArray_GETPTR1 (dst , dst_pos );
903
+ f = PyArray_GETPTR1 (array_from , f_pos );
904
+ if (f_is_obj ) {
905
+ pyo = * (PyObject * * )f ;
906
+ Py_INCREF (pyo );
907
+ }
908
+ else {
909
+ pyo = PyArray_GETITEM (array_from , f );
910
+ }
911
+ * t ++ = pyo ;
912
+ dst_pos ++ ;
913
+ }
914
+ else {
915
+ t ++ ;
916
+ dst_pos ++ ;
917
+ }
918
+ }
919
+ }
920
+ }
921
+ return 0 ;
922
+ }
923
+
837
924
// Returns -1 on error.
838
925
static inline int
839
926
AK_TM_fill_object (TriMapObject * tm ,
@@ -855,7 +942,7 @@ AK_TM_fill_object(TriMapObject* tm,
855
942
return 0 ;
856
943
}
857
944
858
- #define AK_TM_TRANSFER_FLEXIBLE (c_type ) do { \
945
+ #define AK_TM_TRANSFER_FLEXIBLE (c_type , from_src , array_from , array_to ) do { \
859
946
Py_ssize_t one_count = from_src ? tm->src_one_count : tm->dst_one_count;\
860
947
TriMapOne* one_pairs = from_src ? tm->src_one : tm->dst_one; \
861
948
npy_intp t_element_size = PyArray_ITEMSIZE(array_to); \
@@ -1003,10 +1090,10 @@ AK_TM_map_no_fill(TriMapObject* tm,
1003
1090
}
1004
1091
}
1005
1092
else if (dtype_is_unicode ) {
1006
- AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 );
1093
+ AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 , from_src , array_from , array_to );
1007
1094
}
1008
1095
else if (dtype_is_string ) {
1009
- AK_TM_TRANSFER_FLEXIBLE (char );
1096
+ AK_TM_TRANSFER_FLEXIBLE (char , from_src , array_from , array_to );
1010
1097
}
1011
1098
else {
1012
1099
if (AK_TM_transfer_scalar (tm , from_src , array_from , array_to )) {
@@ -1048,6 +1135,102 @@ TriMap_map_dst_no_fill(TriMapObject *self, PyObject *arg) {
1048
1135
return AK_TM_map_no_fill (self , from_src , array_from );
1049
1136
}
1050
1137
1138
+ static inline PyObject *
1139
+ TriMap_map_merge (TriMapObject * tm , PyObject * args )
1140
+ {
1141
+ // both are "from_" arrays
1142
+ PyArrayObject * array_src ;
1143
+ PyArrayObject * array_dst ;
1144
+
1145
+ if (!PyArg_ParseTuple (args ,
1146
+ "O!O!:map_merge" ,
1147
+ & PyArray_Type , & array_src ,
1148
+ & PyArray_Type , & array_dst
1149
+ )) {
1150
+ return NULL ;
1151
+ }
1152
+ if (!tm -> finalized ) {
1153
+ PyErr_SetString (PyExc_RuntimeError , "Finalization is required" );
1154
+ return NULL ;
1155
+ }
1156
+ if (!(PyArray_NDIM (array_src ) == 1 )) {
1157
+ PyErr_SetString (PyExc_TypeError , "Array src must be 1D" );
1158
+ return NULL ;
1159
+ }
1160
+ if (!(PyArray_NDIM (array_dst ) == 1 )) {
1161
+ PyErr_SetString (PyExc_TypeError , "Array dst must be 1D" );
1162
+ return NULL ;
1163
+ }
1164
+ // passing a borrowed refs; returns a new ref
1165
+ PyArray_Descr * dtype = AK_resolve_dtype (
1166
+ PyArray_DESCR (array_src ),
1167
+ PyArray_DESCR (array_dst ));
1168
+ bool dtype_is_obj = dtype -> type_num == NPY_OBJECT ;
1169
+ bool dtype_is_unicode = dtype -> type_num == NPY_UNICODE ;
1170
+ bool dtype_is_string = dtype -> type_num == NPY_STRING ;
1171
+
1172
+ npy_intp dims [] = {tm -> len };
1173
+
1174
+ // create to array_to
1175
+ PyArrayObject * array_to ;
1176
+ if (dtype_is_obj ) {
1177
+ Py_DECREF (dtype ); // not needed
1178
+ // will initialize to NULL, not None
1179
+ array_to = (PyArrayObject * )PyArray_SimpleNew (1 , dims , NPY_OBJECT );
1180
+ }
1181
+ else if (dtype_is_unicode || dtype_is_string ) {
1182
+ array_to = (PyArrayObject * )PyArray_Zeros (1 , dims , dtype , 0 ); // steals dtype ref
1183
+ }
1184
+ else {
1185
+ array_to = (PyArrayObject * )PyArray_Empty (1 , dims , dtype , 0 ); // steals dtype ref
1186
+ }
1187
+ if (array_to == NULL ) {
1188
+ PyErr_SetNone (PyExc_MemoryError );
1189
+ return NULL ;
1190
+ }
1191
+
1192
+ // if we have fill values in src, we need to transfer from dst
1193
+ bool transfer_from_dst = PyArray_SIZE ((PyArrayObject * )tm -> final_src_fill ) != 0 ;
1194
+
1195
+ if (dtype_is_obj ) {
1196
+ if (AK_TM_transfer_object (tm , true, array_src , array_to )) {
1197
+ Py_DECREF ((PyObject * )array_to );
1198
+ return NULL ;
1199
+ }
1200
+ if (transfer_from_dst ) {
1201
+ if (AK_TM_transfer_object_if_null (tm , false, array_dst , array_to )) {
1202
+ Py_DECREF ((PyObject * )array_to );
1203
+ return NULL ;
1204
+ }
1205
+ }
1206
+ }
1207
+ else if (dtype_is_unicode ) {
1208
+ AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 , true, array_src , array_to );
1209
+ if (transfer_from_dst ) {
1210
+ AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 , false, array_dst , array_to );
1211
+ }
1212
+ }
1213
+ else if (dtype_is_string ) {
1214
+ AK_TM_TRANSFER_FLEXIBLE (char , true, array_src , array_to );
1215
+ if (transfer_from_dst ) {
1216
+ AK_TM_TRANSFER_FLEXIBLE (char , false, array_dst , array_to );
1217
+ }
1218
+ }
1219
+ else {
1220
+ if (AK_TM_transfer_scalar (tm , true, array_src , array_to )) {
1221
+ Py_DECREF ((PyObject * )array_to );
1222
+ return NULL ;
1223
+ }
1224
+ if (transfer_from_dst ) {
1225
+ if (AK_TM_transfer_scalar (tm , false, array_dst , array_to )) {
1226
+ Py_DECREF ((PyObject * )array_to );
1227
+ return NULL ;
1228
+ }
1229
+ }
1230
+ }
1231
+ return (PyObject * )array_to ;
1232
+ }
1233
+
1051
1234
// Returns NULL on error.
1052
1235
static inline PyObject *
1053
1236
AK_TM_map_fill (TriMapObject * tm ,
@@ -1108,19 +1291,19 @@ AK_TM_map_fill(TriMapObject* tm,
1108
1291
}
1109
1292
}
1110
1293
else if (dtype_is_unicode ) {
1111
- AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 );
1294
+ AK_TM_TRANSFER_FLEXIBLE (Py_UCS4 , from_src , array_from , array_to );
1112
1295
if (AK_TM_fill_unicode (tm , from_src , array_to , fill_value )) {
1113
1296
goto error ;
1114
1297
}
1115
1298
}
1116
1299
else if (dtype_is_string ) {
1117
- AK_TM_TRANSFER_FLEXIBLE (char );
1300
+ AK_TM_TRANSFER_FLEXIBLE (char , from_src , array_from , array_to );
1118
1301
if (AK_TM_fill_string (tm , from_src , array_to , fill_value )) {
1119
1302
goto error ;
1120
1303
}
1121
1304
}
1122
1305
else {
1123
- // Most simple is to fill with scalar, then overwrite values as needed; for object and flexible dtypes this is not efficient; for object dtypes, this obbligates us to decref the filled value when assigning
1306
+ // Most simple is to fill with scalar, then overwrite values as needed; for object and flexible dtypes this is not efficient; for object dtypes, this obligates us to decref the filled value when assigning
1124
1307
if (PyArray_FillWithScalar (array_to , fill_value )) { // -1 on error
1125
1308
goto error ;
1126
1309
}
@@ -1180,6 +1363,8 @@ TriMap_map_dst_fill(TriMapObject *self, PyObject *args) {
1180
1363
return AK_TM_map_fill (self , from_src , array_from , fill_value , fill_value_dtype );
1181
1364
}
1182
1365
1366
+
1367
+
1183
1368
static PyMethodDef TriMap_methods [] = {
1184
1369
{"register_one" , (PyCFunction )TriMap_register_one , METH_VARARGS , NULL },
1185
1370
{"register_unmatched_dst" , (PyCFunction )TriMap_register_unmatched_dst , METH_NOARGS , NULL },
@@ -1192,6 +1377,7 @@ static PyMethodDef TriMap_methods[] = {
1192
1377
{"map_dst_no_fill" , (PyCFunction )TriMap_map_dst_no_fill , METH_O , NULL },
1193
1378
{"map_src_fill" , (PyCFunction )TriMap_map_src_fill , METH_VARARGS , NULL },
1194
1379
{"map_dst_fill" , (PyCFunction )TriMap_map_dst_fill , METH_VARARGS , NULL },
1380
+ {"map_merge" , (PyCFunction )TriMap_map_merge , METH_VARARGS , NULL },
1195
1381
{NULL },
1196
1382
};
1197
1383
0 commit comments