@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 			goto next;
 		}
 
-	if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k)))
+	if (!bkey_cmp(k.k->p, start))
 		goto next;
 
 	while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
+		bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
+		bool back_split  = bkey_cmp(k.k->p, insert->k.p) > 0;
+
 		/*
 		 * If we're going to be splitting a compressed extent, note it
 		 * so that __bch2_trans_commit() can increase our disk
 		 * reservation:
 		 */
-		if (bkey_cmp(bkey_start_pos(k.k), start) < 0 &&
-		    bkey_cmp(k.k->p, insert->k.p) > 0 &&
+		if (((front_split && back_split) ||
+		     ((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
 		    (compressed_sectors = bch2_bkey_sectors_compressed(k)))
 			trans->extra_journal_res += compressed_sectors;
 
-		if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
+		if (front_split) {
 			update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
 			if ((ret = PTR_ERR_OR_ZERO(update)))
 				goto err;
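
For orientation between hunks: the front_split/back_split booleans introduced above classify how an existing extent overlaps the range being inserted. Below is a small standalone sketch, not part of the patch, that models extents as plain half-open integer ranges instead of bkeys; struct and function names here are hypothetical. It shows the same classification, and the both-true case is the one where the patch reserves extra journal space for compressed extents.

	/* Hypothetical model of the front_split/back_split test, using plain
	 * integer ranges in place of bkey positions. */
	#include <stdbool.h>
	#include <stdio.h>

	struct range { unsigned long long start, end; };	/* half-open: [start, end) */

	/* Existing extent sticks out in front of the insert: a front fragment survives. */
	static bool front_split(struct range old, struct range new)
	{
		return old.start < new.start;
	}

	/* Existing extent sticks out past the end of the insert: a back fragment survives. */
	static bool back_split(struct range old, struct range new)
	{
		return old.end > new.end;
	}

	int main(void)
	{
		struct range old = { 0, 128 };		/* existing extent */
		struct range new = { 32, 96 };		/* extent being inserted */

		/* Both true here: the old extent is split into two fragments, which is
		 * the case where the patch adds extra_journal_res for compressed data. */
		printf("front_split=%d back_split=%d\n",
		       front_split(old, new), back_split(old, new));
		return 0;
	}
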
@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 
 			bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
 					     BTREE_ITER_NOT_EXTENTS|
+					     BTREE_ITER_ALL_SNAPSHOTS|
+					     BTREE_ITER_INTENT);
+			ret = bch2_btree_iter_traverse(&update_iter) ?:
+				bch2_trans_update(trans, &update_iter, update,
+						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+						  flags);
+			bch2_trans_iter_exit(trans, &update_iter);
+
+			if (ret)
+				goto err;
+		}
+
+		if (k.k->p.snapshot != insert->k.p.snapshot &&
+		    (front_split || back_split)) {
+			update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+			if ((ret = PTR_ERR_OR_ZERO(update)))
+				goto err;
+
+			bkey_reassemble(update, k);
+
+			bch2_cut_front(start, update);
+			bch2_cut_back(insert->k.p, update);
+
+			bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+					     BTREE_ITER_NOT_EXTENTS|
+					     BTREE_ITER_ALL_SNAPSHOTS|
 					     BTREE_ITER_INTENT);
 			ret = bch2_btree_iter_traverse(&update_iter) ?:
 				bch2_trans_update(trans, &update_iter, update,
@@ -1038,23 +1067,48 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 		}
 
 		if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
-			ret = bch2_btree_delete_at(trans, &iter, flags);
+			update = bch2_trans_kmalloc(trans, sizeof(*update));
+			if ((ret = PTR_ERR_OR_ZERO(update)))
+				goto err;
+
+			bkey_init(&update->k);
+			update->k.p = k.k->p;
+
+			if (insert->k.p.snapshot != k.k->p.snapshot) {
+				update->k.p.snapshot = insert->k.p.snapshot;
+				update->k.type = KEY_TYPE_whiteout;
+			}
+
+			bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+					     BTREE_ITER_NOT_EXTENTS|
+					     BTREE_ITER_INTENT);
+			ret = bch2_btree_iter_traverse(&update_iter) ?:
+				bch2_trans_update(trans, &update_iter, update,
+						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+						  flags);
+			bch2_trans_iter_exit(trans, &update_iter);
+
 			if (ret)
 				goto err;
 		}
 
-		if (bkey_cmp(k.k->p, insert->k.p) > 0) {
+		if (back_split) {
 			update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
 			if ((ret = PTR_ERR_OR_ZERO(update)))
 				goto err;
 
 			bkey_reassemble(update, k);
 			bch2_cut_front(insert->k.p, update);
 
-			ret = bch2_trans_update(trans, &iter, update, flags);
+			bch2_trans_copy_iter(&update_iter, &iter);
+			update_iter.pos = update->k.p;
+			ret = bch2_trans_update(trans, &update_iter, update,
+						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+						flags);
+			bch2_trans_iter_exit(trans, &update_iter);
+
 			if (ret)
 				goto err;
-
 			goto out;
 		}
 next:
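
Taken together, the loop in the two hunks above turns one overlapping key into up to four queued updates: a front fragment, a copy of the overlapped middle preserved in the old key's snapshot, a delete or whiteout, and a back fragment. The following standalone sketch walks through that decision logic; plain integers and printf stand in for bkeys and btree updates, and none of the names are bcachefs API.

	/* Hypothetical walkthrough of the per-key overwrite logic: list the
	 * updates the loop would queue for one existing extent vs the insert. */
	#include <stdio.h>

	struct ext { unsigned long long start, end; unsigned snap; };

	static void overwrite_one(struct ext old, struct ext ins)
	{
		int front = old.start < ins.start;
		int back  = old.end   > ins.end;

		if (front)	/* keep the part of the old extent in front of the insert */
			printf("update: [%llu,%llu) snap %u (front fragment)\n",
			       old.start, ins.start, old.snap);

		if (old.snap != ins.snap && (front || back))
			/* old extent belongs to another snapshot: keep the overlapped
			 * middle visible there, trimmed to the inserted range */
			printf("update: [%llu,%llu) snap %u (middle, old snapshot)\n",
			       ins.start < old.start ? old.start : ins.start,
			       ins.end   < old.end   ? ins.end   : old.end, old.snap);

		if (old.end <= ins.end)
			/* old key fully covered up to its end position: delete it, or
			 * emit a whiteout at the inserting snapshot if they differ */
			printf("%s at %llu snap %u\n",
			       old.snap == ins.snap ? "delete" : "whiteout",
			       old.end, old.snap == ins.snap ? old.snap : ins.snap);

		if (back)	/* keep the part of the old extent past the insert */
			printf("update: [%llu,%llu) snap %u (back fragment)\n",
			       ins.end, old.end, old.snap);
	}

	int main(void)
	{
		struct ext old = { 0, 64, 1 };		/* existing extent in snapshot 1 */
		struct ext ins = { 32, 96, 2 };		/* insert in child snapshot 2 */

		/* front fragment in snap 1, middle kept in snap 1, whiteout in snap 2 */
		overwrite_one(old, ins);
		return 0;
	}
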
@@ -1086,6 +1140,39 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 	return ret;
 }
 
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans,
+				      enum btree_id btree_id, struct bpos pos)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	u32 snapshot = pos.snapshot;
+	int ret;
+
+	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
+		return 0;
+
+	pos.snapshot++;
+
+	for_each_btree_key(trans, iter, btree_id, pos,
+			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+		if (bkey_cmp(k.k->p, pos))
+			break;
+
+		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
+					      k.k->p.snapshot)) {
+			ret = !bkey_whiteout(k.k);
+			break;
+		}
+	}
+	bch2_trans_iter_exit(trans, &iter);
+
+	return ret;
+}
+
 int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 		      struct bkey_i *k, enum btree_update_flags flags)
 {
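
The helper added above means a deletion only needs to become a whiteout when some ancestor snapshot still has a visible (non-whiteout) key at the same position; otherwise a plain deletion is enough. A minimal standalone model of that check, assuming a toy parent[] array for snapshot ancestry and a flat key array in place of the btree (names like need_whiteout and is_ancestor are hypothetical, not bcachefs API):

	/* Hypothetical model of need_whiteout_for_snapshot(). */
	#include <stdbool.h>
	#include <stdio.h>

	#define NO_PARENT 0

	static const unsigned parent[] = { [1] = NO_PARENT, [2] = 1, [3] = 1 };

	/* Walk up from @id; true if @ancestor is reached. */
	static bool is_ancestor(unsigned id, unsigned ancestor)
	{
		while (id && id != ancestor)
			id = parent[id];
		return id == ancestor;
	}

	struct key { unsigned long long pos; unsigned snap; bool whiteout; };

	static bool need_whiteout(const struct key *keys, int nr,
				  unsigned long long pos, unsigned snap)
	{
		if (parent[snap] == NO_PARENT)
			return false;		/* nothing above us to hide */

		for (int i = 0; i < nr; i++) {
			if (keys[i].pos != pos || keys[i].snap == snap)
				continue;
			if (is_ancestor(snap, keys[i].snap))
				return !keys[i].whiteout;	/* visible in ancestor? */
		}
		return false;
	}

	int main(void)
	{
		/* key at pos 10 exists in snapshot 1; snapshot 2 is a child of 1 */
		struct key keys[] = { { 10, 1, false } };

		printf("delete pos 10 in snap 2 -> whiteout: %d\n",
		       need_whiteout(keys, 1, 10, 2));	/* 1: ancestor still sees it */
		printf("delete pos 99 in snap 2 -> whiteout: %d\n",
		       need_whiteout(keys, 1, 99, 2));	/* 0: nothing to hide */
		return 0;
	}
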
@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 		       btree_insert_entry_cmp(i - 1, i) >= 0);
 #endif
 
+	if (bkey_deleted(&n.k->k) &&
+	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
+		int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
+		if (unlikely(ret < 0))
+			return ret;
+
+		if (ret)
+			n.k->k.type = KEY_TYPE_whiteout;
+	}
+
 	/*
 	 * Pending updates are kept sorted: first, find position of new update,
 	 * then delete/trim any updates the new update overwrites: