diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 575a4fb..fc17d03 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -2182,7 +2182,6 @@ btree_remove(hammer_cursor_t cursor) } parent = cursor->parent; - hammer_cursor_removed_node(node, parent, cursor->parent_index); /* * Attempt to remove the parent's reference to the child. If the @@ -2202,12 +2201,33 @@ btree_remove(hammer_cursor_t cursor) * node exclusively locked and referenced, leaves the * original parent locked (as the new node), and locks the * new parent. It can return EDEADLK. + * + * We cannot call hammer_cursor_removed_node() until we are + * actually able to remove the node. If we did then tracked + * cursors in the middle of iterations could be repointed + * to a parent node. If this occurs they could end up + * scanning newly inserted records into the node (that could + * not be deleted) when they push down again. + * + * Due to the way the recursion works the final parent is left + * in cursor->parent after the recursion returns. Each + * layer on the way back up is thus able to call + * hammer_cursor_removed_node() and 'jump' the node up to + * the (same) final parent. + * + * NOTE! The local variable 'parent' is invalid after we + * call hammer_cursor_up_locked(). */ error = hammer_cursor_up_locked(cursor); + parent = NULL; + if (error == 0) { hammer_cursor_deleted_element(cursor->node, 0); error = btree_remove(cursor); if (error == 0) { + hammer_cursor_removed_node( + node, cursor->parent, + cursor->parent_index); hammer_modify_node_all(cursor->trans, node); ondisk = node->ondisk; ondisk->type = HAMMER_BTREE_TYPE_DELETED; @@ -2277,6 +2297,7 @@ btree_remove(hammer_cursor_t cursor) (ondisk->count - cursor->parent_index) * esize); --ondisk->count; hammer_modify_node_done(parent); + hammer_cursor_removed_node(node, parent, cursor->parent_index); hammer_cursor_deleted_element(parent, cursor->parent_index); hammer_flush_node(node); hammer_delete_node(cursor->trans, node); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index a4d08be..01e613c 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -371,7 +371,10 @@ hammer_cursor_up(hammer_cursor_t cursor) * Special cursor up given a locked cursor. The orignal node is not * unlocked or released and the cursor is not downgraded. * - * This function will recover from deadlocks. EDEADLK cannot be returned. + * This function can fail with EDEADLK. + * + * This function is only run when recursively deleting parent nodes + * to get rid of an empty leaf. */ int hammer_cursor_up_locked(hammer_cursor_t cursor) @@ -699,8 +702,10 @@ hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode) } /* - * node is being removed, cursors in deadlock recovery are seeked upward - * to the parent. + * We have removed from the parent and collapsed the parent. + * + * Cursors in deadlock recovery are seeked upward to the parent so the + * btree_remove() recursion works properly. */ void hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index)