diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 575a4fb..b0e4310 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -2182,7 +2182,6 @@ btree_remove(hammer_cursor_t cursor) } parent = cursor->parent; - hammer_cursor_removed_node(node, parent, cursor->parent_index); /* * Attempt to remove the parent's reference to the child. If the @@ -2202,12 +2201,33 @@ btree_remove(hammer_cursor_t cursor) * node exclusively locked and referenced, leaves the * original parent locked (as the new node), and locks the * new parent. It can return EDEADLK. + * + * We cannot call hammer_cursor_removed_node() until we are + * actually able to remove the node. If we did then tracked + * cursors in the middle of iterations could be repointed + * to a parent node. If this occurs they could end up + * scanning newly inserted records into the node (that could + * not be deleted) when they push down again. + * + * Due to the way the recursion works the final parent is left + * in cursor->parent after the recursion returns. Each + * layer on the way back up is thus able to call + * hammer_cursor_removed_node() and 'jump' the node up to + * the (same) final parent. + * + * NOTE! The local variable 'parent' is invalid after we + * call hammer_cursor_up_locked(). */ error = hammer_cursor_up_locked(cursor); + parent = NULL; + if (error == 0) { hammer_cursor_deleted_element(cursor->node, 0); error = btree_remove(cursor); if (error == 0) { + hammer_cursor_removed_node( + node, cursor->parent, + cursor->parent_index); hammer_modify_node_all(cursor->trans, node); ondisk = node->ondisk; ondisk->type = HAMMER_BTREE_TYPE_DELETED; @@ -2271,12 +2291,14 @@ btree_remove(hammer_cursor_t cursor) } /* - * Delete the subtree reference in the parent + * Delete the subtree reference in the parent. Include + * boundary element at end. */ bcopy(&elm[1], &elm[0], (ondisk->count - cursor->parent_index) * esize); --ondisk->count; hammer_modify_node_done(parent); + hammer_cursor_removed_node(node, parent, cursor->parent_index); hammer_cursor_deleted_element(parent, cursor->parent_index); hammer_flush_node(node); hammer_delete_node(cursor->trans, node); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index a4d08be..ae546c4 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -371,13 +371,17 @@ hammer_cursor_up(hammer_cursor_t cursor) * Special cursor up given a locked cursor. The orignal node is not * unlocked or released and the cursor is not downgraded. * - * This function will recover from deadlocks. EDEADLK cannot be returned. + * This function can fail with EDEADLK. + * + * This function is only run when recursively deleting parent nodes + * to get rid of an empty leaf. */ int hammer_cursor_up_locked(hammer_cursor_t cursor) { hammer_node_t save; int error; + int save_index; /* * If the parent is NULL we are at the root of the B-Tree and @@ -387,6 +391,7 @@ hammer_cursor_up_locked(hammer_cursor_t cursor) return (ENOENT); save = cursor->node; + save_index = cursor->index; /* * Set the node to its parent. @@ -399,14 +404,18 @@ hammer_cursor_up_locked(hammer_cursor_t cursor) /* * load the new parent, attempt to exclusively lock it. Note that * we are still holding the old parent (now cursor->node) exclusively - * locked. This can return EDEADLK. + * locked. + * + * This can return EDEADLK. Undo the operation on any error. These + * up sequences can occur during iterations so be sure to restore + * the index. */ error = hammer_load_cursor_parent(cursor, 1); if (error) { cursor->parent = cursor->node; cursor->parent_index = cursor->index; cursor->node = save; - cursor->index = 0; + cursor->index = save_index; } return(error); } @@ -699,8 +708,10 @@ hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode) } /* - * node is being removed, cursors in deadlock recovery are seeked upward - * to the parent. + * We have removed from the parent and collapsed the parent. + * + * Cursors in deadlock recovery are seeked upward to the parent so the + * btree_remove() recursion works properly. */ void hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index) diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index 574b74d..05e12a0 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -462,8 +462,10 @@ hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, */ if (error == EDEADLK) { while (error == EDEADLK) { + hammer_sync_lock_sh(trans); hammer_recover_cursor(&cursor); error = hammer_cursor_upgrade(&cursor); + hammer_sync_unlock(trans); } } else { if (error == EALREADY) diff --git a/sys/vfs/hammer/hammer_rebalance.c b/sys/vfs/hammer/hammer_rebalance.c index 012254e..dab6fa8 100644 --- a/sys/vfs/hammer/hammer_rebalance.c +++ b/sys/vfs/hammer/hammer_rebalance.c @@ -147,6 +147,14 @@ retry: * Update returned scan position and do a flush if * necessary. * + * WARNING: We extract the base using the leaf element + * type but this could be an internal node. The + * base is the same either way. + * + * However, due to the rebalancing operation the + * cursor position may have exceeded the right-hand + * boundary. + * * WARNING: See warnings in hammer_unlock_cursor() * function. */ @@ -163,6 +171,19 @@ retry: } /* + * Before iterating check if the rebalance operation caused + * the cursor to index past the right-hand boundary and make + * sure to stop if it does. Otherwise the iteration may + * panic e.g. due to the key maxing out its fields and no + * longer being within the strict bounds of the root node. + */ + if (hammer_btree_cmp(&rebal->key_cur, &cursor.key_end) > 0) { + kprintf("HAMMER: Debug: rebalance hit right bndry\n"); + rebal->key_cur = cursor.key_end; + break; + } + + /* * Iterate, stop if a signal was received. */ if ((error = hammer_signal_check(trans->hmp)) != 0)