diff --git a/theseus/optimizer/nonlinear/levenberg_marquardt.py b/theseus/optimizer/nonlinear/levenberg_marquardt.py
index ce1902a3..7e1b502f 100644
--- a/theseus/optimizer/nonlinear/levenberg_marquardt.py
+++ b/theseus/optimizer/nonlinear/levenberg_marquardt.py
@@ -190,11 +190,18 @@ def _check_accept(
         den = (delta * (damping * delta + linearization.Atb.squeeze(2))).sum(dim=1) / 2
         rho = (previous_err - err) / den
         reject_indices = rho <= damping_accept
+        previous_damping = self._damping.clone()
         self._damping = torch.where(
             reject_indices,
             self._damping * up_damping_ratio,
             self._damping / down_damping_ratio,
         )
+        # Batch elements that have already converged keep their previous damping
+        self._damping = torch.where(
+            rho.abs() < 1e-16,
+            previous_damping,
+            self._damping,
+        )
         self._damping = self._damping.clamp(
             LevenbergMarquardt._MIN_DAMPING, LevenbergMarquardt._MAX_DAMPING
         )
diff --git a/theseus/optimizer/nonlinear/nonlinear_least_squares.py b/theseus/optimizer/nonlinear/nonlinear_least_squares.py
index a823f4a0..42110d10 100644
--- a/theseus/optimizer/nonlinear/nonlinear_least_squares.py
+++ b/theseus/optimizer/nonlinear/nonlinear_least_squares.py
@@ -170,7 +170,7 @@ def _optimize_loop(
 
             # For now, step size is combined with delta. If we add more sophisticated
             # line search, will probably need to pass it separately, or compute inside.
-            err, all_rejected = self._step(
+            err, all_rejected, reject_indices = self._step(
                 delta * steps_tensor,
                 info.last_err,
                 converged_indices,
@@ -196,6 +196,8 @@ def _optimize_loop(
                         f"Error: {err.mean().item()}"
                     )
                 converged_indices = self._check_convergence(err, info.last_err)
+                if (reject_indices is not None) and reject_indices.any():
+                    converged_indices[reject_indices] = False
                 info.status[
                     converged_indices.cpu().numpy()
                 ] = NonlinearOptimizerStatus.CONVERGED
@@ -355,14 +357,18 @@ def _step(
         else:
             reject_indices = self._complete_step(delta, err, previous_err, **kwargs)
 
+        # Batch elements that already converged must not be counted as rejected
+        if reject_indices is not None:
+            reject_indices[converged_indices] = False
+            
         if reject_indices is not None and reject_indices.all():
-            return previous_err, True
+            return previous_err, True, reject_indices
 
         self.objective.update(tensor_dict, batch_ignore_mask=reject_indices)
         if reject_indices is not None and reject_indices.any():
             # Some steps were rejected so the error computed above is not accurate
             err = self.objective.error_metric()
-        return err, False
+        return err, False, reject_indices
 
     # Resets any internal state needed by the optimizer for a new optimization
     # problem. Optimizer loop will pass all optimizer kwargs to this method.