diff --git a/theseus/optimizer/nonlinear/levenberg_marquardt.py b/theseus/optimizer/nonlinear/levenberg_marquardt.py index ce1902a3..7e1b502f 100644 --- a/theseus/optimizer/nonlinear/levenberg_marquardt.py +++ b/theseus/optimizer/nonlinear/levenberg_marquardt.py @@ -190,11 +190,18 @@ def _check_accept( den = (delta * (damping * delta + linearization.Atb.squeeze(2))).sum(dim=1) / 2 rho = (previous_err - err) / den reject_indices = rho <= damping_accept + previous_damping = self._damping.clone() self._damping = torch.where( reject_indices, self._damping * up_damping_ratio, self._damping / down_damping_ratio, ) + # If some batch have already converged, the damping factor should not be updated + self._damping = torch.where( + rho.abs() < 1e-16, + previous_damping, + self._damping, + ) self._damping = self._damping.clamp( LevenbergMarquardt._MIN_DAMPING, LevenbergMarquardt._MAX_DAMPING ) diff --git a/theseus/optimizer/nonlinear/nonlinear_least_squares.py b/theseus/optimizer/nonlinear/nonlinear_least_squares.py index a823f4a0..42110d10 100644 --- a/theseus/optimizer/nonlinear/nonlinear_least_squares.py +++ b/theseus/optimizer/nonlinear/nonlinear_least_squares.py @@ -170,7 +170,7 @@ def _optimize_loop( # For now, step size is combined with delta. If we add more sophisticated # line search, will probably need to pass it separately, or compute inside. - err, all_rejected = self._step( + err, all_rejected, reject_indices = self._step( delta * steps_tensor, info.last_err, converged_indices, @@ -196,6 +196,8 @@ def _optimize_loop( f"Error: {err.mean().item()}" ) converged_indices = self._check_convergence(err, info.last_err) + if (reject_indices is not None) and reject_indices.any(): + converged_indices[reject_indices] = False info.status[ converged_indices.cpu().numpy() ] = NonlinearOptimizerStatus.CONVERGED @@ -355,14 +357,18 @@ def _step( else: reject_indices = self._complete_step(delta, err, previous_err, **kwargs) + # If the step is converged, it should not be considered as rejected + if reject_indices is not None: + reject_indices[converged_indices] = False + if reject_indices is not None and reject_indices.all(): - return previous_err, True + return previous_err, True, reject_indices self.objective.update(tensor_dict, batch_ignore_mask=reject_indices) if reject_indices is not None and reject_indices.any(): # Some steps were rejected so the error computed above is not accurate err = self.objective.error_metric() - return err, False + return err, False, reject_indices # Resets any internal state needed by the optimizer for a new optimization # problem. Optimizer loop will pass all optimizer kwargs to this method.