Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions libs/cua-driver/rust/crates/platform-macos/src/input/ax_actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,36 @@ pub fn perform_ax_action(element_ptr: usize, action: &str) -> anyhow::Result<()>
}
}

/// Perform a native AX scroll action when the target element advertises one.
///
/// `direction` and `by` are mapped to an ordered pair of candidate AX action
/// names by `scroll_action_candidates`. The first candidate that the element
/// advertises is performed `amount` times, with a 50 ms pause after each
/// repetition.
///
/// Each candidate pair holds both the plain and the by-page action for the
/// direction, so an element that advertises only the non-preferred
/// granularity is still scrolled natively, `amount` times at that
/// granularity.
///
/// # Returns
///
/// * `Ok(Some(action))` with the name of the AX action that was performed.
/// * `Ok(None)` when `direction` is not recognised or the element advertises
///   none of the candidates, so the caller can fall back to another strategy.
///
/// # Errors
///
/// Returns an error as soon as one repetition yields anything other than
/// `kAXErrorSuccess`. Repetitions that already succeeded are not undone.
pub fn perform_ax_scroll_action_if_supported(
    element_ptr: usize,
    direction: &str,
    by: &str,
    amount: usize,
) -> anyhow::Result<Option<&'static str>> {
    // Resolve the candidates first: an unrecognised direction must not cost
    // an accessibility query on the raw element pointer.
    let Some(candidates) = scroll_action_candidates(direction, by) else {
        return Ok(None);
    };

    // SAFETY: relies on the caller passing an `element_ptr` that is a valid
    // `AXUIElementRef` for the duration of this call. This function cannot
    // check that itself. NOTE(review): confirm at the call sites.
    let actions = unsafe { copy_action_names(element_ptr as AXUIElementRef) };

    // Candidates are ordered by preference, so take the first advertised one.
    let Some(action) = candidates
        .iter()
        .copied()
        .find(|candidate| actions.iter().any(|advertised| advertised.as_str() == *candidate))
    else {
        return Ok(None);
    };

    for _ in 0..amount {
        // SAFETY: same pointer-validity assumption as the query above.
        let err = unsafe { perform_action(element_ptr as AXUIElementRef, action) };
        if err != kAXErrorSuccess {
            anyhow::bail!("AXUIElementPerformAction({action}) failed with error {err}");
        }
        // Pause between repetitions; this blocks the calling thread.
        std::thread::sleep(std::time::Duration::from_millis(50));
    }

    Ok(Some(action))
}

fn map_action(action: &str) -> &'static str {
match action.to_lowercase().as_str() {
"press" | "click" => "AXPress",
Expand All @@ -28,6 +58,20 @@ fn map_action(action: &str) -> &'static str {
}
}

/// Map a scroll request to the AX action names worth trying, in preference order.
///
/// `direction` must be exactly `"up"`, `"down"`, `"left"` or `"right"`; any
/// other value yields `None`. When `by` is exactly `"page"` the by-page
/// action is listed first and the plain action second; for every other `by`
/// value the order is reversed.
fn scroll_action_candidates(direction: &str, by: &str) -> Option<[&'static str; 2]> {
    // Look up both action names for the direction, bailing out on unknown input.
    let (plain, paged) = match direction {
        "up" => ("AXScrollUp", "AXScrollUpByPage"),
        "down" => ("AXScrollDown", "AXScrollDownByPage"),
        "left" => ("AXScrollLeft", "AXScrollLeftByPage"),
        "right" => ("AXScrollRight", "AXScrollRightByPage"),
        _ => return None,
    };

    // The requested granularity only decides which name is tried first.
    if by == "page" {
        Some([paged, plain])
    } else {
        Some([plain, paged])
    }
}

/// Set AXFocused=true on an element (for pre-focusing before key press).
pub fn focus_element(element_ptr: usize) -> anyhow::Result<()> {
let err = unsafe {
Expand Down
37 changes: 28 additions & 9 deletions libs/cua-driver/rust/crates/platform-macos/src/tools/scroll.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ static DEF: std::sync::OnceLock<ToolDef> = std::sync::OnceLock::new();
fn def() -> &'static ToolDef {
DEF.get_or_init(|| ToolDef {
name: "scroll".into(),
description: "Scroll the target pid's focused region by synthesized keystrokes.\n\n\
Mapping: by='page' → PageDown/PageUp × amount; by='line' → DownArrow/UpArrow × amount. \
Horizontal variants use Left/Right arrow keys.\n\n\
description: "Scroll the target pid's focused region.\n\n\
If the target element advertises a native AX scroll action, that action is used first. \
Otherwise, by='page' maps to PageDown/PageUp × amount and by='line' maps to \
DownArrow/UpArrow × amount. Horizontal variants use Left/Right arrow keys.\n\n\
Comment on lines +25 to +28

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win

Make amount wording include native AX actions.

With the AX-first path, amount is no longer only “keystroke repetitions”; it also controls native AX action repetitions. Update the schema text so clients get the right contract.

Proposed fix
-                    "description": "Number of keystroke repetitions. Default: 3."
+                    "description": "Number of scroll action repetitions. Default: 3."

Also applies to: 46-50

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@libs/cua-driver/rust/crates/platform-macos/src/tools/scroll.rs` around lines
25 - 28, The schema description for the scroll tool is outdated because `amount`
is described only as keystroke repetitions even though `scroll.rs` now uses
native AX actions first when available. Update the tool text in the scroll tool
definition so the `amount` contract explicitly covers both native AX action
repetitions and fallback keypress repetitions, keeping the `by='page'` and
`by='line'` behavior description aligned with the AX-first path.

Optional element_index + window_id pre-focuses the element before scrolling.".into(),
input_schema: serde_json::json!({
"type": "object",
Expand All @@ -46,7 +47,7 @@ fn def() -> &'static ToolDef {
"type": "integer",
"minimum": 1,
"maximum": 50,
"description": "Number of keystroke repetitions. Default: 3."
"description": "Number of native AX scroll action or fallback keystroke repetitions. Default: 3."
},
"window_id": { "type": "integer" },
"element_index": { "type": "integer" },
Expand Down Expand Up @@ -115,9 +116,11 @@ impl Tool for ScrollTool {
_ => "down",
};
let key = key.to_owned();
let ax_direction = direction.to_owned();
let ax_by = by.to_owned();

// ── Focus-suppression wrap (Swift WindowChangeDetector + FocusGuard) ──
// Scroll keystrokes (PageDown / arrow) into search-box autocomplete
// Scroll actions or fallback keystrokes into search-box autocomplete
// can spawn floating helper windows; rare but real. Wrap for parity
// with the other action tools.
//
Expand All @@ -130,7 +133,7 @@ impl Tool for ScrollTool {
let result = focus_guard::with_focus_suppressed(
Some(pid),
prior_front,
"scroll.CGEvent",
"scroll",
|| async move {
// Pre-focus the element under suppression so its
// side-effects are captured by the snapshot + lease.
Expand All @@ -139,6 +142,22 @@ impl Tool for ScrollTool {
crate::input::ax_actions::focus_element(element_ptr)
}).await;
tokio::time::sleep(std::time::Duration::from_millis(30)).await;

let direction = ax_direction.clone();
let by = ax_by.clone();
match tokio::task::spawn_blocking(move || {
crate::input::ax_actions::perform_ax_scroll_action_if_supported(
element_ptr,
&direction,
&by,
amount,
)
}).await {
Ok(Ok(Some(action))) => return Ok(Ok(action.to_owned())),
Ok(Ok(None)) => {},
Ok(Err(e)) => return Ok(Err(e)),
Err(e) => return Err(e),
}
}

tokio::task::spawn_blocking(move || {
Expand All @@ -148,7 +167,7 @@ impl Tool for ScrollTool {
}
std::thread::sleep(std::time::Duration::from_millis(50));
}
Ok(())
Ok("key synthesis".to_owned())
})
.await
},
Expand All @@ -158,8 +177,8 @@ impl Tool for ScrollTool {
let changes = snapshot.detect_async().await;

match result {
Ok(Ok(())) => ToolResult::text(format!(
"Scrolled {direction} by {by} × {amount}.{}",
Ok(Ok(method)) => ToolResult::text(format!(
"Scrolled {direction} by {by} × {amount} via {method}.{}",
changes.result_suffix()
)),
Ok(Err(e)) => ToolResult::error(format!("Scroll failed: {e}")),
Expand Down
Loading