Skip to content

Commit

Permalink
Merge pull request #14 from reworkd/rohan
Browse files Browse the repository at this point in the history
Tag interfering with Xpath fix
  • Loading branch information
awtkns authored Nov 27, 2023
2 parents 0e0e691 + 833da9f commit 9c8e352
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 27 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "tarsier",
"description": "Vision utilities for web interaction agents",
"private": true,
"version": "0.4.2",
"version": "0.4.3",
"author": "Reworkd AI, INC.",
"license": "MIT",
"scripts": {
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tarsier"
version = "0.4.2"
version = "0.4.3"
description = "Vision utilities for web interaction agents"
authors = ["Rohan Pandey", "Adam Watkins", "Asim Shrestha"]
readme = "README.md"
Expand Down
57 changes: 32 additions & 25 deletions tarsier/tag_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ const isInteractable = (el: HTMLElement) =>
(el.tagName.toLowerCase() === "input" && el.type !== "hidden") ||
el.role === "button";

/**
 * Reports whether `el` is an `<input>` or `<textarea>` element.
 *
 * Only the tag name is inspected (case-insensitively); the `type` attribute
 * is not consulted, so every `<input>` variant is treated as insertable.
 *
 * @param el - Element to classify.
 * @returns `true` when the lowercased tag name is `"input"` or `"textarea"`.
 */
const isTextInsertable = (el: HTMLElement) => {
  const tag = el.tagName.toLowerCase();
  return tag === "input" || tag === "textarea";
};

const emptyTagWhitelist = ["input", "textarea", "select", "button"];
const isEmpty = (el: HTMLElement) => {
const tagName = el.tagName.toLowerCase();
Expand Down Expand Up @@ -155,43 +158,47 @@ window.tagifyWebpage = (tagLeafTexts = false) => {
}
});

const inputTags = ["input", "textarea", "select"];

for (let el of allElements) {
if (isEmpty(el) || !elIsClean(el)) {
continue;
}

const intractable = isInteractable(el);
const elTagName = el.tagName.toLowerCase();
const idStr = inputTags.includes(elTagName) ? `{${idNum}}` : `[${idNum}]`;
idToXpath[idNum] = getElementXPath(el);

// create the span for the id tag
if (isInteractable(el)) {
idNum++;
} else {
for (let child of Array.from(el.childNodes)) {
if (child.nodeType === Node.TEXT_NODE && /\S/.test(child.textContent || "")) {
// This is a text node with non-whitespace text
idNum++;
}
}
}
}

idNum = 0;
for (let el of allElements) {
if (isEmpty(el) || !elIsClean(el)) {
continue;
}

const idStr = isTextInsertable(el) ? `{${idNum}}` : `[${idNum}]`;
let idSpan = create_tagged_span(idStr);

if (intractable) {
if (!inputTags.includes(elTagName)) {
el.prepend(idSpan);
} else if (elTagName === "textarea" || elTagName === "input") {
el.prepend(idSpan);
} else if (elTagName === "select") {
// leave select blank - we'll give a tag ID to the options
}
if (isInteractable(el)) {
el.prepend(idSpan);
idNum++;
} else {
if (
tagLeafTexts &&
/\S/.test(el.textContent || "") &&
Array.from(el.childNodes).every(
(node) => node.nodeType === Node.TEXT_NODE,
)
) {
// This is a leaf element with non-whitespace text
el.prepend(idSpan);
for (let child of Array.from(el.childNodes)) {
if (child.nodeType === Node.TEXT_NODE && /\S/.test(child.textContent || "")) {
// This is a text node with non-whitespace text
let idSpan = create_tagged_span(idStr);
el.insertBefore(idSpan, child);
idNum++;
}
}
}

idNum++;
}

return idToXpath;
Expand Down

0 comments on commit 9c8e352

Please sign in to comment.