[JavaScript] Traverse DOM Tree


Traverse the DOM tree with JavaScript to find all of its text nodes.

"use strict";

/**
 * Walk the DOM subtree rooted at `elm` depth-first and log the value of
 * every text node found, in document order.
 *
 * @param {Node} elm - Node to start from; element and document nodes are
 *   descended into, text nodes are logged, all other node types are ignored.
 */
function traverse(elm) {
  switch (elm.nodeType) {
    case Node.TEXT_NODE:
      // Leaf of interest: print its text.
      console.log(elm.nodeValue);
      break;

    case Node.ELEMENT_NODE:
    case Node.DOCUMENT_NODE:
      // Container: visit each child in turn.
      var children = elm.childNodes;
      for (var idx = 0; idx < children.length; idx++) {
        traverse(children[idx]);
      }
      break;
  }
}

// Start the walk at the document node so the whole page is covered.
traverse(document);

The traverse function calls itself recursively to visit every node in the DOM tree. On each call it checks the node type of its argument. If the node is an ELEMENT_NODE or a DOCUMENT_NODE, it calls itself once for each child node of the current node. If the node is a TEXT_NODE, it has reached a text node and prints the node's value with console.log.

Sometimes we need only the text nodes whose text is visible. Text inside elements that are not rendered as text, such as style, and text consisting only of whitespace are not needed. The following code traverses the DOM tree and finds only the visible text nodes.

"use strict";

/**
 * Tell whether a node is one of the elements whose text content should be
 * skipped when collecting visible text: style, script, noscript, iframe
 * and object.
 *
 * @param {Node} elm - Node to test. Nodes without a string `tagName`
 *   (for example the document node, which traverse also passes in) are
 *   never excluded.
 * @returns {boolean} true if the node's tag is in the exclusion list.
 */
function isExcluded(elm) {
  var excludedTags = ["STYLE", "SCRIPT", "NOSCRIPT", "IFRAME", "OBJECT"];
  var tagName = elm.tagName;

  if (typeof tagName !== "string") {
    return false;
  }

  // tagName is upper-cased only for HTML elements in HTML documents;
  // elements in other namespaces (e.g. a <style> or <script> inside inline
  // SVG) and elements in XML/XHTML documents keep their original case.
  // Normalize so those are excluded as well.
  return excludedTags.indexOf(tagName.toUpperCase()) !== -1;
}

/**
 * Walk the DOM subtree rooted at `elm` depth-first and log the value of
 * each text node that is not whitespace-only, skipping the subtrees of
 * elements that isExcluded() rejects.
 *
 * @param {Node} elm - Node to start from; element and document nodes are
 *   descended into, text nodes are logged, all other node types are ignored.
 */
function traverse(elm) {
  var kind = elm.nodeType;

  if (kind === Node.TEXT_NODE) {
    // Log the text unless it is empty after trimming.
    if (elm.nodeValue.trim() !== "") {
      console.log(elm.nodeValue);
    }
    return;
  }

  // Only element and document nodes can be descended into.
  if (kind !== Node.ELEMENT_NODE && kind !== Node.DOCUMENT_NODE) {
    return;
  }

  // Do not descend into elements whose text should not be reported.
  if (isExcluded(elm)) {
    return;
  }

  var kids = elm.childNodes;
  for (var n = 0; n < kids.length; n++) {
    traverse(kids[n]);
  }
}

// Start the walk at the document node so the whole page is covered.
traverse(document);

Tested on: Chromium Version 58.0.3029.81, built on Ubuntu, running on Ubuntu 17.04 (64-bit)


References:

[1]
[2]
[3]javascript - TEXT_NODE: returns ONLY text? - Stack Overflow
[4]HTML DOM tagName Property