[Golang] Get HTML Title via net/html Package


Introduction

Extract the text of the <title> element from an HTML document using the golang.org/x/net/html package in Go (Golang).

Install net/html Package

$ go get -u golang.org/x/net/html

Source Code

title.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package title

import (
	"golang.org/x/net/html"
	"io"
)

// isTitleElement reports whether n is an element node whose tag name is
// "title".
func isTitleElement(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	return n.Data == "title"
}

// traverse performs a depth-first search, in document order, of the tree
// rooted at n for the first <title> element.
//
// When a title element is found it returns the Data of the element's first
// child node and true. A title element without any child node (an empty
// <title></title>) yields "" and true. If n is nil or no title element exists
// under n, it returns "" and false.
func traverse(n *html.Node) (string, bool) {
	if n == nil {
		return "", false
	}

	if isTitleElement(n) {
		// An empty <title></title> has no child node, so FirstChild is nil;
		// guard against dereferencing it.
		if n.FirstChild == nil {
			return "", true
		}
		return n.FirstChild.Data, true
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if title, ok := traverse(c); ok {
			return title, true
		}
	}

	return "", false
}

// GetHtmlTitle parses the HTML document read from r and searches it for the
// first <title> element.
//
// It returns the title text and true on success. It returns "" and false when
// the document cannot be parsed (for example because reading from r fails) or
// when traverse finds no title element in the parsed tree.
func GetHtmlTitle(r io.Reader) (string, bool) {
	doc, err := html.Parse(r)
	if err != nil {
		// Signal failure through the ok result instead of panicking, so a
		// read error on r cannot crash the caller.
		return "", false
	}

	return traverse(doc)
}

Usage:

title_test.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
package title

import (
	"net/http"
	"testing"
)

// TestHtmlToRst fetches a live web page over HTTP, extracts its title with
// GetHtmlTitle, and logs it. The test fails if the request fails or if no
// title can be extracted. It requires network access.
func TestHtmlToRst(t *testing.T) {
	resp, err := http.Get("http://nanda.online-dhamma.net/")
	//resp, err := http.Get("https://siongui.github.io/zh/2016/03/14/pillow-useful-items-for-me-notes/")
	if err != nil {
		// Report through the testing framework rather than panicking.
		t.Fatal(err)
	}
	defer resp.Body.Close()

	title, ok := GetHtmlTitle(resp.Body)
	if !ok {
		t.Fatal("Fail to get HTML title")
	}
	t.Log(title)
}

Tested on: Ubuntu Linux 16.04, Go 1.6.2.


References:

[1] html - GoDoc
[2] [Golang] Get HTML Title via goquery