[Golang] Get HTML Title via goquery


A simple example of reading the title of an HTML document with goquery in Go.

Install goquery:

$ go get -u github.com/PuerkitoBio/goquery

Source code:

title.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
package main

import (
	"flag"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"os"
)

// processHTML opens the HTML file at path, parses it with goquery, and
// prints the document title to standard output.
//
// Only the first <title> element in document order is used. Text() returns
// the combined text of every matched element, so without First() any
// additional <title> elements (such as one nested in inline SVG) would be
// appended to the output.
//
// It panics if the file cannot be opened or the document cannot be parsed.
func processHTML(path string) {
	f, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	doc, err := goquery.NewDocumentFromReader(f)
	if err != nil {
		panic(err)
	}

	title := doc.Find("title").First().Text()
	fmt.Println(title)
}

// main reads the -input flag and prints the title of the HTML file it names.
//
// When -input is missing or empty, an error is written to standard error and
// the program exits with status 2 (the same status the flag package uses for
// usage errors) so that shells and scripts can detect the failure.
func main() {
	pPath := flag.String("input", "", "Path of file to be processed")
	flag.Parse()
	path := *pPath
	if path == "" {
		fmt.Fprintln(os.Stderr, "Error: empty path!")
		os.Exit(2)
	}

	processHTML(path)
}

Command line usage:

$ go run title.go -input=index.html

Tested on: Ubuntu Linux 15.10, Go 1.6.


References:

[1]
[2]
[3]read html title · twnanda/twnanda@5d81787 · GitHub