[Golang] Minify HTML


Minify HTML via Golang.

The steps:

  1. Remove HTML comments [2]
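  2. Remove all leading and trailing white space from each line.
  3. Append a single space to every line that is still non-empty, so that a
     tag split across lines (e.g. `<div id="foo"` followed by `name="foo2">`)
     keeps its attributes separated once the lines are joined.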
minhtml.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
package minhtml

import (
	"bufio"
	"bytes"
	"regexp"
	"strings"
)

// htmlCommentRE matches one HTML comment: "<!--", then the shortest possible
// run of arbitrary bytes, then "-->". The (?s) flag lets "." match newlines so
// multi-line comments are covered, and the lazy ".*?" stops at the first
// terminator so that text between two separate comments is never swallowed.
// It is compiled once at package scope rather than on every call.
var htmlCommentRE = regexp.MustCompile(`(?s)<!--.*?-->`)

// RemoveHTMLComments returns a copy of content with every HTML comment
// ("<!-- ... -->") removed. Comments may span several lines and may contain
// '>' characters (for example commented-out markup). Because conditional
// comments such as "<!--[if IE]> ... <![endif]-->" use the same delimiters,
// they are removed as well. An opening "<!--" with no matching "-->" is left
// untouched.
func RemoveHTMLComments(content []byte) []byte {
	return htmlCommentRE.ReplaceAll(content, nil)
}

// MinifyHTML strips HTML comments from html and then joins its lines into a
// single line. Each line has its leading and trailing white space removed;
// lines that end up empty are dropped, and every remaining line is followed by
// exactly one space, so the result of non-empty input ends with a space.
//
// The separator space is needed because a tag may be split across lines:
//
//	<div id="foo"
//	name="foo2">
//
// Every line is treated the same way, so white space inside elements such as
// <pre> is collapsed too.
func MinifyHTML(html []byte) string {
	stripped := RemoveHTMLComments(html)

	scanner := bufio.NewScanner(bytes.NewReader(stripped))
	// The default Scanner limit is bufio.MaxScanTokenSize (64 KiB) per line,
	// which generated HTML easily exceeds. Allow a single line to be as long
	// as the whole input so that ErrTooLong cannot occur.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(stripped)+1)

	// Accumulate into a buffer instead of using string += in the loop, which
	// copies the whole result on every iteration.
	var out bytes.Buffer
	out.Grow(len(stripped))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		out.WriteString(line)
		out.WriteByte(' ')
	}
	// Reading from an in-memory reader with the enlarged buffer is not
	// expected to fail; the signature has no error result, so an unexpected
	// failure is surfaced as a panic, as before.
	if err := scanner.Err(); err != nil {
		panic(err)
	}

	return out.String()
}
minhtml_test.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package minhtml

import (
	"io/ioutil"
	"testing"
)

// Fixtures shared by the tests below.
var (
	// html is the raw input. It contains a comment with extra dashes, a tag
	// split across two lines, a multi-line comment, and a comment that
	// directly follows an element on the same line.
	html = []byte(`
<!--- hello --->
<div id="foo"
name="foo2"></div>
<!--
world
-->
<span></span><!--ddd-->
`)

	// htmlNoComments is html with the comments removed; the line breaks that
	// surrounded them are still present.
	htmlNoComments = []byte(`

<div id="foo"
name="foo2"></div>

<span></span>
`)

	// htmlMinified is the expected single-line result for html, including the
	// space that follows the last non-empty line.
	htmlMinified = `<div id="foo" name="foo2"></div> <span></span> `
)

// TestRemoveHTMLComments checks that stripping the comments from the html
// fixture yields exactly htmlNoComments. On failure both the actual and the
// expected text are reported, quoted so that white space differences show up.
func TestRemoveHTMLComments(t *testing.T) {
	got := string(RemoveHTMLComments(html))
	if want := string(htmlNoComments); got != want {
		t.Errorf("RemoveHTMLComments(html) = %q, want %q", got, want)
	}
}

// TestMinifyHTML checks that minifying the html fixture yields exactly
// htmlMinified. On failure both the actual and the expected text are
// reported, quoted so that white space differences show up.
func TestMinifyHTML(t *testing.T) {
	got := MinifyHTML(html)
	if got != htmlMinified {
		t.Errorf("MinifyHTML(html) = %q, want %q", got, htmlMinified)
	}
}

// TestHTMLFile reads the file at ../../../output/index.html (relative to the
// package directory), minifies it, and writes the result back over the same
// file with mode 0644.
//
// NOTE: this rewrites the file in place, so running the test modifies the
// working tree. Read and write failures are reported through t instead of
// panicking, so they show up as an ordinary test failure naming the path.
func TestHTMLFile(t *testing.T) {
	htmlPath := "../../../output/index.html"
	b, err := ioutil.ReadFile(htmlPath)
	if err != nil {
		t.Fatalf("reading %s: %v", htmlPath, err)
	}
	minifiedHTML := MinifyHTML(b)
	// Only shown with -v or when the test fails.
	t.Log(minifiedHTML)
	if err := ioutil.WriteFile(htmlPath, []byte(minifiedHTML), 0644); err != nil {
		t.Fatalf("writing %s: %v", htmlPath, err)
	}
}

Tested on: Ubuntu Linux 15.10, Go 1.6.


References:

[1]regex html comments
[2]php - RegExp to strip HTML comments - Stack Overflow
[3]regexp - The Go Programming Language
[4]ioutil - The Go Programming Language
[5]bufio - The Go Programming Language
[6]bytes - The Go Programming Language
[7]strings - The Go Programming Language