Google App Engine Go - HTML Link to reStructuredText


Given a webpage URL, fetch the title of the webpage and output a reStructuredText link to it, using Google App Engine Go.

Demo

Makefile: Edit the variables in the Makefile according to your development environment.

Makefile | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Append the App Engine SDK directory to PATH (presumably where goapp and
# appcfg.py live); adjust the relative path for your machine.
export PATH := $(PATH):$(realpath ../../../../go_appengine/)
# Directory containing this Makefile's working tree, passed to the SDK tools.
PROJECT_DIR=$(CURDIR)
# Application ID and version used when deploying.
PROJECT_ID=golden-operator-130720
PROJECT_VERSION=v1

# These targets are commands, not files: declare them phony so that a file
# named e.g. "install" or "deploy" in this directory cannot make them no-ops.
.PHONY: default fmt deploy install

# Run the local development server from the parent directory.
# "&&" ensures goapp is not started if the directory change fails.
default:
	@echo "\033[92mRun development web server ...\033[0m"
	@cd ../ && goapp serve ${PROJECT_DIR}

# Format all Go sources in this directory.
fmt:
	@echo "\033[92mGo fmt source code ...\033[0m"
	@goapp fmt *.go

# Upload the application, then print the URL built from version and ID.
deploy:
	cd ../ && appcfg.py -A ${PROJECT_ID} -V ${PROJECT_VERSION} update ${PROJECT_DIR}
	@echo "\033[92mDeployed URL: http://${PROJECT_VERSION}.${PROJECT_ID}.appspot.com/\033[0m"

# Fetch (or update) the two external Go packages the sources import.
install:
	@echo "\033[92mInstall golang.org/x/net/html ...\033[0m"
	@goapp get -u golang.org/x/net/html
	@echo "\033[92mInstall google.golang.org/appengine ...\033[0m"
	@goapp get -u google.golang.org/appengine

GAE Go environment settings:

app.yaml | repository | view raw
1
2
3
4
5
6
# App Engine application configuration: Go runtime, API version go1.
runtime: go
api_version: go1

# Route every request path (the regex /.* matches all URLs) to a single
# script, _go_app (presumably the SDK's name for the compiled Go app).
handlers:
- url: /.*
  script: _go_app

Source code:

link2rst.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package link2rst

import (
	"html/template"
	"net/http"
)

// TemplateValue is the data passed to the page template.
type TemplateValue struct {
	// Textarea is the text rendered inside the page's <textarea>.
	Textarea string
}

// index is the HTML source of the app's only page. It contains a form that
// POSTs a "url" field to "/", a textarea filled from the template field
// .Textarea, and a button whose script selects the textarea, runs
// document.execCommand('copy'), and then replaces the textarea content with
// "Copy OK" or "Copy Fail" depending on the result.
var index = `<!doctype html>
<html>
<head>
  <title>HTML Link to Rst</title>
</head>
<body>
  <form action="/" method="post">
    URL: <input name="url" size="80">
    <button>Send</button>
  </form><br>
  <textarea id="ta" rows="5" cols="80">{{.Textarea}}</textarea><br>
  <button type="button" id="copy">Copy textarea to clipboard</button>
<script>
  var textareaElm = document.getElementById("ta");
  var copyElm = document.getElementById("copy");
  copyElm.onclick = function(event) {
    textareaElm.select();
    var isSuccessful = document.execCommand('copy');
    if (isSuccessful) {
      textareaElm.value = "Copy OK";
    } else {
      textareaElm.value = "Copy Fail";
    }
  }
</script>

</body>
</html>`

// tmpl is the parsed form of index, built once when the package is
// initialized. template.Must panics if index fails to parse.
var tmpl = template.Must(template.New("link2rst").Parse(index))

// init registers handler for every path on the default serve mux.
func init() {
	http.Handle("/", http.HandlerFunc(handler))
}

// handler renders the page. On a POST request it first converts the submitted
// "url" form value into a reStructuredText link via getLinkRst and places the
// result in the textarea; for any other method the textarea is left empty.
// It panics if the template cannot be executed.
func handler(w http.ResponseWriter, r *http.Request) {
	var page TemplateValue

	switch r.Method {
	case "POST":
		page.Textarea = getLinkRst(r.PostFormValue("url"), r)
	}

	err := tmpl.Execute(w, &page)
	if err != nil {
		panic(err)
	}
}
title.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package link2rst

import (
	"golang.org/x/net/html"
	"io"
)

// isTitleElement reports whether n is an element node whose tag is "title".
func isTitleElement(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	return n.Data == "title"
}

// traverse searches the tree rooted at n depth-first, visiting a node before
// its children and children in sibling order. It returns the text of the
// first title element whose first child is a text node, and true. If no such
// element exists it returns "" and false.
func traverse(n *html.Node) (string, bool) {
	if isTitleElement(n) {
		// An empty <title></title> has no child node; dereferencing
		// FirstChild unconditionally would panic on such documents.
		if text := n.FirstChild; text != nil && text.Type == html.TextNode {
			return text.Data, true
		}
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if result, ok := traverse(c); ok {
			return result, true
		}
	}

	return "", false
}

// GetHtmlTitle parses the HTML read from r and returns the result of running
// traverse over the parsed document: the title text and true when a title is
// found. It returns "" and false when no title is found or when the input
// cannot be parsed.
func GetHtmlTitle(r io.Reader) (string, bool) {
	doc, err := html.Parse(r)
	if err != nil {
		// Report failure through the existing ok result rather than
		// panicking: the input comes from arbitrary remote pages.
		return "", false
	}

	return traverse(doc)
}
fetch.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package link2rst

import (
	"net/http"

	"google.golang.org/appengine"
	"google.golang.org/appengine/urlfetch"
)

// getLinkRst fetches url with an App Engine urlfetch client bound to the
// context of request r and returns a reStructuredText link of the form
//
//	`title <url>`_
//
// where title comes from GetHtmlTitle applied to the response body. It
// returns "" when the page cannot be fetched or no title is found.
func getLinkRst(url string, r *http.Request) string {
	ctx := appengine.NewContext(r)
	client := urlfetch.Client(ctx)

	resp, err := client.Get(url)
	if err != nil {
		// url is user-supplied form input, so an empty, malformed or
		// unreachable URL is expected; treat it like "no title" instead
		// of panicking and failing the whole request.
		return ""
	}
	defer resp.Body.Close()

	title, ok := GetHtmlTitle(resp.Body)
	if !ok {
		return ""
	}
	return "`" + title + " <" + url + ">`_"
}

Tested on: Ubuntu Linux 16.04, Google App Engine SDK for Go 1.9.37.


References:

[1][Golang] Server Get Form POST Value
[2][Golang] Get HTML Title via net/html Package
[3][Golang] Hacker News Link to reStructuredText
[4]Issuing HTTP(S) Requests - Go — Google Cloud Platform