Find Pāli words written in the Velthuis scheme and replace them with their
Unicode equivalents, using Go (the Go programming language, also called Golang).
replace.go |
repository |
view raw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118 | package velthuis
import (
"regexp"
"strings"
)
// paliWordsInVelthuisScheme matches a run of one or more characters that can
// appear in a Velthuis-encoded Pāli word: the ASCII letters listed in the
// class (both cases; f, q, w, x and z are not included) plus the three
// Velthuis marker characters `"`, `~` and `.`.
var paliWordsInVelthuisScheme = regexp.MustCompile(`[abcdeghijklmnoprstuvyABCDEGHIJKLMNOPRSTUVY"~.]+`)
// velthuis2unicode converts one word written in the Velthuis ASCII scheme
// into its spelling with Unicode diacritics.
//
// Recognised sequences:
//
//	.n .m .t .d .l  ->  ṇ ṃ ṭ ḍ ḷ
//	~n              ->  ñ
//	"n              ->  ṅ
//	aa ii uu        ->  ā ī ū
//
// Sequences are consumed greedily from left to right, so "aaa" becomes "āa"
// and "aaaa" becomes "āā".
//
// It returns the converted word and true on success. It returns "" and false
// when str is empty, when a marker ('.', '~' or '"') is followed by a letter
// it cannot combine with, when the result would end in '.' or '"', or when
// the conversion leaves str unchanged (nothing to replace).
func velthuis2unicode(str string) (string, bool) {
	// Guard against empty input; the trailing-character check below indexes
	// the last byte of the result.
	if str == "" {
		return "", false
	}

	out := make([]byte, 0, len(str))
	for i := 0; i < len(str); i++ {
		c := str[i]
		switch c {
		case '.', '~', '"':
			if i+1 == len(str) {
				// Dangling marker at the end of the word: keep it verbatim
				// and let the trailing-character check decide.
				out = append(out, c)
				continue
			}
			letter, ok := velthuisMarkedLetter(c, str[i+1])
			if !ok {
				return "", false
			}
			out = append(out, letter...)
			i++ // the marked letter consumed two input bytes
		case 'a', 'i', 'u':
			if i+1 < len(str) && str[i+1] == c {
				out = append(out, velthuisLongVowel(c)...)
				// Skip the second vowel so that it cannot pair up again with
				// a third one and corrupt the multi-byte long vowel.
				i++
				continue
			}
			out = append(out, c)
		default:
			out = append(out, c)
		}
	}

	// A word that still ends with a dot or a double quote is most likely
	// ordinary punctuation rather than Velthuis text, so it is rejected.
	if last := out[len(out)-1]; last == '.' || last == '"' {
		return "", false
	}

	result := string(out)
	if result == str {
		return "", false
	}
	return result, true
}

// velthuisMarkedLetter returns the Unicode letter for a Velthuis marker
// followed by next, or false if that pair is not a valid sequence.
func velthuisMarkedLetter(marker, next byte) (string, bool) {
	switch marker {
	case '.':
		switch next {
		case 'n':
			return "ṇ", true
		case 'm':
			return "ṃ", true
		case 't':
			return "ṭ", true
		case 'd':
			return "ḍ", true
		case 'l':
			return "ḷ", true
		}
	case '~':
		if next == 'n' {
			return "ñ", true
		}
	case '"':
		if next == 'n' {
			return "ṅ", true
		}
	}
	return "", false
}

// velthuisLongVowel returns the long form of the short vowel v ('a', 'i' or
// 'u'); any other byte is returned unchanged.
func velthuisLongVowel(v byte) string {
	switch v {
	case 'a':
		return "ā"
	case 'i':
		return "ī"
	case 'u':
		return "ū"
	}
	return string([]byte{v})
}
// replacePaliWordsInVelthuisSchemeWithUnicode maps one regexp match to its
// replacement. The match is lower-cased and passed to velthuis2unicode; when
// that succeeds the (lower-case) Unicode spelling is returned and the
// conversion is printed with println. In every other case the original bytes
// are returned untouched.
func replacePaliWordsInVelthuisSchemeWithUnicode(b []byte) []byte {
	// One-byte matches are returned as-is without attempting a conversion.
	if len(b) == 1 {
		return b
	}

	lowered := strings.ToLower(string(b))

	// Matches with these endings are never converted.
	// NOTE(review): presumably these are domain-name suffixes — confirm.
	for _, suffix := range []string{".net", ".tm", ".lk"} {
		if strings.HasSuffix(lowered, suffix) {
			return b
		}
	}

	converted, ok := velthuis2unicode(lowered)
	if !ok {
		return b
	}
	println(lowered + " => " + converted)
	return []byte(converted)
}
func ProcessBytes(b []byte) []byte {
return paliWordsInVelthuisScheme.ReplaceAllFunc(b,
replacePaliWordsInVelthuisSchemeWithUnicode)
}
|
replace_test.go |
repository |
view raw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 | package velthuis
import (
"io/ioutil"
"testing"
)
// TestProcessBytes is a smoke test: it reads an article from a fixed path on
// the local disk and runs ProcessBytes over its contents. The result is
// discarded and nothing is asserted about it.
func TestProcessBytes(t *testing.T) {
	path := "/home/siongui/dev/nanda/content/articles/tipitaka/tipitaka%zh.rst"
	b, err := ioutil.ReadFile(path)
	if err != nil {
		// Report through the testing framework instead of panicking, so a
		// missing file fails this test without aborting the whole test binary.
		t.Fatalf("reading %s: %v", path, err)
	}
	ProcessBytes(b)
	// Uncomment to write the converted text back to the file.
	//b2 := ProcessBytes(b)
	//ioutil.WriteFile(path, b2, 0644)
}
|
Tested on: Ubuntu Linux 16.04, Go 1.6.2.
References: