[Golang] Levenshtein Distance Recursive Implementation


I read post of the BK-tree referred by HN [1], and in the post, an interesting algorithm called Levenshtein distance [2] is used. Levenshtein distance (LD) is a measure of the similarity between two strings, the minimum number of single-character edits (insertions, deletions or substitutions) required to change one string into the other.

The recursive implementation of LD in Wikipedia is not very difficult, so I port the code from C to Go as an exercise.

If you want a more efficient algorithm (Wagner-Fischer algorithm), see [4].

Run Code on Go Playground

util.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
package ld

func StringToRuneSlice(s string) []rune {
	var r []rune
	for _, runeValue := range s {
		r = append(r, runeValue)
	}
	return r
}

func minimum(a, b, c int) int {
	min := a
	if min > b {
		min = b
	}
	if min > c {
		min = c
	}
	return min
}
recursive.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package ld

// lenS and lenT are the number of characters in string s and t respectively
func Recursive(s []rune, lenS int, t []rune, lenT int) int {
	var cost int

	// base case: empty strings
	if lenS == 0 {
		return lenT
	}
	if lenT == 0 {
		return lenS
	}

	// test if last characters of the strings match
	if s[lenS-1] == t[lenT-1] {
		cost = 0
	} else {
		cost = 1
	}

	// return minimum of delete char from s, delete char from t, and delete char from both
	return minimum(
		Recursive(s, lenS-1, t, lenT)+1,
		Recursive(s, lenS, t, lenT-1)+1,
		Recursive(s, lenS-1, t, lenT-1)+cost,
	)
}

func LevenshteinDistance(s, t string) int {
	srs := StringToRuneSlice(s)
	trs := StringToRuneSlice(t)
	return Recursive(srs, len(srs), trs, len(trs))
}
ld_test.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
package ld

import (
	"testing"
)

func TestLevenshteinDistance(t *testing.T) {
	if LevenshteinDistance("soccer", "otter") != 3 {
		t.Error(`"soccer", "otter"`)
	}

	if LevenshteinDistance("你好他他是誰", "好我我是誰") != 3 {
		t.Error(`"你好他他是誰", "好我我是誰"`)
	}

	if LevenshteinDistance("kitten", "sitten") != 1 {
		t.Error(`"kitten", "sitten"`)
	}

	if LevenshteinDistance("sitten", "sittin") != 1 {
		t.Error(`"sitten", "sittin"`)
	}

	if LevenshteinDistance("sittin", "sitting") != 1 {
		t.Error(`"sittin", "sitting"`)
	}

	if LevenshteinDistance("sort", "some") != 2 {
		t.Error(`"sort", "some"`)
	}

	if LevenshteinDistance("sort", "same") != 3 {
		t.Error(`"sort", "same"`)
	}

	if LevenshteinDistance("sort", "soft") != 1 {
		t.Error(`"sort", "soft"`)
	}
}

Tested on:


References:

[1]
[2]Levenshtein distance - Wikipedia
[3][Golang] Iterate Over UTF-8 Strings (non-ASCII strings)
[4][Golang] Wagner-Fischer algorithm for Edit Distance of Two Strings