From 754679552005a97dbee3a2b67ec6c97b7eaa9bfd Mon Sep 17 00:00:00 2001 From: yigithankarabulut Date: Tue, 30 Apr 2024 02:24:48 +0300 Subject: [PATCH 1/2] replacing the jaro-winkler algorithm usage with an internal function for suggestion. --- suggestions.go | 77 +++++++++++++++++++++++++++++++++++++++++++-- suggestions_test.go | 27 ++++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/suggestions.go b/suggestions.go index 607de09deb..19683a4114 100644 --- a/suggestions.go +++ b/suggestions.go @@ -1,7 +1,7 @@ package cli import ( - "github.com/xrash/smetrics" + "math" ) const suggestDidYouMeanTemplate = "Did you mean %q?" @@ -16,13 +16,84 @@ type SuggestFlagFunc func(flags []Flag, provided string, hideHelp bool) string type SuggestCommandFunc func(commands []*Command, provided string) string +// Jaro is the measure of similarity between two strings. +// The result is 1 for equal strings, and 0 for completely different strings. +func jaroDistance(a, b string) float64 { + if len(a) == 0 && len(b) == 0 { + return 1 + } + if len(a) == 0 || len(b) == 0 { + return 0 + } + + lenA := float64(len(a)) + lenB := float64(len(b)) + hashA := make([]bool, len(a)) + hashB := make([]bool, len(b)) + maxDistance := int(math.Max(0, math.Floor(math.Max(lenA, lenB)/2.0)-1)) + + var matches float64 + for i := 0; i < len(a); i++ { + start := int(math.Max(0, float64(i-maxDistance))) + end := int(math.Min(lenB-1, float64(i+maxDistance))) + + for j := start; j <= end; j++ { + if hashB[j] { + continue + } + if a[i] == b[j] { + hashA[i] = true + hashB[j] = true + matches++ + break + } + } + } + if matches == 0 { + return 0 + } + + var transpositions float64 + var j int + for i := 0; i < len(a); i++ { + if !hashA[i] { + continue + } + for !hashB[j] { + j++ + } + if a[i] != b[j] { + transpositions++ + } + j++ + } + + transpositions /= 2 + return ((matches / lenA) + (matches / lenB) + ((matches - transpositions) / matches)) / 3.0 +} + +// jaroWinkler is more accurate when strings have a common prefix up to a defined maximum length. func jaroWinkler(a, b string) float64 { - // magic values are from https://github.com/xrash/smetrics/blob/039620a656736e6ad994090895784a7af15e0b80/jaro-winkler.go#L8 const ( boostThreshold = 0.7 prefixSize = 4 ) - return smetrics.JaroWinkler(a, b, boostThreshold, prefixSize) + jaroDist := jaroDistance(a, b) + if jaroDist <= boostThreshold { + return jaroDist + } + + prefix := int(math.Min(float64(len(a)), math.Min(float64(prefixSize), float64(len(b))))) + + var prefixMatch float64 + for i := 0; i < prefix; i++ { + if a[i] == b[i] { + prefixMatch++ + } else { + break + } + } + return jaroDist + 0.1*prefixMatch*(1.0-jaroDist) } func suggestFlag(flags []Flag, provided string, hideHelp bool) string { diff --git a/suggestions_test.go b/suggestions_test.go index 979fbe0cb4..b1e962104e 100644 --- a/suggestions_test.go +++ b/suggestions_test.go @@ -8,6 +8,33 @@ import ( "github.com/stretchr/testify/assert" ) +func TestJaroWinkler(t *testing.T) { + // Given + for _, testCase := range []struct { + a, b string + expected float64 + }{ + {"", "", 1}, + {"a", "", 0}, + {"", "a", 0}, + {"a", "a", 1}, + {"a", "b", 0}, + {"aa", "aa", 1}, + {"aa", "bb", 0}, + {"aaa", "aaa", 1}, + {"aa", "ab", 0.6666666666666666}, + {"aa", "ba", 0.6666666666666666}, + {"ba", "aa", 0.6666666666666666}, + {"ab", "aa", 0.6666666666666666}, + } { + // When + res := jaroWinkler(testCase.a, testCase.b) + + // Then + assert.Equal(t, testCase.expected, res) + } +} + func TestSuggestFlag(t *testing.T) { // Given app := buildExtendedTestCommand() From efa78e79a864419d4e1787eecf42a78e07fb9c30 Mon Sep 17 00:00:00 2001 From: Eng Zer Jun Date: Tue, 30 Apr 2024 22:02:20 +0800 Subject: [PATCH 2/2] Fix go.mod and add comments Signed-off-by: Eng Zer Jun --- go.mod | 5 +---- go.sum | 2 -- suggestions.go | 12 +++++++++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index ccc913baf8..2425a9b7fc 100644 --- a/go.mod +++ b/go.mod @@ -2,10 +2,7 @@ module github.com/urfave/cli/v3 go 1.18 -require ( - github.com/stretchr/testify v1.8.4 - github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 -) +require github.com/stretchr/testify v1.8.4 require ( github.com/BurntSushi/toml v1.3.2 // indirect diff --git a/go.sum b/go.sum index 39b6c7abd3..1ce67a49f3 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,6 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/urfave/cli-altsrc/v3 v3.0.0-alpha2 h1:j4SaBpPB8++L0c0KuTnz/Yus3UQoWJ54hQjhIMW8rCM= github.com/urfave/cli-altsrc/v3 v3.0.0-alpha2/go.mod h1:Q79oyIY/z4jtzIrKEK6MUeWC7/szGr46x4QdOaOAIWc= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/suggestions.go b/suggestions.go index 19683a4114..6f29f12213 100644 --- a/suggestions.go +++ b/suggestions.go @@ -16,8 +16,11 @@ type SuggestFlagFunc func(flags []Flag, provided string, hideHelp bool) string type SuggestCommandFunc func(commands []*Command, provided string) string -// Jaro is the measure of similarity between two strings. -// The result is 1 for equal strings, and 0 for completely different strings. +// jaroDistance is the measure of similarity between two strings. It returns a +// value between 0 and 1, where 1 indicates identical strings and 0 indicates +// completely different strings. +// +// Adapted from https://github.com/xrash/smetrics/blob/5f08fbb34913bc8ab95bb4f2a89a0637ca922666/jaro.go. func jaroDistance(a, b string) float64 { if len(a) == 0 && len(b) == 0 { return 1 @@ -72,7 +75,10 @@ func jaroDistance(a, b string) float64 { return ((matches / lenA) + (matches / lenB) + ((matches - transpositions) / matches)) / 3.0 } -// jaroWinkler is more accurate when strings have a common prefix up to a defined maximum length. +// jaroWinkler is more accurate when strings have a common prefix up to a +// defined maximum length. +// +// Adapted from https://github.com/xrash/smetrics/blob/5f08fbb34913bc8ab95bb4f2a89a0637ca922666/jaro-winkler.go. func jaroWinkler(a, b string) float64 { const ( boostThreshold = 0.7