Algorithm Implementation/Strings/Levenshtein distance
From Wikibooks, the open-content textbooks collection
The implementations of the Levenshtein algorithm on this page are illustrative only. Applications will, in most cases, use implementations which use heap allocations sparingly, in particular when large lists of words are compared to each other. The following remarks indicate some of the variations on this and related topics:
- Most implementations use one- or two-dimensional arrays to store the distances of prefixes of the words compared. In most applications the size of these structures is previously known. This is the case, when, for instance the distance is relevant only if it is below a certain maximally allowed distance (this happens when words are selected from a dictionary to approximately match a given word). In this case the arrays can be preallocated and reused over the various runs of the algorithm over successive words.
- Using a maximum allowed distance puts an upper bound on the search time. The search can be stopped as soon as the minimum Levenshtein distance between prefixes of the strings exceeds the maximum allowed distance.
- Deletion, insertion, and replacement of characters can be assigned different weights. The usual choice is to set all three weights to 1. Different values for these weights allows for more flexible search strategies in lists of words.
[edit] Ada
function Levenshtein(Left, Right : String) return Natural is D : array(0 .. Left'Last, 0 .. Right'Last) of Natural; begin for I in D'range(1) loop D(I, 0) := I;end loop; for J in D'range(2) loop D(0, J) := J;end loop; for I in Left'range loop for J in Right'range loop D(I, J) := Natural'Min(D(I - 1, J), D(I, J - 1)) + 1; D(I, J) := Natural'Min(D(I, J), D(I - 1, J - 1) + Boolean'Pos(Left(I) /= Right(J))); end loop; end loop; return D(D'Last(1), D'Last(2)); end Levenshtein;
[edit] Common Lisp
(defun levenshtein-distance (str1 str2) "Calculates the Levenshtein distance between str1 and str2, returns an editing distance (int)." (let ((n (length str1)) (m (length str2))) ;; Check trivial cases (cond ((= 0 n) (return-from levenshtein-distance m)) ((= 0 m) (return-from levenshtein-distance n))) (let ((col (make-array (1+ m) :element-type 'integer)) (prev-col (make-array (1+ m) :element-type 'integer))) ;; We need to store only two columns---the current one that ;; is being built and the previous one (dotimes (i (1+ m)) (setf (svref prev-col i) i)) ;; Loop across all chars of each string (dotimes (i n) (setf (svref col 0) (1+ i)) (dotimes (j m) (setf (svref col (1+ j)) (min (1+ (svref col j)) (1+ (svref prev-col (1+ j))) (+ (svref prev-col j) (if (char-equal (schar str1 i) (schar str2 j)) 0 1))))) (rotatef col prev-col)) (svref prev-col m))))
[edit] C++
template <class T> unsigned int edit_distance(const T& s1, const T& s2) { const size_t len1 = s1.size(), len2 = s2.size(); vector<vector<unsigned int> > d(len1 + 1, vector<unsigned int>(len2 + 1)); d[0][0] = 0; for(unsigned int i = 1; i <= len1; ++i) d[i][0] = i; for(unsigned int i = 1; i <= len2; ++i) d[0][i] = i; for(unsigned int i = 1; i <= len1; ++i) for(unsigned int j = 1; j <= len2; ++j) d[i][j] = std::min( std::min(d[i - 1][j] + 1,d[i][j - 1] + 1), d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) ); return d[len1][len2]; }
[edit] C#
private Int32 levenshtein(String a, String b)
{
if (string.IsNullOrEmpty(a))
{
if (!string.IsNullOrEmpty(b))
{
return b.Length;
}
return 0;
}
if (string.IsNullOrEmpty(b))
{
if (!string.IsNullOrEmpty(a))
{
return a.Length;
}
return 0;
}
Int32 cost;
Int32[,] d = new int[a.Length + 1, b.Length + 1];
Int32 min1;
Int32 min2;
Int32 min3;
for (Int32 i = 0; i <= d.GetUpperBound(0); i += 1)
{
d[i, 0] = i;
}
for (Int32 i = 0; i <= d.GetUpperBound(1); i += 1)
{
d[0, i] = i;
}
for (Int32 i = 1; i <= d.GetUpperBound(0); i += 1)
{
for (Int32 j = 1; j <= d.GetUpperBound(1); j += 1)
{
cost = Convert.ToInt32(!(a[i-1] == b[j - 1]));
min1 = d[i - 1, j] + 1;
min2 = d[i, j - 1] + 1;
min3 = d[i - 1, j - 1] + cost;
d[i, j] = Math.Min(Math.Min(min1, min2), min3);
}
}
return d[d.GetUpperBound(0), d.GetUpperBound(1)];
}
[edit] Emacs Lisp
(defun levenshtein-distance (str1 str2) "Return the edit distance between strings STR1 and STR2." (if (not (stringp str1)) (error "Argument was not a string: %s" str1)) (if (not (stringp str2)) (error "Argument was not a string: %s" str2)) (let* ((make-table (function (lambda (columns rows init) (make-vector rows (make-vector columns init))))) (tref (function (lambda (table x y) (aref (aref table y) x)))) (tset (function (lambda (table x y object) (let ((row (copy-sequence (aref table y)))) (aset row x object) (aset table y row) object)))) (length-str1 (length str1)) (length-str2 (length str2)) (d (funcall make-table (1+ length-str1) (1+ length-str2) 0))) (let ((i 0) (j 0)) (while (<= i length-str1) (funcall tset d i 0 i) (setq i (1+ i))) (while (<= j length-str2) (funcall tset d 0 j j) (setq j (1+ j)))) (let ((i 1)) (while (<= i length-str1) (let ((j 1)) (while (<= j length-str2) (let* ((cost (if (equal (aref str1 (1- i)) (aref str2 (1- j))) 0 1)) (deletion (1+ (funcall tref d (1- i) j))) (insertion (1+ (funcall tref d i (1- j)))) (substitution (+ (funcall tref d (1- i) (1- j)) cost))) (funcall tset d i j (min insertion deletion substitution))) (setq j (1+ j)))) (setq i (1+ i)))) (funcall tref d length-str1 length-str2)))
[edit] Haskell
Tested with GHCi.
levenshtein::String->String->Int
levenshtein s t =
d!!(length s)!!(length t)
where d = [[distance m n|n<-[0..length t]]|m<-[0..length s]]
distance i 0 = i
distance 0 j = j
distance i j = minimum [d!!(i-1)!!j+1, d!!i!!(j-1)+1, d!!(i-1)!!(j-1) + (if s!!(i-1)==t!!(j-1) then 0 else 1)]
For large strings, using arrays is much faster
import Data.Array.Unboxed
import Data.Array.ST
import Control.Monad.ST
for_ xs f = mapM_ f xs
levenshtein :: [Char] -> [Char] -> Int
levenshtein s t = d ! (ls , lt)
where s' = array (0,ls) [ (i,x) | (i,x) <- zip [0..] s ]::UArray Int Char
t' = array (0,lt) [ (i,x) | (i,x) <- zip [0..] t ]::UArray Int Char
ls = length s
lt = length t
(l,h) = ((0,0),(length s,length t))
d = runSTUArray $ do
m <- newArray (l,h) 0 :: ST s (STUArray s (Int,Int) Int)
for_ [0..ls] $ \i -> writeArray m (i,0) i
for_ [0..lt] $ \j -> writeArray m (0,j) j
for_ [1..lt] $ \j -> do
for_ [1..ls] $ \i -> do
let c = if s'!(i-1)==t'! (j-1)
then 0 else 1
x <- readArray m (i-1,j)
y <- readArray m (i,j-1)
z <- readArray m (i-1,j-1)
writeArray m (i,j) $ minimum [x+1, y+1, z+c ]
return m
And finally: fast but cryptic implementation
levenshtein2 sa sb = last $ foldl transform [0..length sa] sb
where
transform xs@(x:xs') c = scanl compute (x+1) (zip3 sa xs xs')
where
compute z (c', x, y) = minimum [y+1, z+1, x + fromEnum (c' /= c)]
[edit] Io
Levenshtein := method(left, right, if(right size < left size, return Levenshtein(right, left)) current := 0 to(left size) asList right foreach(i, righti, previous := current current = List clone with(i + 1) left foreach(j, leftj, current append((current at(j) + 1) min(previous at(j + 1) + 1) min(previous at(j) + if(leftj == righti, 0, 1))) ) ) current last )
[edit] JavaScript
function levenshtein(str1, str2) { var l1 = str1.length, l2 = str2.length; if (Math.min(l1, l2) === 0) { return Math.max(l1, l2); } var i = 0, j = 0, d = []; for (i = 0 ; i <= l1 ; i++) { d[i] = []; d[i][0] = i; } for (j = 0 ; j <= l2 ; j++) { d[0][j] = j; } for (i = 1 ; i <= l1 ; i++) { for (j = 1 ; j <= l2 ; j++) { d[i][j] = Math.min( d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + (str1.charAt(i - 1) === str2.charAt(j - 1) ? 0 : 1) ); } } return d[l1][l2]; }
[edit] Java
public class LevenshteinDistance { private static int minimum(int a, int b, int c) { return Math.min(Math.min(a, b), c); } public static int computeLevenshteinDistance(CharSequence str1, CharSequence str2) { int[][] distance = new int[str1.length() + 1][str2.length() + 1]; for (int i = 0; i <= str1.length(); i++) distance[i][0] = i; for (int j = 0; j <= str2.length(); j++) distance[0][j] = j; for (int i = 1; i <= str1.length(); i++) for (int j = 1; j <= str2.length(); j++) distance[i][j] = minimum( distance[i - 1][j] + 1, distance[i][j - 1] + 1, distance[i - 1][j - 1] + ((str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : 1)); return distance[str1.length()][str2.length()]; } }
[edit] OCaml
(* Minimum of three integers. This function is deliberately * not polymorphic because (1) we only need to compare integers * and (2) the OCaml compilers do not perform type specialization * for user-defined functions. *) let minimum (x:int) y z = let m' (a:int) b = if a < b then a else b in m' (m' x y) z (* Matrix initialization. *) let init_matrix n m = let init_col = Array.init m in Array.init n (function | 0 -> init_col (function j -> j) | i -> init_col (function 0 -> i | _ -> 0) ) (* Computes the Levenshtein distance between two unistring. * If you want to run it faster, add the -unsafe option when * compiling or use Array.unsafe_* functions (but be carefull * with these well-named unsafe features). *) let distance_utf8 x y = match Array.length x, Array.length y with | 0, n -> n | m, 0 -> m | m, n -> let matrix = init_matrix (m + 1) (n + 1) in for i = 1 to m do let s = matrix.(i) and t = matrix.(i - 1) in for j = 1 to n do let cost = abs (compare x.(i - 1) y.(j - 1)) in s.(j) <- minimum (t.(j) + 1) (s.(j - 1) + 1) (t.(j - 1) + cost) done done; matrix.(m).(n) (* This function takes two strings, convert them to unistring (int array) * and then call distance_utf8, so we can compare utf8 strings. Please * note that you need Glib (see LablGTK). *) let distance x y = distance_utf8 (Glib.Utf8.to_unistring x) (Glib.Utf8.to_unistring y)
[edit] Octave And MATLAB
function [dist,L]=levenshtein_distance(str1,str2) L1=length(str1)+1; L2=length(str2)+1; L=zeros(L1,L2); g=+1;%just constant m=+0;%match is cheaper, we seek to minimize d=+1;%not-a-match is more costly. %do BC's L(:,1)=([0:L1-1]*g)'; L(1,:)=[0:L2-1]*g; m4=0;%loop invariant for idx=2:L1; for idy=2:L2 if(str1(idx-1)==str2(idy-1)) score=m; else score=d; end m1=L(idx-1,idy-1) + score; m2=L(idx-1,idy) + g; m3=L(idx,idy-1) + g; L(idx,idy)=min(m1,min(m2,m3)); end end dist=L(L1,L2); return end
[edit] Perl
# The function expects two string parameters # Usage: levenshtein( <string1>, <string2> ) # # Algorithm adopted from here: http://www.merriampark.com/ldperl.htm use strict; sub levenshtein { my ( $a, $b ) = @_; my ( $len1, $len2 ) = ( length $a, length $b ); return $len2 if ( $len1 == 0 ); return $len1 if ( $len2 == 0 ); my %d; for ( my $i = 0; $i <= $len1; ++$i ) { for ( my $j = 0; $j <= $len2; ++$j ) { $d{ $i }{ $j } = 0; $d{ 0 }{ $j } = $j; } $d{ $i }{ 0 } = $i; } # Populate arrays of characters to compare my @ar1 = split( //, $a ); my @ar2 = split( //, $b ); for ( my $i = 1; $i <= $len1; ++$i ) { for ( my $j = 1; $j <= $len2; ++$j ) { my $cost = ( $ar1[ $i - 1 ] eq $ar2[ $j - 1 ] ) ? 0 : 1; my $min1 = $d{ $i - 1 }{ $j } + 1; my $min2 = $d{ $i }{ $j - 1 } + 1; my $min3 = $d{ $i - 1 }{ $j - 1 } + $cost; if ( $min1 <= $min2 && $min1 <= $min3 ) { $d{ $i }{ $j } = $min1; } elsif ( $min2 <= $min1 && $min2 <= $min3 ) { $d{ $i }{ $j } = $min2; } else { $d{ $i }{ $j } = $min3; } } } return $d{ $len1 }{ $len2 }; }
[edit] PHP
Please note that there is a standard library call levenshtein() in PHP as of version 4.0.1. It is limited to comparing strings of no more than 255 characters in length, however, limiting its utility.
function lev($s,$t) { $m = strlen($s); $n = strlen($t); for($i=0;$i<=$m;$i++) $d[$i][0] = $i; for($j=0;$j<=$n;$j++) $d[0][$j] = $j; for($i=1;$i<=$m;$i++) { for($j=1;$j<=$n;$j++) { $c = ($s[$i-1] == $t[$j-1])?0:1; $d[$i][$j] = min($d[$i-1][$j]+1,$d[$i][$j-1]+1,$d[$i-1][$j-1]+$c); } } return $d[$m][$n]; }
[edit] Python
First version:
def levenshtein(s1, s2): if len(s1) < len(s2): return levenshtein(s2, s1) if not s1: return len(s2) previous_row = xrange(len(s2) + 1) for i, c1 in enumerate(s1): current_row = [i + 1] for j, c2 in enumerate(s2): insertions = previous_row[j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1]
Second version:
def lev(a, b): if not a: return len(b) if not b: return len(a) return min(lev(a[1:], b[1:])+(a[0] != b[0]), lev(a[1:], b)+1, lev(a, b[1:])+1)
(Note that while compact, the runtime of this implementation is relatively poor.)
[edit] rexx
/* rexx levenshtein calculates the edit distance Karlocai 2009-01-18 between 2 strings s and t This implementation of the Levenshtein algorithm uses one row only (0..n), containing - values of the previous line in columns [i-1]..n and - values of the current line in columns 1..[i-2] The current left value is kept in the variable lc i: column 1..n of s, 1st index j: row 1..m of t, 2nd index */ Parse Arg s,t -- gets 2 strings as parameter n = Length(s) -- checks the parameters m = Length(t) If n = 0 Then Return m If m = 0 Then Return n Do i = 0 To n -- initializes the 1st row r.i = i End Do j = 1 To m -- for each row lc = j -- left column start value Do i = 1 To n -- for each column nv = Min(r.i + 1, , lc + 1, , r.[i-1] + (Substr(s,i,1) <> Substr(t,j,1))) r.[i-1] = lc -- sets previous left column lc = nv -- current left column End r.n = lc -- sets last current value End Return r.n
[edit] Ruby
This Ruby version is simpler and works with any Array with elements that implement '=='.
def levenshtein(a, b) case when a.empty?: b.length when b.empty?: a.length else [(a[0] == b[0] ? 0 : 1) + levenshtein(a[1..-1], b[1..-1]), 1 + levenshtein(a[1..-1], b), 1 + levenshtein(a, b[1..-1])].min end end
[edit] Scala
Imperative version. A functional version would likely be far more concise.
def levenshtein(str1: String, str2: String): Int = { val lenStr1 = str1.length val lenStr2 = str2.length val d: Array[Array[Int]] = new Array(lenStr1 + 1, lenStr2 + 1) for (val i <- 0 to lenStr1) d(i)(0) = i for (val j <- 0 to lenStr2) d(0)(j) = j for (val i <- 1 to lenStr1; val j <- 1 to lenStr2) { val cost = if (str1(i - 1) == str2(j-1)) 0 else 1 d(i)(j) = minimum( d(i-1)(j ) + 1, // deletion d(i )(j-1) + 1, // insertion d(i-1)(j-1) + cost // substitution ) } return d(lenStr1)(lenStr2) }
[edit] VBScript
This version is identical to JavaScript and PHP implementations in this article.
Function levenshtein( a, b ) Dim i,j,cost,d,min1,min2,min3 ' Avoid calculations where there there are empty words If Len( a ) = 0 Then levenshtein = Len( b ): Exit Function If Len( b ) = 0 Then levenshtein = Len( a ): Exit Function ' Array initialization ReDim d( Len( a ), Len( b ) ) For i = 0 To Len( a ): d( i, 0 ) = i: Next For j = 0 To Len( b ): d( 0, j ) = j: Next ' Actual calculation For i = 1 To Len( a ) For j = 1 To Len( b ) cost = ABS(Mid( a, i, 1 ) = Mid( b, j, 1 )) ' Since min() function is not a part of VBScript, we'll "emulate" it below min1 = ( d( i - 1, j ) + 1 ) min2 = ( d( i, j - 1 ) + 1 ) min3 = ( d( i - 1, j - 1 ) + cost ) If min1 <= min2 And min1 <= min3 Then d( i, j ) = min1 ElseIf min2 <= min1 And min2 <= min3 Then d( i, j ) = min2 Else d( i, j ) = min3 End If Next Next levenshtein = d( Len( a ), Len( b ) ) End Function
[edit] Visual Basic for Applications (no Damerau extension)
This version is identical to JavaScript and PHP implementations in this article. I had problems when I tried to use the other VBA implementation in this article, so I had to adopt the version below.
Application.WorksheetFunction.Min() method is Excel-specific. If you implement it with other VBA-enabled applications, uncomment the conditional block and comment out the Application.WorksheetFunction.Min() line.
Function levenshtein(a As String, b As String) As Integer Dim i As Integer Dim j As Integer Dim cost As Integer Dim d() As Integer Dim min1 As Integer Dim min2 As Integer Dim min3 As Integer If Len( a ) = 0 Then levenshtein = Len( b ) Exit Function End If If Len( b ) = 0 Then levenshtein = Len( a ) Exit Function End If ReDim d(Len(a), Len(b)) For i = 0 To Len(a) d(i, 0) = i Next For j = 0 To Len(b) d(0, j) = j Next For i = 1 To Len(a) For j = 1 To Len(b) If Mid(a, i, 1) = Mid(b, j, 1) Then cost = 0 Else cost = 1 End If ' Since Min() function is not a part of VBA, we'll "emulate" it below min1 = (d(i - 1, j) + 1) min2 = (d(i, j - 1) + 1) min3 = (d(i - 1, j - 1) + cost) ' If min1 <= min2 And min1 <= min3 Then ' d(i, j) = min1 ' ElseIf min2 <= min1 And min2 <= min3 Then ' d(i, j) = min2 ' Else ' d(i, j) = min3 ' End If ' In Excel we can use Min() function that is included ' as a method of WorksheetFunction object d(i, j) = Application.WorksheetFunction.Min(min1, min2, min3) Next Next levenshtein = d(Len(a), Len(b)) End Function
[edit] Visual Basic for Applications (with Damerau extension)
This VBA version uses recursion and a maximum allowed distance. The analysis to identify quasi-duplicated strings or spelling mistakes can be narrowed down to a corridor in the result matrix. The Damerau extension has also been added to the Levenshtein algorithm.
Function damerau_levenshtein(s1 As String, s2 As String, Optional limit As Variant, Optional result As Variant) As Integer 'This function returns the Levenshtein distance capped by the limit parameter. 'Usage : e.g. damerau_levenshtein("Thibault","Gorisse") to get the exact distance ' or damerau_levenshtein("correctly written words","corectly writen words",4) to identify similar spellings Dim diagonal As Integer Dim horizontal As Integer Dim vertical As Integer Dim swap As Integer Dim final As Integer 'set a maximum limit if not set If IsMissing(limit) Then limit = Len(s1) + Len(s2) End If 'create the result matrix to store intermediary results If IsMissing(result) Then Dim i, j As Integer ' pointeur ReDim result(Len(s1), Len(s2)) As Integer End If 'Start of the strings analysis If result(Len(s1), Len(s2)) < 1 Then If Abs(Len(s1) - Len(s2)) >= limit Then final = limit Else If Len(s1) = 0 Or Len(s2) = 0 Then 'End of recursivity final = Len(s1) + Len(s2) Else 'Core of levenshtein algorithm If Mid(s1, 1, 1) = Mid(s2, 1, 1) Then final = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), limit, result) Else If Mid(s1, 1, 1) = Mid(s2, 2, 1) And Mid(s1, 2, 1) = Mid(s2, 1, 1) Then 'Damerau extension counting swapped letters swap = damerau_levenshtein(Mid(s1, 3), Mid(s2, 3), limit - 1, result) final = 1 + swap Else 'The function minimum is implemented via the limit parameter. 'The diagonal search usually reaches the limit the quickest. diagonal = damerau_levenshtein(Mid(s1, 2), Mid(s2, 2), limit - 1, result) horizontal = damerau_levenshtein(Mid(s1, 2), s2, diagonal, result) vertical = damerau_levenshtein(s1, Mid(s2, 2), horizontal, result) final = 1 + vertical End If End If End If End If Else 'retrieve intermediate result final = result(Len(s1), Len(s2)) - 1 End If 'returns the distance capped by the limit If final < limit Then damerau_levenshtein = final 'store intermediate result result(Len(s1), Len(s2)) = final + 1 Else damerau_levenshtein = limit End If End Function
[edit] Teslock
This is the Levenshtein distance calculation in Teslock Machine Language.
.declare singlecall virtual LevenshteinDistance[args(2) string s1, string s2]: out unsigned int
main
(
define unsigned int constant "cost_ins" == 1;
define unsigned int constant "cost_del" == 1;
define unsigned int constant "cost_sub" == 1;
define unsigned int variable "n1" == calculate::string_operations>length("s1");
define unsigned int variable "n2" == calculate::string_operations>length("s2");
define unsigned int_array(calculate::string_operations>array_instantiation("p")>fixed_length("n2", ++1));
define unsigned int_array(calculate::string_operations>array_instantiation("q")>fixed_length("n2", ++1));
define unsigned int_array(calculate::string_operations>array_instantiation("r")>variable_length);
p>array_vector>0 == 0;
loop_for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
(
p>array_vector>j == p>array_vector>j::access>+constants::cost_ins;
)
q>array_vector>0 == 0;
loop_for>finalized(define unsigned int variable "i" == 1)>break_condition("i" <= "n1")>forward_condition(++i)
(
q>array_vector>0 == p>array_vector>0::access>+constants::cost_del;
loop for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
(
define unsigned int variable "d_del" == p>array_vector>j::access>+constants::cost_del;
define unsigned int variable "d_ins" == q>array_vector>j[delegate_handle::j::access>-1]>+constants::cost_ins;
define unsigned int veriable "d_sub" == p>array_vector>j[delegate_handle::j::access>-1]>+logical_operations>xor_result["s1"::access>"s1"[delegate_handle::j::access>-1]?0>return_handle_as_result constants::"cost_sub"];
q>array_vector>j == dll_extern::math_interop_singlecall(min)::[args(3) (d_del, d_ins), d_sub;
)
local>"r" == "p"(self_typecast::ignore_unsafe_condition);
local>"p" == "q"(self_typecast);
local>"q" == "r"(self_typecast);
)
logical_result(param p::singlecall)::define>"return" == p>array_vector>"n2";
)
[edit] Abap
REPORT zlevenshtein.
*----------------------------------------------------------------------*
* CLASS lcl_levenshtein DEFINITION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein DEFINITION.
PUBLIC SECTION.
CLASS-METHODS:
distance IMPORTING i_s TYPE csequence
i_t TYPE csequence
RETURNING value(r) TYPE i.
ENDCLASS. "lcl_c DEFINITION
*----------------------------------------------------------------------*
* CLASS lcl_levenshtein IMPLEMENTATION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein IMPLEMENTATION.
METHOD distance.
DEFINE m_get.
l_m_index = ( l_m_j * ( l_m_i + ( &1 ) ) ) + ( l_m_j + ( &2 ) ) + 1.
read table l_d into r index l_m_index.
add &3 to r.
insert r into table l_v.
END-OF-DEFINITION.
DATA: l_d TYPE STANDARD TABLE OF i,
l_v TYPE SORTED TABLE OF i WITH UNIQUE KEY table_line,
l_cost TYPE i,
l_m_i TYPE i,
l_m_j TYPE i,
l_m_index TYPE i,
l_l_s TYPE i,
l_l_t TYPE i.
l_l_s = STRLEN( i_s ).
l_l_t = STRLEN( i_t ).
DO l_l_s TIMES.
l_m_i = sy-index - 1.
DO l_l_t TIMES. "#EC CI_NESTED
l_m_j = sy-index - 1.
IF l_m_j = 0.
r = l_m_i.
ELSEIF l_m_i = 0.
r = l_m_j.
ELSE.
IF i_s+l_m_i(1) = i_t+l_m_j(1).
l_cost = 0.
ELSE.
l_cost = 1.
ENDIF.
CLEAR l_v.
m_get: -1 0 1, 0 -1 1, -1 -1 l_cost.
READ TABLE l_v INTO r INDEX 1.
ENDIF.
APPEND r TO l_d.
ENDDO.
ENDDO.
ENDMETHOD. "distance
ENDCLASS. "lcl_levenshtein IMPLEMENTATION
START-OF-SELECTION.
DATA: d TYPE i.
d = lcl_levenshtein=>distance( i_s = 'sitting' i_t = 'kitten' ).
WRITE: / d.
[edit] Pick Basic
IF STRING1 = STRING2 THEN
LD = 0
END ELSE
S.LEN = LEN(STRING1)
C.LEN = LEN(STRING2)
MAT LD.MTX = ''
DIM LD.MTX(100,100)
FOR I = 3 TO S.LEN + 2
LD.MTX(I,1) = STRING1[I-2,1]
NEXT I
FOR I = 3 TO S.LEN + 2
LD.MTX(I,2) = I - 2
NEXT I
FOR I = 3 TO C.LEN + 2
LD.MTX(1,I) = STRING2[I-2,1]
NEXT I
FOR I = 3 TO C.LEN + 2
LD.MTX(2,I) = I - 2
NEXT I
FOR I = 3 TO (S.LEN+2)
S.LETTER = LD.MTX(I,1)
FOR J = 3 TO (C.LEN+2)
C.LETTER = LD.MTX(1,J)
IF C.LETTER = S.LETTER THEN COST = 0 ELSE COST = 1
P1 = LD.MTX(I-1,J) + 1
P2 = LD.MTX(I,J-1) + 1
P3 = LD.MTX(I-1,J-1) + COST
IF P1 < P2 THEN LD.NUM = P1 ELSE LD.NUM = P2
IF P3 < P2 THEN LD.NUM = P3
LD.MTX(I,J) = LD.NUM
NEXT J
NEXT I
LD = LD.MTX(S.LEN+2,C.LEN+2)
END