/* *************************** Definitions ************************** */
/* The two strings whose similarity is scored. */
define input parameter v-one as character no-undo.
define input parameter v-two as character no-undo.
/* Lower-cased copies of the inputs, ordered by length in the main block. */
define variable v-shorter as character no-undo.
define variable v-longer as character no-undo.
/* Maximum position difference allowed for a match; passed to jaro.ip as v-limit. */
define variable v-searchrange as integer no-undo.
/* Matched-character strings handed back by the two jaro.ip calls. */
define variable v-aux1 as character no-undo.
define variable v-aux2 as character no-undo.
/* Jaro score computed by the main block. */
define variable v-distance as decimal no-undo.
/* Transposition figure used in the Jaro formula. */
define variable v-t as decimal no-undo.
/* Final score: v-distance plus the common-prefix adjustment. */
define variable v-calc as decimal no-undo.
/* Loop counter used by the main block. */
define variable v-cnt as integer no-undo.
/* Number of leading characters v-shorter and v-longer have in common. */
define variable v-l as integer no-undo.
/* One record per character of jaro.ip's first string: the character and its 1-based position. */
define temp-table tt-word
field letter as character
field pos as integer.
/* Same layout as tt-word, filled from jaro.ip's second string. */
define temp-table tt-word2
field letter as character
field pos as integer.
/* *************************** Main Block *************************** */
/* jaro.ip
   Collects the characters of v-first that have a partner in v-second.

   v-first   string whose characters are looked up, one by one, in order
   v-second  string searched for partners
   v-limit   largest allowed position difference between a character and
             its partner (a partner at the same position always qualifies)
   v-new     reset on entry; on return holds the matched characters of
             v-first in their original order
   v-trans   reset on entry; on return holds how many matched characters
             were paired with a partner at a different position

   Each character of v-second can be used as a partner only once, and a
   character of v-first contributes at most one match.

   Side effect: tt-word and tt-word2 are emptied and refilled with the
   characters of v-first and v-second respectively. */
procedure jaro.ip:
    define input parameter v-first as character no-undo.
    define input parameter v-second as character no-undo.
    define input parameter v-limit as integer no-undo.
    define input-output parameter v-new as character no-undo.
    define input-output parameter v-trans as decimal no-undo.

    define variable v-cnt as integer no-undo.
    /* Comma-separated positions of tt-word2 records already paired. */
    define variable v-used as character no-undo.

    empty temp-table tt-word.
    empty temp-table tt-word2.
    assign
        v-new = ""
        v-trans = 0
        v-used = "".

    /* Explode both strings into one record per character. */
    do v-cnt = 1 to length(v-first):
        create tt-word.
        assign
            tt-word.pos = v-cnt
            tt-word.letter = substring(v-first, v-cnt, 1).
    end.
    do v-cnt = 1 to length(v-second):
        create tt-word2.
        assign
            tt-word2.pos = v-cnt
            tt-word2.letter = substring(v-second, v-cnt, 1).
    end.

    for each tt-word:
        /* Prefer the partner at the very same position ... */
        find first tt-word2
            where tt-word2.pos = tt-word.pos
              and tt-word2.letter = tt-word.letter
              and lookup(string(tt-word2.pos), v-used) = 0
            no-error.
        /* ... otherwise take the first unused occurrence inside the window.
           Checking the window in the WHERE clause (instead of after the
           FIND) keeps an out-of-range occurrence from hiding a later one
           that is in range. */
        if not available tt-word2 then
            find first tt-word2
                where tt-word2.letter = tt-word.letter
                  and absolute(tt-word2.pos - tt-word.pos) <= v-limit
                  and lookup(string(tt-word2.pos), v-used) = 0
                no-error.
        if available tt-word2 then
        do:
            assign
                v-new = v-new + tt-word.letter
                v-used = v-used + (if v-used = "" then "" else ",") + string(tt-word2.pos).
            if tt-word2.pos <> tt-word.pos then
                v-trans = v-trans + 1.
        end.
    end.
end procedure.
/*********************************************************/
/*MAIN*/
/*********************************************************/
/* Compare lower-cased copies; v-longer is the longer input
   (v-two when both have the same length). */
if length(v-one) > length(v-two) then
do:
    assign
        v-longer = lc(v-one)
        v-shorter = lc(v-two).
end.
else
do:
    assign
        v-longer = lc(v-two)
        v-shorter = lc(v-one).
end.

/* Match window: floor(length / 2) - 1.  TRUNCATE is required because
   assigning a decimal to an integer rounds (2.5 - 1 would become 2). */
v-searchrange = truncate(length(v-longer) / 2, 0) - 1.

/* Matched characters of each string, in that string's own order. */
run jaro.ip(v-shorter,v-longer, v-searchrange,input-output v-aux1,input-output v-t).
run jaro.ip(v-longer,v-shorter, v-searchrange,input-output v-aux2,input-output v-t).

assign
    v-distance = 0
    v-t = 0
    v-l = 0.

/* No matches, or match strings of different length: the score stays 0.
   Guarding here also keeps the formula from dividing by zero. */
if length(v-aux1) > 0 and length(v-aux1) = length(v-aux2) then
do:
    /* Transpositions: half the number of positions at which the two
       matched-character strings disagree. */
    do v-cnt = 1 to length(v-aux1):
        if substring(v-aux1,v-cnt,1) <> substring(v-aux2,v-cnt,1) then
            v-t = v-t + 1.
    end.
    v-t = v-t / 2.
    v-distance = (( length(v-aux1) / length(v-shorter) ) + ( length(v-aux2) / length(v-longer) ) + ((length(v-aux1) - v-t) / length(v-aux1))) / 3.
end.

/* Winkler adjustment: above 0.7, reward a common prefix of up to four
   characters.  The cap keeps the final score from exceeding 1. */
if v-distance > 0.7 then
do:
    constantblock:
    do v-cnt = 1 to minimum(4, length(v-shorter)):
        if substring(v-shorter,v-cnt,1) = substring(v-longer,v-cnt,1) then
        do:
            v-l = v-l + 1.
        end.
        else
        do:
            leave constantblock.
        end.
    end.
end.

/* NOTE(review): v-calc holds the final score, but nothing visible in this
   part of the file hands it back to the caller - confirm how it is returned. */
v-calc = v-distance + v-l * 0.1 * (1 - v-distance).
Based on:
https://codegolf.stackexchange.com/questions/95619/string-similarity-using-jaro-winkler
I created a procedure that takes two strings as input parameters and returns the Jaro-Winkler score for their similarity.
https://codegolf.stackexchange.com/questions/95619/string-similarity-using-jaro-winkler
I created a procedure that takes two strings as input parameters and returns the Jaro-Winkler score for their similarity.
3 Responses
Write a comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.