1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
module type S = sig
type ch
type t
val length : t -> int
val get : t -> int -> ch
val equal : ch -> ch -> bool
end
module type NFA_t = sig
module StateSet : sig
type t
val start : k:int -> t
val min_cost_opt : t -> int option
end
module Transitions : sig
val all_transitions : StateSet.t -> BitVec.t -> k:int -> StateSet.t
end
end
module Make (St : S) (NFA : NFA_t) = struct
module GBV = StringOps.BitVecOps (St)
type nfa_state = {nfa : NFA.StateSet.t option; k : int; str: St.t; str_len: int; fed_so_far: int}
let start ~k ~str =
if (k < 0) then
failwith "the limit k cannot be negative"
else if (k > ((Sys.int_size - 1) / 2)) then
failwith "the limit k cannot be larger than ((int_size - 1) / 2)"
else
{nfa = Some (NFA.StateSet.start ~k); k; str; str_len = St.length str; fed_so_far = 0 }
let feed {nfa;k;str;str_len;fed_so_far} ~ch =
let index = fed_so_far + 1 in
if Option.is_none nfa
|| index > str_len + k then
{nfa = None; k; str; str_len; fed_so_far = index}
else
let bv = GBV.bit_vec_of ch str ~index ~k in
let nfa = NFA.Transitions.all_transitions (Option.get nfa) bv ~k in
{nfa = Some nfa;k;str;str_len;fed_so_far = index}
let current_error {nfa;_} : int option =
match nfa with
| None -> None
| Some nfa -> NFA.StateSet.min_cost_opt nfa
let end_input {nfa;k;str=_;str_len;fed_so_far} : int option =
let size_diff = str_len - fed_so_far in
if Option.is_none nfa
|| size_diff > k then
None
else
let sentinels = str_len - fed_so_far + k in
let nfa =
BitVec.pos_fold
~f:(fun n nfa ->
let bv = GBV.bit_vec_of_sentinel ~str_len ~index:(fed_so_far + 1 + sentinels - n) ~k in
NFA.Transitions.all_transitions nfa bv ~k)
~init:(Option.get nfa)
sentinels
in
NFA.StateSet.min_cost_opt nfa
let feed_str nfa_state ~str =
let len = St.length str in
BitVec.pos_fold
~f:(fun n nfa -> feed nfa ~ch:(St.get str (len - n)))
~init:nfa_state
len
let get_distance ~k str1 str2 =
let len_diff =
let len1 = St.length str1 in
let len2 = St.length str2 in
Int.abs (len1 - len2)
in
if len_diff > k then
None
else
let start = start ~k ~str:str1 in
let end_ = feed_str start ~str:str2 in
let cost = end_input end_ in
cost
end