Source file fuzzy_search.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
open! Core
module Case_mode = struct
type t = Smart
end
module Query = struct
type t =
{ queries : string array
; raw : string
; case_sensitive : bool
}
let create query =
let queries =
String.split query ~on:' '
|> List.filter ~f:(fun s -> not (String.is_empty s))
|> Array.of_list
in
let case_mode = Case_mode.Smart in
let case_sensitive =
match case_mode with
| Smart -> String.exists query ~f:Char.is_uppercase
in
{ queries; raw = query; case_sensitive }
;;
let is_empty t = String.is_empty t.raw
end
module Start_end_indices = struct
type t =
{ mutable found : bool
; mutable start_idx : int
; mutable end_idx : int
}
let the_one_and_only = { found = false; start_idx = 0; end_idx = 0 }
end
let equal_ignore_case char1 char2 =
Char.equal (Char.lowercase char1) (Char.lowercase char2)
;;
let[@inline] find_start_end_indices ~query ~item () =
let dst = Start_end_indices.the_one_and_only in
let item_idx = ref 0 in
let query_idx = ref 0 in
while !item_idx < String.length item && !query_idx < String.length query do
let item_char = String.get item !item_idx in
let query_char = String.get query !query_idx in
if equal_ignore_case item_char query_char then incr query_idx;
incr item_idx
done;
if !query_idx < String.length query
then dst.found <- false
else (
let end_idx = !item_idx in
let item_idx = ref (end_idx - 1) in
let query_idx = ref (String.length query - 1) in
while !query_idx >= 0 do
let item_char = String.get item !item_idx in
let query_char = String.get query !query_idx in
if equal_ignore_case item_char query_char then decr query_idx;
decr item_idx
done;
let start_idx = !item_idx + 1 in
dst.found <- true;
dst.start_idx <- start_idx;
dst.end_idx <- end_idx);
dst
;;
let fold_matching_indices_single_query query ~item ~init ~f =
let { Start_end_indices.start_idx; end_idx; found } =
find_start_end_indices ~query ~item ()
in
let acc = ref init in
if found
then (
let query_idx = ref 0 in
for item_idx = start_idx to end_idx - 1 do
let item_char = String.get item item_idx in
let query_char = String.get query !query_idx in
if equal_ignore_case item_char query_char
then (
acc := f !acc item_idx;
incr query_idx)
done);
!acc
;;
let matching_indices (query : Query.t) ~item =
if Query.is_empty query
then Some [||]
else (
let indices =
Array.fold query.queries ~init:Int.Set.empty ~f:(fun set query ->
fold_matching_indices_single_query query ~item ~init:set ~f:Set.add)
in
if Set.is_empty indices then None else Some (Set.to_array indices))
;;
let split_by_matching_sections (query : Query.t) ~item =
match matching_indices query ~item with
| None -> None
| Some [||] -> Some [ `Not_matching, item ]
| Some matches ->
let sections = Queue.create () in
let add_section matching start end_inclusive =
Queue.enqueue
sections
(matching, String.sub item ~pos:start ~len:(end_inclusive - start + 1))
in
let first = matches.(0) in
if first > 0 then add_section `Not_matching 0 (first - 1);
let matching_range_start = ref first in
let matching_range_end = ref first in
Array.iter matches ~f:(fun idx ->
if idx > !matching_range_end + 1
then (
add_section `Matching !matching_range_start !matching_range_end;
add_section `Not_matching (!matching_range_end + 1) (idx - 1);
matching_range_start := idx);
matching_range_end := idx);
add_section `Matching !matching_range_start !matching_range_end;
if !matching_range_end < String.length item - 1
then add_section `Not_matching (!matching_range_end + 1) (String.length item - 1);
Some (Queue.to_list sections)
;;
module Char_class = struct
type t =
| Upper
| Lower
| Digit
| Non_word
let of_char = function
| 'A' .. 'Z' -> Upper
| 'a' .. 'z' -> Lower
| '0' .. '9' -> Digit
| _ -> Non_word
;;
end
let start_of_item_bonus = 1
let start_of_word_bonus = 480
let camel_case_bonus = 360
let non_word_bonus = 480
let match_bonus = 320
let start_gap_penalty = 60
let continue_gap_penalty = 20
let wrong_case_penalty = 120
let first_char_multiplier = 2
let score_upper_bound ~query_length =
((start_of_item_bonus + start_of_word_bonus) * first_char_multiplier * query_length) + 1
;;
let score_single_query query ~item ~case_sensitive =
let { Start_end_indices.found; start_idx; end_idx } =
find_start_end_indices ~query ~item ()
in
if not found
then 0
else (
let start_char_class =
if start_idx = 0
then Char_class.Non_word
else Char_class.of_char (String.get item (start_idx - 1))
in
let query_idx = ref 0 in
let score = ref 0 in
let in_gap = ref false in
let prev_char_class = ref start_char_class in
let prev_char_score = ref 0 in
for item_idx = start_idx to end_idx - 1 do
let item_char = String.get item item_idx in
let query_char = String.get query !query_idx in
if equal_ignore_case item_char query_char
then (
let char_class = Char_class.of_char item_char in
let this_char_score =
let base_score =
match !prev_char_class, char_class with
| Non_word, (Upper | Lower | Digit) -> start_of_word_bonus
| Lower, Upper | _, Digit -> camel_case_bonus
| _, Non_word -> non_word_bonus
| _, _ -> match_bonus
in
let with_start_of_item_bonus =
if item_idx = 0 then base_score + start_of_item_bonus else base_score
in
let with_consecutive_bonus =
if !in_gap
then with_start_of_item_bonus
else Int.max with_start_of_item_bonus !prev_char_score
in
let with_wrong_case_penalty =
if Char.equal item_char query_char || not case_sensitive
then with_consecutive_bonus
else with_consecutive_bonus - wrong_case_penalty
in
if !query_idx = 0
then first_char_multiplier * with_wrong_case_penalty
else with_wrong_case_penalty
in
incr query_idx;
score := !score + this_char_score;
prev_char_class := char_class;
prev_char_score := this_char_score;
in_gap := false)
else (
let this_char_score =
if !in_gap then -continue_gap_penalty else -start_gap_penalty
in
score := !score + this_char_score;
in_gap := true)
done;
!score)
;;
let score query ~item =
match String.is_empty item, Query.is_empty query with
| true, _ -> 0
| _, true -> 1
| _, _ ->
let { Query.queries; raw; case_sensitive } = query in
let raw_score = ref 0 in
let any_mismatch = ref false in
for i = 0 to Array.length queries - 1 do
let query = queries.(i) in
let score = score_single_query query ~item ~case_sensitive in
if score = 0 then any_mismatch := true;
raw_score := !raw_score + score
done;
(match !raw_score with
| 0 -> 0
| _ ->
if !any_mismatch
then 0
else score_upper_bound ~query_length:(String.length raw) - !raw_score)
;;
let score_opt query ~item =
match score query ~item with
| 0 -> None
| x -> Some x
;;
module For_testing = struct
let find_start_end_indices ~query ~item () =
let { Start_end_indices.found; start_idx; end_idx } =
find_start_end_indices ~query ~item ()
in
Option.some_if found (start_idx, end_idx)
;;
end
let search query ~items =
List.filter_map items ~f:(fun item ->
match score query ~item with
| 0 -> None
| score ->
Some (score, String.length item, item))
|> List.sort ~compare:[%compare: int * int * string]
|> List.map ~f:Tuple3.get3
;;
let search' query ~items =
let items_by_score =
Array.filter_map items ~f:(fun item ->
match score query ~item with
| 0 -> None
| score ->
Some (score, String.length item, item))
in
Array.sort items_by_score ~compare:[%compare: int * int * string];
Array.map items_by_score ~f:Tuple3.get3
;;