Source file db_field.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
open Core
open Freetds

type t =
  | Bignum of Bignum.t
  | Bool of bool
  | Float of float
  | Int of int
  | Int32 of int32
  | Int64 of int64
  | String of string
  | Date of Time.t
  | Array of t list
[@@deriving compare, sexp]

(** [recode ~src ~dst str] decodes [str] from the character set given by [~src],
    i.e. "UTF-8", "CP1252", etc. and then encodes it into the character set
    [~dst].

    We need to do character set conversions because SQL Server can't handle
    UTF-8 in any reasonable way, so most DB's are going to be using CP1252.

    If [str] is not a valid string in the [~src] encoding, we give up on nice
    conversions and just ASCIIfy the input and return it (just stripping out
    all non-ASCII characters)

    If [str] contains a character that can't be represented in [~dst], we
    skip that character and move on. *)
let recode ~src ~dst input =
  (*  Note that we originally used //TRANSLIT and //IGNORE to do this in iconv,
      but iconv is inconsistent between platforms so we do the conversion one
      char at a time. *)
  try
    let decoder = Encoding.decoder src
    and encoder = Encoding.encoder dst
    and dec_i = ref 0
    and enc_i = ref 0
    (* Using string like this is not recommended, but the library we're using
       doesn't support bytes as an input to Encoding.encode

       Note that we make a buffer with n * 4 bytes long because UTF-8 characters
       can be a maximum of 4 bytes. This is very pessimistic, but resizing a
       string constantly would be annoying and slow.

       https://stijndewitt.com/2014/08/09/max-bytes-in-a-utf-8-char/ *)
    and output = Bytes.create (String.length input * 4) |> Bytes.to_string in
    let input_len = String.length input
    and output_len = String.length output in
    while !dec_i < input_len do
      Encoding.decode decoder input !dec_i (input_len - !dec_i)
      |> function
      | Encoding.Dec_ok (c, n) ->
        dec_i := !dec_i + n;
        Encoding.encode encoder output !enc_i (output_len - !enc_i) c
        |> (function
        | Encoding.Enc_ok n -> enc_i := !enc_i + n
        | Enc_error -> (* skip characters that can't be translated *) ()
        | Enc_need_more -> failwith "Encoder is out of space")
      | Dec_error -> failwith "Decode error"
      | Dec_need_more -> failwith "Decoder ended with partial character"
    done;
    String.sub output ~pos:0 ~len:!enc_i
  with
  | exn ->
    Logger.info !"Recoding error, falling back to ascii filter %{sexp: exn} %s" exn input;
    String.filter input ~f:(fun c -> Char.to_int c < 128)
;;

let of_data ~month_offset data =
  match data with
  | Dblib.BIT b -> Some (Bool b)
  | INT i | SMALL i | TINY i -> Some (Int i)
  | INT32 i -> Some (Int32 i)
  | INT64 i -> Some (Int64 i)
  | FLOAT f | MONEY f -> Some (Float f)
  | DECIMAL s | NUMERIC s -> Some (Bignum (Bignum.of_string s))
  | BINARY s -> Some (String s)
  | STRING s -> Some (String (recode ~src:"CP1252" ~dst:"UTF-8" s))
  | DATETIME (y, mo, day, hr, min, sec, ms, _zone) ->
    (* FIXME: Timezones don't match in FreeTDS 0.91 and 1.0, so for now we
       just assume everything in UTC. *)
    let mo = mo + month_offset in
    let date = Date.create_exn ~y ~m:(Month.of_int_exn mo) ~d:day in
    let time = Time.Ofday.create ~hr ~min ~sec ~ms ~us:0 () in
    let datetime = Time.of_date_ofday date time ~zone:Time.Zone.utc in
    Some (Date datetime)
  | NULL -> None
;;

let rec to_string ~quote_string = function
  | None -> "NULL"
  | Some p ->
    (match p with
    | Bignum n -> Bignum.to_string_hum n |> quote_string
    | Bool b -> if b then "1" else "0"
    | Float f -> Float.to_string f
    | Int i -> Int.to_string i
    | Int32 i -> Int32.to_string i
    | Int64 i -> Int64.to_string i
    | String s -> s |> quote_string
    | Date t -> Time.format ~zone:Time.Zone.utc t "%Y-%m-%dT%H:%M:%S" |> quote_string
    | Array l ->
      List.map l ~f:(fun p -> Some p |> to_string ~quote_string)
      |> String.concat ~sep:", ")
;;

let to_string_escaped =
  (* Quote the string by replacing ' with '' and null with CHAR(0). This
     is somewhat complicated because I couldn't find a way to escape a
     null character without closing the string and adding +CHAR(0)+.
     I couldn't do this with String.concat since that would force us to
     concat every CHAR, which is inefficient (i.e. "asdf" would be passed as
     'a'+'s'+'d'+'f'). *)
  let quote_string s =
    (* Need to convert to CP1252 since SQL Server can't handle UTF-8 in any
       reasonable way. *)
    let s = recode ~src:"UTF-8" ~dst:"CP1252" s in
    (* len * 2 will always hold the resulting string unless it has null
       chars, so this should make the standard case fast without wasting much
       memory. *)
    let buf = Buffer.create (String.length s * 2) in
    let in_str = ref false in
    let first = ref true in
    for i = 0 to String.length s - 1 do
      let c = s.[i] in
      if Char.equal c '\x00'
      then (
        if !in_str
        then (
          Buffer.add_char buf '\'';
          in_str := false);
        if not !first then Buffer.add_char buf '+';
        Buffer.add_string buf "CHAR(0)")
      else (
        if not !in_str
        then (
          if not !first then Buffer.add_char buf '+';
          Buffer.add_char buf '\'';
          in_str := true);
        if Char.equal c '\'' then Buffer.add_string buf "''" else Buffer.add_char buf c);
      first := false
    done;
    if !first
    then (
      Buffer.add_char buf '\'';
      in_str := true);
    if !in_str then Buffer.add_char buf '\'';
    Buffer.contents buf
  in
  to_string ~quote_string
;;

let to_string = to_string ~quote_string:Fn.id

let with_error_msg ?column ~f type_name t =
  try f t with
  | Assert_failure _ ->
    let column_info =
      match column with
      | None -> ""
      | Some column -> sprintf " column %s" column
    in
    failwithf !"Failed to convert%s %{sexp: t} to type %s" column_info t type_name ()
;;

let bignum ?column =
  with_error_msg ?column "float" ~f:(function
      | Bignum b -> b
      | Float f -> Bignum.of_float_dyadic f
      | Int i -> Bignum.of_int i
      | Int32 i -> Int.of_int32_exn i |> Bignum.of_int
      | Int64 i -> Int64.to_string i |> Bignum.of_string
      | _ -> assert false)
;;

let float ?column =
  with_error_msg ?column "float" ~f:(function
      | Bignum b -> Bignum.to_float b
      | Float f -> f
      | Int i -> Float.of_int i
      | Int32 i -> Int.of_int32_exn i |> Float.of_int
      | Int64 i -> Float.of_int64 i
      | _ -> assert false)
;;

let int ?column =
  with_error_msg ?column "int" ~f:(function
      | Bignum b -> Bignum.to_int_exn b
      | Bool false -> 0
      | Bool true -> 1
      | Float f -> Int.of_float f
      | Int i -> i
      | Int32 i -> Int32.to_int_exn i
      | Int64 i -> Int64.to_int_exn i
      | _ -> assert false)
;;

let int32 ?column =
  with_error_msg ?column "int32" ~f:(function
      | Bignum b -> Bignum.to_int_exn b |> Int32.of_int_exn
      | Bool false -> Int32.zero
      | Bool true -> Int32.one
      | Float f -> Int32.of_float f
      | Int i -> Int32.of_int_exn i
      | Int32 i -> i
      | _ -> assert false)
;;

let int64 ?column =
  with_error_msg ?column "int64" ~f:(function
      | Bignum b -> Bignum.to_int_exn b |> Int64.of_int_exn
      | Bool false -> Int64.zero
      | Bool true -> Int64.one
      | Float f -> Int64.of_float f
      | Int i -> Int64.of_int i
      | Int32 i -> Int64.of_int32 i
      | Int64 i -> i
      | _ -> assert false)
;;

let bool ?column =
  with_error_msg ?column "bool" ~f:(function
      | Bool b -> b
      (* MSSQL's native BIT type is 0 or 1, so conversions from 0 or 1 ints
       make sense *)
      | Int i when i = 0 -> false
      | Int i when i = 1 -> true
      | Int32 i when Int32.equal i Int32.zero -> false
      | Int32 i when Int32.equal i Int32.one -> true
      | Int64 i when Int64.equal i Int64.zero -> false
      | Int64 i when Int64.equal i Int64.one -> true
      | _ -> assert false)
;;

let str ?column =
  with_error_msg ?column "string" ~f:(function
      | Bignum b -> Bignum.to_string_hum b
      | Bool b -> Bool.to_string b
      | Float f -> Float.to_string f
      | Int i -> Int.to_string i
      | Int32 i -> Int32.to_string i
      | Int64 i -> Int64.to_string i
      | String s -> s
      | Date t -> Time.to_string_abs ~zone:Time.Zone.utc t
      | Array _ -> assert false)
;;

let date ?column =
  with_error_msg ?column "date" ~f:(function
      | Date d -> Date.of_time ~zone:Time.Zone.utc d
      | String s -> Date.of_string s
      | _ -> assert false)
;;

let datetime ?column =
  with_error_msg ?column "datetime" ~f:(function
      | Date d -> d
      | String s -> Time.of_string_gen ~if_no_timezone:(`Use_this_one Time.Zone.utc) s
      | _ -> assert false)
;;