123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260openCoreopenFreetdstypet=|BignumofBignum.t|Boolofbool|Floatoffloat|Intofint|Int32ofint32|Int64ofint64|Stringofstring|DateofTime.t|Arrayoftlist[@@derivingcompare,sexp](** [recode ~src ~dst str] decodes [str] from the character set given by [~src],
i.e. "UTF-8", "CP1252", etc. and then encodes it into the character set
[~dst].
We need to do character set conversions because SQL Server can't handle
UTF-8 in any reasonable way, so most DB's are going to be using CP1252.
If [str] is not a valid string in the [~src] encoding, we give up on nice
conversions and just ASCIIfy the input and return it (just stripping out
all non-ASCII characters)
If [str] contains a character that can't be represented in [~dst], we
skip that character and move on. *)letrecode~src~dstinput=(* Note that we originally used //TRANSLIT and //IGNORE to do this in iconv,
but iconv is inconsistent between platforms so we do the conversion one
char at a time. *)tryletdecoder=Encoding.decodersrcandencoder=Encoding.encoderdstanddec_i=ref0andenc_i=ref0(* Using string like this is not recommended, but the library we're using
doesn't support bytes as an input to Encoding.encode
Note that we make a buffer with n * 4 bytes long because UTF-8 characters
can be a maximum of 4 bytes. This is very pessimistic, but resizing a
string constantly would be annoying and slow.
https://stijndewitt.com/2014/08/09/max-bytes-in-a-utf-8-char/ *)andoutput=Bytes.create(String.lengthinput*4)|>Bytes.to_stringinletinput_len=String.lengthinputandoutput_len=String.lengthoutputinwhile!dec_i<input_lendoEncoding.decodedecoderinput!dec_i(input_len-!dec_i)|>function|Encoding.Dec_ok(c,n)->dec_i:=!dec_i+n;Encoding.encodeencoderoutput!enc_i(output_len-!enc_i)c|>(function|Encoding.Enc_okn->enc_i:=!enc_i+n|Enc_error->(* skip characters that can't be translated *)()|Enc_need_more->failwith"Encoder is out of space")|Dec_error->failwith"Decode error"|Dec_need_more->failwith"Decoder ended with partial character"done;String.suboutput~pos:0~len:!enc_iwith|exn->Logger.info!"Recoding error, falling back to ascii filter %{sexp: exn} %s"exninput;String.filterinput~f:(func->Char.to_intc<128);;letof_data~month_offsetdata=matchdatawith|Dblib.BITb->Some(Boolb)|INTi|SMALLi|TINYi->Some(Inti)|INT32i->Some(Int32i)|INT64i->Some(Int64i)|FLOATf|MONEYf->Some(Floatf)|DECIMALs|NUMERICs->Some(Bignum(Bignum.of_strings))|BINARYs->Some(Strings)|STRINGs->Some(String(recode~src:"CP1252"~dst:"UTF-8"s))|DATETIME(y,mo,day,hr,min,sec,ms,_zone)->(* FIXME: Timezones don't match in FreeTDS 0.91 and 1.0, so for now we
just assume everything in UTC. *)letmo=mo+month_offsetinletdate=Date.create_exn~y~m:(Month.of_int_exnmo)~d:dayinlettime=Time.Ofday.create~hr~min~sec~ms~us:0()inletdatetime=Time.of_date_ofdaydatetime~zone:Time.Zone.utcinSome(Datedatetime)|NULL->None;;letrecto_string~quote_string=function|None->"NULL"|Somep->(matchpwith|Bignumn->Bignum.to_string_humn|>quote_string|Boolb->ifbthen"1"else"0"|Floatf->Float.to_stringf|Inti->Int.to_stringi|Int32i->Int32.to_stringi|Int64i->Int64.to_stringi|Strings->s|>quote_string|Datet->Time.format~zone:Time.Zone.utct"%Y-%m-%dT%H:%M:%S"|>quote_string|Arrayl->List.mapl~f:(funp->Somep|>to_string~quote_string)|>String.concat~sep:", ");;letto_string_escaped=(* Quote the string by replacing ' with '' and null with CHAR(0). This
is somewhat complicated because I couldn't find a way to escape a
null character without closing the string and adding +CHAR(0)+.
I couldn't do this with String.concat since that would force us to
concat every CHAR, which is inefficient (i.e. "asdf" would be passed as
'a'+'s'+'d'+'f'). *)letquote_strings=(* Need to convert to CP1252 since SQL Server can't handle UTF-8 in any
reasonable way. *)lets=recode~src:"UTF-8"~dst:"CP1252"sin(* len * 2 will always hold the resulting string unless it has null
chars, so this should make the standard case fast without wasting much
memory. *)letbuf=Buffer.create(String.lengths*2)inletin_str=reffalseinletfirst=reftrueinfori=0toString.lengths-1doletc=s.[i]inifChar.equalc'\x00'then(if!in_strthen(Buffer.add_charbuf'\'';in_str:=false);ifnot!firstthenBuffer.add_charbuf'+';Buffer.add_stringbuf"CHAR(0)")else(ifnot!in_strthen(ifnot!firstthenBuffer.add_charbuf'+';Buffer.add_charbuf'\'';in_str:=true);ifChar.equalc'\''thenBuffer.add_stringbuf"''"elseBuffer.add_charbufc);first:=falsedone;if!firstthen(Buffer.add_charbuf'\'';in_str:=true);if!in_strthenBuffer.add_charbuf'\'';Buffer.contentsbufinto_string~quote_string;;letto_string=to_string~quote_string:Fn.idletwith_error_msg?column~ftype_namet=tryftwith|Assert_failure_->letcolumn_info=matchcolumnwith|None->""|Somecolumn->sprintf" column %s"columninfailwithf!"Failed to convert%s %{sexp: t} to type %s"column_infottype_name();;letbignum?column=with_error_msg?column"float"~f:(function|Bignumb->b|Floatf->Bignum.of_float_dyadicf|Inti->Bignum.of_inti|Int32i->Int.of_int32_exni|>Bignum.of_int|Int64i->Int64.to_stringi|>Bignum.of_string|_->assertfalse);;letfloat?column=with_error_msg?column"float"~f:(function|Bignumb->Bignum.to_floatb|Floatf->f|Inti->Float.of_inti|Int32i->Int.of_int32_exni|>Float.of_int|Int64i->Float.of_int64i|_->assertfalse);;letint?column=with_error_msg?column"int"~f:(function|Bignumb->Bignum.to_int_exnb|Boolfalse->0|Booltrue->1|Floatf->Int.of_floatf|Inti->i|Int32i->Int32.to_int_exni|Int64i->Int64.to_int_exni|_->assertfalse);;letint32?column=with_error_msg?column"int32"~f:(function|Bignumb->Bignum.to_int_exnb|>Int32.of_int_exn|Boolfalse->Int32.zero|Booltrue->Int32.one|Floatf->Int32.of_floatf|Inti->Int32.of_int_exni|Int32i->i|_->assertfalse);;letint64?column=with_error_msg?column"int64"~f:(function|Bignumb->Bignum.to_int_exnb|>Int64.of_int_exn|Boolfalse->Int64.zero|Booltrue->Int64.one|Floatf->Int64.of_floatf|Inti->Int64.of_inti|Int32i->Int64.of_int32i|Int64i->i|_->assertfalse);;letbool?column=with_error_msg?column"bool"~f:(function|Boolb->b(* MSSQL's native BIT type is 0 or 1, so conversions from 0 or 1 ints
make sense *)|Intiwheni=0->false|Intiwheni=1->true|Int32iwhenInt32.equaliInt32.zero->false|Int32iwhenInt32.equaliInt32.one->true|Int64iwhenInt64.equaliInt64.zero->false|Int64iwhenInt64.equaliInt64.one->true|_->assertfalse);;letstr?column=with_error_msg?column"string"~f:(function|Bignumb->Bignum.to_string_humb|Boolb->Bool.to_stringb|Floatf->Float.to_stringf|Inti->Int.to_stringi|Int32i->Int32.to_stringi|Int64i->Int64.to_stringi|Strings->s|Datet->Time.to_string_abs~zone:Time.Zone.utct|Array_->assertfalse);;letdate?column=with_error_msg?column"date"~f:(function|Dated->Date.of_time~zone:Time.Zone.utcd|Strings->Date.of_strings|_->assertfalse);;letdatetime?column=with_error_msg?column"datetime"~f:(function|Dated->d|Strings->Time.of_string_gen~if_no_timezone:(`Use_this_oneTime.Zone.utc)s|_->assertfalse);;