123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401(**************************************************************************)(* This file is part of BINSEC. *)(* *)(* Copyright (C) 2016-2026 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file licenses/LGPLv2.1). *)(* *)(**************************************************************************)(* Xtrasec main algorithm, which consists in analyzing the sequence
of instructions given by the pintool, and decode it into either a
formula or a LLVM module. *)(* Note: this function is x86-specific. *)letregister_to_size=function|"CF"->1|"DF"->1|"ZF"->1|"OF"->1|"SF"->1|"AF"->1|"PF"->1|"eax"->32|"ecx"->32|"edx"->32|"ebx"->32|"esp"->32|"ebp"->32|"esi"->32|"edi"->32|"mm0"->64|"mm1"->64|"mm2"->64|"mm3"->64|"mm4"->64|"mm5"->64|"mm6"->64|"mm7"->64|"st0"->80|"st1"->80|"st2"->80|"st3"->80|"st4"->80|"st5"->80|"st6"->80|"st7"->80|"xmm0"->128|"xmm1"->128|"xmm2"->128|"xmm3"->128|"xmm4"->128|"xmm5"->128|"xmm6"->128|"xmm7"->128|"fs"->16|"gs"->16|reg->failwith("register_to_size: unknown register "^reg)(******** The algorithm is parametrized by the wanted output. ********)moduleMake(Param:sigincludeGeneric_decoder_sig.Instr_Input(* Says that we do not know anything after memory, for instance
after a system call. *)valclear_memory:State.t->State.tend):sigvalparse:Param.State.t->string->Param.State.tend=struct(* Adds the concretization informations to an existing Param. *)moduleParam2:sigincludeGeneric_decoder_sig.Instr_InputwithmoduleState=Param.Stateandtypeboolean=Param.booleanandtypebinary=Param.binaryvalload_addr_info:Virtual_address.toptionrefvalstore_addr_info:Virtual_address.toptionrefend=structincludeParamletload_addr_info=refNoneletstore_addr_info=refNoneletadd_assertionstateaddressv=matchXtrasec_options.Concretize_mem.get()with|`No->state|`Exact->letv,state=Param.Binary.biconst~size:32(Virtual_address.to_bigintv)stateinletassertion,state=Param.Binary.beq~size:32addressvstateinsnd@@Param.assumeassertionstate|`Approximatei->letassertion_inf,state=(* Saturation if underflow. *)letaddr=Z.of_int@@max0@@(-i+Virtual_address.to_intv)inletaddr,state=Param.Binary.biconst~size:32addrstateinParam.Binary.biule~size:32addraddressstateinletassertion_sup,state=(* Saturation if overflow *)letaddr=Z.of_int@@min((1lsl32)-1)@@(i+Virtual_address.to_intv)inletaddr,state=Param.Binary.biconst~size:32addrstateinParam.Binary.biule~size:32addressaddrstateinletassertion,state=Param.Boolean.(&&)assertion_infassertion_supstateinsnd@@Param.assumeassertionstateletstore~sizeendianaddrvaluestate=letstate=match!store_addr_infowith|None->state|Somev->add_assertionstateaddrvinParam.store~sizeendianaddrvaluestateletload~sizeendianaddrstate=letstate=match!load_addr_infowith|None->state|Somev->add_assertionstateaddrvinParam.load~sizeendianaddrstateendmoduleM=Generic_decoder.Decode_Instr(Param2)(* The algorithm explore traces non-deterministically, and
back-up when it finds out that we did not followed the
correct trace (e.g. when the next instruction in the trace is
incompatible with the destination of a branch.
Note: This algorithm is problematic in the case when several
paths in the DBA can lead to the same next instruction in the
trace, which does not seem to happen on x86. A better
algorithm should be using path-merging for these cases. *)typetrace=Final_StateofParam.State.t|Wrong_Trace(* Unhandled corresponds to instructions that cannot be decoded into
DBA. *)typedhunk_type=Unhandled|DhunkofDhunk.ttypeenriched_ins={(* The parsed instruction given by xtrasec. *)ins:Parsepin.ins;(* Normally, we realize that we are on a wrong trace when the
final jump of an instruction does not match the address of the
next instruction in the trace. Some instructions are handled
specially (e.g. we unroll instructions that have a REP
prefix); in this case we do not perform the check. *)check_outgoing_edge:bool;(* Dhunk if present. *)dhunk:dhunk_type;(* Mnemonic, for debugging. *)mnemonic:string;(* Size of the instruction in bytes. *)size:int;}exceptionLastInstruction(* Wrapper around Parsepin.pop_ins to add the additional
informations. *)letpop_enriched_insx=matchParsepin.pop_insxwith|None->raiseLastInstruction|Some(ins,lp)->(letopenParsepininlet{address=addr;code;_}=insin(* Remove rep and repne prefix; the xtrasec instrumentation
repeats the instruction in the trace, so we known how many of
them to put (i.e. we can unroll the rep precisely). *)letrep,code=ifcode.[0]='f'&&(code.[1]=='2'||code.[1]=='3')then(true,String.subcode2(String.lengthcode-2))else(false,code)in(* We remove the rep prefix, so the ougoing edge may not be the
right one, as the instruction may appear again. *)letcheck_outgoing_edge=notrepintryletinst,dhunk=X86toDba.decode_binstream~base_addr:addr(Binstream.of_nibblescode)inletsize=Size.Byte.to_intinst.X86Instruction.sizeinletmnemonic=Format.asprintf"%a"X86Instruction.pp_mnemonicinstin(* Xtrasec_options.Logger.result "parsed ins %d %x %s res %s"
count (Virtual_address.to_int addr) code mnemonic; *)({ins;check_outgoing_edge;dhunk=Dhunkdhunk;size;mnemonic},lp)withDecoder.InstructionUnhandled_->(* Note: we cannot assume that these instructions jump to
the next in sequence; this is not the case e.g. for
sysenter instructions on Linux. *)(* let next_addr = Bitvector.create (Bigint.big_int_of_int
((Virtual_address.to_int addr) + (String.length code/2))) 32 in
* Xtrasec_options.Logger.result "next_addr: %a"
Bitvector.pp_hex next_addr; *)letmnemonic="binsec_unhandled"inletsize=String.lengthcode/2in({ins;check_outgoing_edge;dhunk=Unhandled;size;mnemonic},lp))(* Handle the next instruction. It must have been already parsed
because of the edge checking, so:
- it is passed as an argument here
- lp points to the further instruction.
- it has not been handled by acc yet. *)letrecdo_nextaccinslp=letopenParsepininlet{count;address;code;_}=ins.insinletcomment=Format.asprintf"ins %08d @0x%x %s %s"count(Virtual_address.to_intaddress)codeins.mnemonicinletacc=Param.add_commentcommentaccinmatchinswith|{dhunk=Unhandled;_}->letacc=Param.clear_memoryaccinletins,lp=pop_enriched_inslpindo_nextaccinslp|{dhunk=Dhunkdhunk;_}asins->(* Provide the concretization informations to the
decoder. Note that when there are several loads, we don't
know which address correspond to which load, so we cannot
make use of the information. *)let()=Param2.load_addr_info:=letopenParsepininmatchins.ins.Parsepin.mem_readwith|Zero|Several->None|Onex->Somexinlet()=Param2.store_addr_info:=letopenParsepininmatchins.ins.Parsepin.mem_writtenwith|Zero|Several->None|Onex->Somexin(* Process DBA instruction i in dhunk. *)letrecdo_dba_instracci=letinstr=matchDhunk.instdhunkiwithNone->assertfalse|Somex->xinletjt,acc=M.instructionaccinstrinletopenGeneric_decoder_siginmatchjtwith|JKStop->Final_Stateacc|JKAssume_->assertfalse|JKJumpjt->do_edgeaccjt|JKIf(cond,targ1,targ2)->((* TODO: Some instructions really have two
behaviours. This should be handled with path merging,
or using the concretization to re-compute cond. *)letres1=let(),acc=Param.assumecondaccindo_edgeacctarg1inmatchres1with|Final_Statex->Final_Statex|Wrong_Trace->letncond,acc=Param.Boolean.notcondaccinlet(),acc=Param.assumencondaccindo_edgeacctarg2)(* Perform the necessary actions according to whether we are
leaving [ins]. *)anddo_edgeacc=letopenGeneric_decoder_siginfunction|Static(Dba.JInneri')->do_dba_instracci'|Static(Dba.JOutera)->letacc=do_instruction_endaccinsindo_outeraaccinslp|Dynamicx->letacc=do_instruction_endaccinsindo_dynamicxaccinslpindo_dba_instracc0(* (Dhunk.start dhunk) *)(* Modify the state at the end of the instruction. This is mainly to
add assertions regarding concretization. *)anddo_instruction_endaccins=letadd_assertionregvalueacc=letsize=register_to_sizereginletreg,acc=Param.get_var~sizeregaccinletvalue,acc=Param.Binary.biconst~size(Virtual_address.to_bigintvalue)accinletassertion,acc=Param.Binary.beq~sizeregvalueaccinlet(),acc=Param.assumeassertionaccinaccinletregs_to_concretize=Xtrasec_options.Concretize_regs.get()inList.fold_left(funacc(reg,value)->matchregwith|"eflags"->acc(* Need to be handled specially, ignored for now. *)|("esp"|"ebp")whenList.mem`Stackregs_to_concretize->add_assertionregvalueacc|_whenList.mem`Allregs_to_concretize->add_assertionregvalueacc|regwhenList.mem(`Registerreg)regs_to_concretize->add_assertionregvalueacc|_->acc)accins.ins.Parsepin.reg_values(* Handle leaving the instruction with a static jump. *)anddo_outeraaccinslp=assert(a.Dba.id==0);matchpop_enriched_inslpwith|exceptionLastInstruction->Final_Stateacc|ins',lp->ifins.check_outgoing_edge&¬(Virtual_address.equala.Dba.baseins'.ins.Parsepin.address)then(Xtrasec_options.Logger.result"Wrong trace a.base %a addr'%x"Virtual_address.ppa.Dba.base(Virtual_address.to_intins'.ins.Parsepin.address);Wrong_Trace)elsedo_nextaccins'lp(* Handle leaving the instruction with a dynamic jump. *)anddo_dynamicxaccinslp=matchpop_enriched_inslpwith|exceptionLastInstruction->Final_Stateacc|ins',lp->letaddr'=ins'.ins.Parsepin.addressinletacc=ifins.check_outgoing_edgethen(* Asserts that this should be a feasible jump. *)letaddr',acc=Param.Binary.biconst~size:32(Virtual_address.to_bigintaddr')accinletcond,acc=Param.Binary.beq~size:32xaddr'accinsnd@@Param.assumecondaccelseaccindo_nextaccins'lp(* Main algorithm. *)letparseinitial_statefile=letlp=Parsepin.fromfileinletins,lp=pop_enriched_inslpinmatchdo_nextinitial_stateinslpwith|Wrong_Trace->assertfalse|Final_Statestate->stateend(**************** Instantiation on Formulas and LLVM ****************)moduleWith_Formula=Formula_decoder.Instr_to_FormulamoduleParse_formula=Make(With_Formula)letrun_formulainput_fileoutput_file=letstate=Parse_formula.parseWith_Formula.initial_stateinput_fileinletout=open_outoutput_fileinFormat.fprintf(Format.formatter_of_out_channelout)"%a%a@\n(check-sat)@."(Binsec_smtlib.Formula.pp_header~theory:(Smt_options.Theory.get()))()Binsec_smtlib.Formula.pp_formula@@With_Formula.get_formulastate;close_outoutletruninput_file=ifXtrasec_options.Output_smt.is_set()thenrun_formulainput_file@@Xtrasec_options.Output_smt.get()letmain()=ifXtrasec_options.is_enabled()thenifnot@@Xtrasec_options.Trace_file.is_set()thenfailwith"Trace file must be given"elserun@@Xtrasec_options.Trace_file.get()let_=Cli.Boot.enlist~name:"xtrasec run"~f:main