(**************************************************************************)
(*  This file is part of BINSEC.                                          *)
(*                                                                        *)
(*  Copyright (C) 2016-2026                                               *)
(*    CEA (Commissariat à l'énergie atomique et aux énergies              *)
(*         alternatives)                                                  *)
(*                                                                        *)
(*  you can redistribute it and/or modify it under the terms of the GNU   *)
(*  Lesser General Public License as published by the Free Software       *)
(*  Foundation, version 2.1.                                              *)
(*                                                                        *)
(*  It is distributed in the hope that it will be useful,                 *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *)
(*  GNU Lesser General Public License for more details.                   *)
(*                                                                        *)
(*  See the GNU Lesser General Public License version 2.1                 *)
(*  for more details (enclosed in the file licenses/LGPLv2.1).            *)
(*                                                                        *)
(**************************************************************************)

(* Hand-written parser for pintrace output. *)

(* Payload of an "ins" line, exactly as scanned from the trace. *)
type parsed_ins = { count : int; addr : int; code : string }

(* Direction of a memory access: a 'R' marker in a "mem" line maps to
   [Read], a 'W' marker maps to [Written]. *)
type read_or_write = Read | Written

(* Payload of a "mem" line. *)
type mem = { read_or_write : read_or_write; address : Virtual_address.t }

(* One parsed line of the trace. *)
type line =
  | Ins of parsed_ins
  | Mem of mem
  | Reg of { reg : string; value : int }

(* Summary of how many distinct values were seen: none, exactly one
   (which is kept), or more than one (in which case they are dropped). *)
type 'a number = Zero | One of 'a | Several

(* Ins with all the mems that go with it, and the values that were
   written to the registers. *)
type ins = {
  count : int;
  address : Virtual_address.t;  (* Where was the instruction executed. *)
  code : string;
  reg_values : (String.t * Virtual_address.t) list;  (* alist. *)
  (* When there is more than one access at different addresses, we
     cannot know which address corresponds to which DBA instruction. So
     we can't use the memory information. Hence the number type.

     To alleviate the problem, we separate the load and store
     accesses. This allows handling instructions that read and write at
     different memory locations (e.g. x86 can push and read at the
     same time).

     This should handle most of the cases, so [Several] should rarely
     happen in practice and we should be able to identify almost all
     memory accesses.

     Note: there are still instructions with several loads at different
     locations, e.g. string instructions like cmps.

     MAYBE: We should be outputting all the memory accesses; the fact
     that when there are several we cannot map them to DBA instructions
     should be handled by the tools that make use of this parsed
     information. *)
  mem_read : Virtual_address.t number;
  mem_written : Virtual_address.t number;
}

(* A trace is an input channel plus the byte offset of the next "ins"
   line to be returned by [pop_ins]. *)
type trace = { channel : in_channel; pos : int }

(* [from file] opens [file] for reading and returns a trace positioned at
   offset 0.
   NOTE(review): nothing in this part of the file closes the channel;
   confirm that the owner of the trace does so. *)
let from file = { channel = open_in file; pos = 0 }

(* [parse_line chan] reads the next line of [chan] and decodes it
   according to its three-character prefix ("ins", "mem" or "reg").

   Raises [End_of_file] (from [input_line]) when the channel is exhausted.
   Any line with an unknown prefix fails with [assert false]; a line that
   has a known prefix but does not match the expected format makes
   [Scanf.sscanf] raise. *)
let parse_line chan =
  let l = input_line chan in
  (* [String.sub l 0 3] raises [Invalid_argument] on lines shorter than
     three characters.  Such a line cannot carry a valid prefix, so let it
     fall through to the same failure as any other unrecognised line. *)
  let prefix = if String.length l >= 3 then String.sub l 0 3 else l in
  match prefix with
  | "ins" ->
      Scanf.sscanf l "ins %d @0x%x: code 0x%s" (fun count addr code ->
          Ins { count; addr; code })
  | "mem" ->
      (* The leading counter and instruction address of a "mem" line are
         scanned but not used. *)
      Scanf.sscanf l "mem %d @0x%x: %c 0x%x"
        (fun _count _addr read_or_write addr ->
          let read_or_write =
            match read_or_write with
            | 'W' -> Written
            | 'R' -> Read
            | _ -> assert false
          in
          let address = Virtual_address.create addr in
          Mem { read_or_write; address })
  | "reg" ->
      Scanf.sscanf l "reg %s write 0x%x" (fun reg value -> Reg { reg; value })
  | _ -> assert false

(* [pop_ins x] reads the instruction starting at offset [x.pos] together
   with every "mem" and "reg" line that follows it, up to (and excluding)
   the next "ins" line.

   Returns [Some (ins, x')] where [x'] is [x] with [pos] set to the
   offset of that next "ins" line, or [None] when the end of the file is
   reached.  The line found at [x.pos] must be an "ins" line, and a
   register must not be reported twice for the same instruction; both
   are checked with [assert].

   NOTE(review): [End_of_file] is also caught while the lines of the last
   instruction are being collected, so the final instruction of a trace
   is never returned.  Confirm that this is intended. *)
let pop_ins x =
  (* Fold one more access address into a [number] summary: the first
     address is remembered, a repetition of the same address changes
     nothing, and any different address degrades the summary to
     [Several]. *)
  let add_access seen address =
    match seen with
    | Zero -> One address
    | One known when Virtual_address.equal known address -> seen
    | One _ | Several -> Several
  in
  try
    seek_in x.channel x.pos;
    let head : parsed_ins =
      match parse_line x.channel with Ins p -> p | _ -> assert false
    in
    let initial =
      {
        count = head.count;
        address = Virtual_address.create head.addr;
        code = head.code;
        reg_values = [];
        mem_read = Zero;
        mem_written = Zero;
      }
    in
    let rec loop acc =
      (* Remember where the upcoming line starts: if it turns out to be
         the next "ins" line, this is where the following call resumes. *)
      let pos = pos_in x.channel in
      match parse_line x.channel with
      | Ins _ -> Some (acc, { x with pos })
      | Reg { reg; value } ->
          assert (not @@ List.mem_assoc reg acc.reg_values);
          let reg_values =
            (reg, Virtual_address.create value) :: acc.reg_values
          in
          loop { acc with reg_values }
      | Mem m -> (
          match m.read_or_write with
          | Read ->
              loop { acc with mem_read = add_access acc.mem_read m.address }
          | Written ->
              loop
                { acc with mem_written = add_access acc.mem_written m.address })
    in
    loop initial
  with End_of_file -> None