123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)(* The interface between the analyze and its UI. *)(* Will consist in OCaml objects (that gets marshalled to a file),
but also in other binary files.
Note that some work is required from the server to produce this
output (e.g. to deduplicate strings, etc.) We could use
table-pin-rows for this.
*)openSyntax_treemoduleInt64Map=Map.Make(Int64)typedisassembly_line={binary_code:string;mnemonic:string;symbols_names:stringlist;instructions:exprlist;(** Ideally, would include this: *)(* bdump: (Syntax_tree.expr * string Syntax_tree.Tree.t) array; (\* Dba instructions, and its evaluation. *\) *)(* the strings represents results from the analysis such as "eax -> {0,1}" *)results:stringlist}(* Informations about the disassembly of a file. *)typedisassembly={decompiled:disassembly_lineInt64Map.t;symbol_table:(string*int64)array;}moduleGraph=structmoduleWto=structtypecomponent=Nodeofint|SCCofint*componentlistendtypenode_label={head:int64;tail:int64;call_stack:int64list}typeedge_info={back:bool}(** Each cfg node is a basic block. *)typelight_cfg_node={id:int;label:node_label;(* This field is mutable because the structure is recursive.
You should not mutate it in the webapp. *)mutablesuccs:(edge_info*light_cfg_node)list;}endtypecfg={cfg_entry_node:Graph.light_cfg_node;(** Entry basic block to the CFG. *)wto:Graph.Wto.componentlist;}(* To put information in the code, we use the null character to insert
markup.
The format is:
- null followed by a character whose last two bits is 01 -> beginning of source_color_marker.
Follow by a symbol, and end by null null to close the span.
- null followed by a character whose last two bits is 10 -> definition marker
(we should mark the name, the type of definition, and probably the body of the definition,
so that we can show this body on demand, and fold/unfold it.
- null followed by a character whose last two bits is 11 -> use marker (allows jumping to definitions).
- we should have a way to mark expressions, etc. to make them hoverable. *)typecolor_marker=|Keyword_color|Type_color|Var_color|Fun_colortypemarker=|Color_markerofcolor_marker(*Marker for syntax highlighting*)|Function_Definitionofstring(*Marker to indicate the beginning of a function definition. The string corresponds to the function name*)|Instructionofint(*Marker to indicate the beginning of a C instruction within the source code with the integer referring to [Cil_types.stmt.sid]*)|End_marker(*A closing marker/tag to indicate the end of a marked region*)typetoken=|Markerofmarker(*A parsed marker tag*)|Stringofstring*int*int(** A token for contiguous segment of raw text until a marker or EOF.*)|EOF(*End of text indicator*)letsource_color_marker_code=function|Keyword_color->(0lsl2)lor1|Type_color->(1lsl2)lor1|Var_color->(2lsl2)lor1|Fun_color->(3lsl2)lor1letmarker_to_string=function|End_marker->"\x00\x00"|Color_markersc->letcode=source_color_marker_codescin"\x00"^String.make1(Char.chrcode)|Function_Definitionname->letcode=0b10inletlen=String.lengthnameinlethi=Char.chr((lenlsr8)land0xFF)inletlo=Char.chr(lenland0xFF)in"\x00"^String.make1(Char.chrcode)^String.make1hi^String.make1lo^name|Instructionid->letcode=0b11inletb1=Char.chr((idlsr24)land0xFF)inletb2=Char.chr((idlsr16)land0xFF)inletb3=Char.chr((idlsr8)land0xFF)inletb4=Char.chr(idland0xFF)in"\x00"^String.make1(Char.chrcode)^String.init4(funj->matchjwith|0->b1|1->b2|2->b3|_->b4)letstring_to_markersi=(* Precondition: only call when s.[i] = '\x00' *)assert(s.[i]='\x00');letlen=String.lengthsinifi+1>=lenthenfailwith"Malformed marker: truncated input";letkind=Char.codes.[i+1]land0b11inmatchkindwith|0->(End_marker,i,i+2)|1->letfull_code=Char.codes.[i+1]inletidx=full_codelsr2inletsm=matchidxwith|0->Keyword_color|1->Type_color|2->Var_color|3->Fun_color|_->failwith("Unknown color marker index: "^string_of_intidx)in(Color_markersm,i,i+2)|2->ifi+4>=lenthenfailwith"Malformed Function_Definition marker";lethi=Char.codes.[i+2]inletlo=Char.codes.[i+3]inletname_len=(hilsl8)lorloinifi+4+name_len>lenthenfailwith"Function_Definition name truncated";letname=String.subs(i+4)name_lenin(Function_Definitionname,i,i+4+name_len)|3->ifi+5>=lenthenfailwith"Malformed Instruction marker";letb1=Char.codes.[i+2]andb2=Char.codes.[i+3]andb3=Char.codes.[i+4]andb4=Char.codes.[i+5]inletid=(b1lsl24)lor(b2lsl16)lor(b3lsl8)lorb4in(Instructionid,i,i+6)|_->failwith("Unknown marker kind: "^string_of_intkind)letget_token(s:string)(i:int):int*token=letlen=String.lengthsinifi>=lenthen(i,EOF)elseifs.[i]='\x00'thenletm,_,j=string_to_markersiin(j,Markerm)elseletrecfind_next_markerj=ifj>=len||s.[j]='\x00'thenjelsefind_next_marker(j+1)inletj=find_next_marker(i+1)in(j,String(s,i,j))typesource={text:string;(* A special string, using the null character to insert markup. *)}typemarshalled={disassembly:disassemblyoption;cfg:cfgoption;tracelog:Binarytrace.trace_linelist;source:sourceoption;}