(*Generated by Lem from linker_script.lem.*)
open Lem_basic_classes
open Lem_function
open Lem_string
open Lem_tuple
open Lem_bool
open Lem_list
open Lem_sorting
open Lem_num
open Lem_maybe
open Lem_assert_extra
open Lem_set
(*import Map*)
open Byte_pattern
open Byte_sequence
open Default_printing
open Error
open Missing_pervasives
open Show
open Elf_header
open Elf_file
open Elf_interpreted_section
open Abis
open Command_line
open Input_list
open Linkable_list
open Memory_image
open Elf_memory_image

(* HMM -- ideally we'd be ELF-agnostic in this file.
   But Abstract_abi is now merged into Elf_memory_image, so never mind. *)
open Elf_memory_image_of_elf64_file
open Elf_relocation
open Elf_symbol_table
open Elf_section_header_table
open Elf_types_native_uint
open Memory_image_orderings

(* We model two kinds of linker script: "implicit scripts", which are supplied
* on the command line as input objects, and "control scripts" of which there
* is exactly one per link job. The abstract syntax of each script comes from the
* same grammar.
*
* We define the control script as a bunch of functions, to allow for
* link jobs where we don't have an AST and the script behaviour is hard-coded.
*)(* Input sections come from individual (relocatable) ELF files.
* The name of this file is important!
*
* Each input "section" is always an identified section or common symbol
* *within* some ELF memory image. *)

(** Description of one input section taken from a linkable object. *)
type input_section_rec = {
  idx : Nat_big_num.num;  (* linkable idx *)
  fname : string;  (* file name; this is what file_matches globs against *)
  img : elf_memory_image;  (* memory image that contains the section *)
  shndx : Nat_big_num.num;  (* section index, used to look the section up in img *)
  secname : string;  (* section name; this is what name_matches globs against *)
  isec : elf64_interpreted_section;
}

(** A unit of input that a script can place: either a common symbol or an
    identified input section. *)
type input_spec =
  | Common of (Nat_big_num.num * string * elf_memory_image * symbol_definition)
      (* string is symbol name -- must be a COMMON symbol *)
  | InputSection of input_section_rec

(* A control script defines
* - output sections
* - a mapping from output sections to (ordered) input sections
* - extra symbols
* - output format etc. (skip this for now)
*)(* We will have to deal with merging etc. at some point, somewhere
* (maybe here, maybe not); for now we just produce an ordered list
* of sections.
*)(* We can't model linker scripts as plain Lem functions without writing
* them to a very different structure than that of scripts. The reason is that
* certain features of the script language necessitate multiple passes
* over the script structure. For example, to figure out how big an
* output section is, hence where to begin the next section, you need to
* know which of the input sections are marked for KEEP. For that, you need
* a def-use graph over input sections. But for that, you also need to account
* for *all* symbol definitions, and the script itself is allowed to add new
* ones (right in among its input sections). So we have to do one pass to
* enumerate the symbol additions, and another pass to eliminate sections
* that we don't want to KEEP.
*
* Other gotchas include:
*
* - symbol provision and address advancement can occur in among the input
* section queries, but also outside any output section.
*
* - semantics of DATA_SEGMENT_ALIGN depend on future script contents
*
* - ONLY_IF_RO and ONLY_IF_RW are tricky: need to evaluate the input section
* queries
*
* - semantics of empty sections are subtle (". = ." will force an empty section
* to be emitted, but ". = . + 0" will not do so).
*
* Our approach is to define an interpreter for (at present) most of the script
* language.
*)

(* NOTE(review): presumably AlwaysDefine models a plain symbol assignment and
 * ProvideIfUsed models PROVIDE -- confirm against the interpreter. *)
type symbol_def_policy =
  | AlwaysDefine
  | ProvideIfUsed

(** An input selector picks (and possibly reorders) inputs from a list. *)
type input_selector = input_spec list -> input_spec list

type address_expr = Memory_image.expr

(* NOTE(review): OnlyIfRo / OnlyIfRw presumably mirror the ONLY_IF_RO and
 * ONLY_IF_RW script keywords -- confirm. *)
type output_guard =
  | AlwaysOutput
  | OnlyIfRo
  | OnlyIfRw

type symbol_spec =
  (Nat_big_num.num * Uint32_wrapper.uint32 * Uint32_wrapper.uint32)
  (* size, info, other *)

type retain_policy =
  | DefaultKeep
  | KeepEvenWhenGC

(** Key under which an address-computing function is stored in an
    [address_expr_fn_map]. *)
type address_expr_fn_ref = Nat_big_num.num

(** Table of address-computing functions, indexed by reference. Each function
    receives a position and a value of type ['a], and yields a number. *)
type 'a address_expr_fn_map =
  (address_expr_fn_ref, (Nat_big_num.num -> 'a -> Nat_big_num.num)) Pmap.map
  (* 'a = allocated_sections_map *)

(** One element of the contents of an output section. *)
type output_section_composition_element =
  | IncludeInputSection of (retain_policy * input_section_rec)
  | IncludeCommonSymbol of
      (retain_policy
       * string (* file *)
       * Nat_big_num.num (* linkable_idx *)
       * symbol_definition
       * elf_memory_image)
  | Hole of address_expr_fn (* compute the next addr to continue layout at *)
  | ProvideSymbol of (symbol_def_policy * string * symbol_spec)

and sort_policy =
  | DefaultSort (* Use command line sort option, else "seen" order *)
  | SeenOrder (* Always use "seen" order *)
  | ByName
  | ByNameThenAlignment
  | ByAlignment
  | ByAlignmentThenName
  | ByInitPriority

(* This mirrors the OutputSection constructor, except that the script elements have become
 * output_section_composition_elements, and we might store the size here. *)
and output_section_spec =
  | OutputSectionSpec of
      (output_guard
       * Nat_big_num.num option
       * string
       * (output_section_composition_element list))

and allocated_sections_map =
  | AllocatedSectionsMap of
      (string, (output_section_spec (* OutputSection element idx *) * Nat_big_num.num)) Pmap.map

(** Handle on an address-computing function: wraps the reference under which
    the function is stored in an [address_expr_fn_map]. *)
and address_expr_fn =
  | AddressExprFn of address_expr_fn_ref

(** One element of a linker control script. *)
type script_element =
  | DefineSymbol of (symbol_def_policy * string * symbol_spec)
  | AdvanceAddress of address_expr_fn
  | MarkAndAlignDataSegment of (Nat_big_num.num * Nat_big_num.num)
      (* maxpagesize, commonpagesize *)
  | MarkDataSegmentEnd
  | MarkDataSegmentRelroEnd
      (*of (allocated_sections_map -> (natural * (natural -> natural))) DPM: commented out because of positivity constraints in Isabelle *)
  | OutputSection of
      (output_guard
       * ((* address_expr *) address_expr_fn option)
       * string
       * script_element list)
  | DiscardInput of input_selector
  (* Input queries can only occur within an output section.
     Output sections may not nest within other output sections.
     (Ideally we would use something like polymorphic variants to encode this.)
   *)
  | InputQuery of (retain_policy * sort_policy * input_selector)

(* A linker control script is a function from inputs to output elements.
* We can define them in syntax (using an interpreter)
* or in Lem directly (as functions). *)

type linker_control_script = script_element list

(** A control script whose elements are each paired with a number. *)
type labelled_linker_control_script = (script_element * Nat_big_num.num) list

(*val all_suffixes : list char -> list (list char)*)
(** [all_suffixes chars] lists every suffix of [chars], longest first; the
    list itself comes first and the empty list comes last. *)
let rec all_suffixes chars : ((char) list) list =
  match chars with
  | [] -> [ [] ]
  | _ :: rest -> chars :: all_suffixes rest

(*val glob_match : list char -> list char -> bool*)
(** [glob_match pat str] tests whether the whole of [str] matches the glob
    [pat]. A question mark matches exactly one character, a star matches any
    run of characters (including none), and every other pattern character
    matches only itself. *)
let rec glob_match pat str : bool =
  match pat, str with
  | [], [] -> true
  | '?' :: pat_rest, _ :: str_rest -> glob_match pat_rest str_rest
  | '*' :: pat_rest, _ ->
    (* The star may swallow any prefix of the string, so the pattern matches
     * as soon as what follows the star matches some suffix of the string. *)
    List.exists (fun suffix -> glob_match pat_rest suffix) (all_suffixes str)
  | pat_ch :: pat_rest, str_ch :: str_rest ->
    pat_ch = str_ch && glob_match pat_rest str_rest
  | [], _ :: _ -> (* ran out of pattern *) false
  | _ :: _, [] -> (* ran out of str *) false

(*val default_symbol_spec : symbol_spec*)
(** Symbol spec whose size, info and other components are all zero. *)
let default_symbol_spec
    : Nat_big_num.num * Uint32_wrapper.uint32 * Uint32_wrapper.uint32 =
  let zero = Nat_big_num.of_int 0 in
  (zero, Uint32_wrapper.of_bigint zero, Uint32_wrapper.of_bigint zero)

(*val hidden_symbol_spec : symbol_spec*)
(** Like [default_symbol_spec], except that the last component is built from
    [stv_hidden]. *)
let hidden_symbol_spec
    : Nat_big_num.num * Uint32_wrapper.uint32 * Uint32_wrapper.uint32 =
  let zero = Nat_big_num.of_int 0 in
  (zero, Uint32_wrapper.of_bigint zero, Uint32_wrapper.of_bigint stv_hidden)

(* These Lem functions replicate linker script functions or builtin behaviours. *)

(*val only_sections : input_selector*)
(** Keeps the input sections and drops the common symbols. *)
let only_sections inputs : (input_spec) list =
  Lem_list.mapMaybe
    (fun candidate ->
       match candidate with
       | InputSection _ -> Some candidate
       | Common _ -> None)
    inputs

(*val filter_and_concat : (input_spec -> bool) -> input_selector*)
(* a.k.a. list input_spec -> list input_spec *)
(** Keeps the inputs that satisfy the predicate [p]. *)
let filter_and_concat p inputs : (input_spec) list =
  List.filter p inputs

(*val name_matches : string -> input_spec -> bool*)
(** True when [input] is an input section whose section name matches the glob
    [pat]; always false for a common symbol. *)
let name_matches pat input : bool =
  match input with
  | InputSection inp ->
    glob_match (Xstring.explode pat) (Xstring.explode inp.secname)
  | Common _ -> false

(*val file_matches : string -> input_spec -> bool*)
(** True when [input] is an input section whose file name matches the glob
    [pat]; always false for a common symbol. *)
let file_matches pat input : bool =
  match input with
  | InputSection inp ->
    glob_match (Xstring.explode pat) (Xstring.explode inp.fname)
  | Common _ -> false

(** Orders inputs by name, breaking ties by alignment. A common symbol takes
    the fixed name COMMON and the st_value of its symbol entry as alignment. *)
let compareInputSpecByNameThenAlignment i1 i2 : int =
  let sort_key spec =
    match spec with
    | Common (_, _, _, def) ->
      ( "COMMON" (* FIXME: is this right? *),
        Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value )
    | InputSection isrec ->
      (isrec.isec.elf64_section_name_as_string, isrec.isec.elf64_section_align)
  in
  pairCompare compare Nat_big_num.compare (sort_key i1) (sort_key i2)

(** Orders inputs by alignment alone. *)
let compareInputSpecByAlignment i1 i2 : int =
  let alignment spec =
    match spec with
    | Common (_, _, _, def) ->
      Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value
    | InputSection isrec -> isrec.isec.elf64_section_align
  in
  Nat_big_num.compare (alignment i1) (alignment i2)

(** Orders inputs by name alone; every common symbol is named COMMON. *)
let compareInputSpecByName i1 i2 : int =
  let name spec =
    match spec with
    | Common (_, _, _, _) -> "COMMON"
    | InputSection isrec -> isrec.isec.elf64_section_name_as_string
  in
  compare (name i1) (name i2)

(** Orders inputs by alignment, breaking ties by name. *)
let compareInputSpecByAlignmentThenName i1 i2 : int =
  let sort_key spec =
    match spec with
    | Common (_, _, _, def) ->
      ( Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value,
        "COMMON" (* FIXME: is this right? *) )
    | InputSection isrec ->
      (isrec.isec.elf64_section_align, isrec.isec.elf64_section_name_as_string)
  in
  pairCompare Nat_big_num.compare compare (sort_key i1) (sort_key i2)

(** Placeholder: treats every pair of inputs as equal, so sorting with it
    imposes no init-priority ordering. *)
let compareInputSpecByInitPriority i1 i2 : int =
  0 (* FIXME *)

(* DATA_SEGMENT_ALIGN is defined by two formulae
* (over pos and commonpagesize/maxpagesize)
* "... depending on whether the latter uses fewer COMMONPAGESIZE sized
pages for the data segment (area between the result of this
expression and `DATA_SEGMENT_END') than the former or not. If the
latter form is used, it means COMMONPAGESIZE bytes of runtime
memory will be saved at the expense of up to COMMONPAGESIZE wasted
bytes in the on-disk file."
So the amount of padding that gets inserted here depends on the location
of something that comes *later*, namely DATA_SEGMENT_END.
So, we can't model it as a function of the current position.
Instead, we add MarkDataSegmentEnd and friends
to the script_element ADT.
*)

(** [has_writability writable input_sec] reports whether [input_sec] may be
    written to. The first argument is accepted but never inspected. Common
    symbols always count as writable; an input section is writable when
    [shf_write] is set in the flags of the section found at its index in its
    memory image.
    @raise Failure if the memory image has no section at that index. *)
let has_writability : 'a -> input_spec -> bool =
  fun writable input_sec ->
    match input_sec with
    | Common (_, _, _, _) ->
      (* all common symbols are potentially writable *)
      true
    | InputSection inp ->
      let flags : Nat_big_num.num =
        match elf_memory_image_section_by_index inp.shndx inp.img with
        | None -> failwith "impossible: no such section"
        | Some found -> found.elf64_section_flags
      in
      flag_is_set shf_write flags

(* LARGE_COMMON seems to have been defined in this patch set:
https://sourceware.org/ml/binutils/2005-07/txt00014.txt
and at the time was "only for x86-64". It seems to be analogous
to ".lbss", i.e. "large bss". libbfd defines SHF_X86_64_LARGE.
The best comment seems to be in llvm's Support/ELF.h:
0814 // If an object file section does not have this flag set, then it may not hold
0815 // more than 2GB and can be freely referred to in objects using smaller code
0816 // models. Otherwise, only objects using larger code models can refer to them.
0817 // For example, a medium code model object can refer to data in a section that
0818 // sets this flag besides being able to refer to data in a section that does
0819 // not set it; likewise, a small code model object can refer only to code in a
0820 // section that does not set this flag.
*)

(*val address_zero : natural -> address_expr_fn_map allocated_sections_map ->
    (natural * address_expr_fn_map allocated_sections_map * address_expr_fn)*)
(** [address_zero fresh alloc_map] registers, under the reference [fresh], a
    function that ignores both of its arguments and always yields zero. It
    returns the next unused reference ([fresh] plus one), the extended map,
    and a handle on the newly registered function. *)
let address_zero fresh alloc_map
    : Nat_big_num.num
      * ((Nat_big_num.num), (Nat_big_num.num -> allocated_sections_map -> Nat_big_num.num)) Pmap.map
      * address_expr_fn =
  let always_zero = fun _pos -> fun _secs -> Nat_big_num.of_int 0 in
  let extended_map = Pmap.add fresh always_zero alloc_map in
  let next_fresh = Nat_big_num.add (Nat_big_num.of_int 1) fresh in
  (next_fresh, extended_map, AddressExprFn fresh)

(*
val output_sec_composition_size : list output_section_composition_element -> natural
let output_sec_composition_size comp = List.foldl (+) 0 (List.map size_of_output_section_composition_element comp)
*)

(*val do_output_section_layout_starting_at_addr : natural -> allocated_sections_map -> list output_section_composition_element -> (natural * list natural)*)
(** [do_output_section_layout_starting_at_addr start_addr secs comps] lays the
    elements of [comps] out one after another, beginning at [start_addr].

    Input sections and common symbols are first aligned up (to the section
    alignment, or to the st_value of the common symbol) and then advance the
    position by their size. A provided symbol sits at the current position and
    takes no space.

    Returns the first free address after the last element, together with the
    address assigned to each element, in the same order as [comps]. The
    allocated-sections map is not consulted.

    @raise Failure if [comps] contains a [Hole]: its address function can only
    be resolved through an address_expr_fn_map, which is not available here.
    (Previously this case fell through the match and raised Match_failure.) *)
let do_output_section_layout_starting_at_addr start_addr (AllocatedSectionsMap _secs) comps
    : Nat_big_num.num * (Nat_big_num.num) list =
  (* map out where we plumb in each section, accounting for their alignment.
   * Addresses are consed onto a reversed accumulator and flipped once at the
   * end, so the whole layout is linear in the number of elements. *)
  let place (next_free_addr, rev_addrs) comp_el =
    match comp_el with
    | IncludeInputSection (_, irec) ->
      let aligned_next_free =
        align_up_to irec.isec.elf64_section_align next_free_addr
      in
      ( Nat_big_num.add aligned_next_free irec.isec.elf64_section_size,
        aligned_next_free :: rev_addrs )
    | IncludeCommonSymbol (_, _, _, def, _) ->
      let aligned_next_free =
        align_up_to
          (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value)
          next_free_addr
      in
      ( Nat_big_num.add aligned_next_free
          (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_size),
        aligned_next_free :: rev_addrs )
    | ProvideSymbol (_, _, _) ->
      (next_free_addr, next_free_addr :: rev_addrs)
    | Hole _ ->
      failwith "error: Hole elements are not supported when laying out an output section"
  in
  let (end_addr, rev_addrs) = List.fold_left place (start_addr, []) comps in
  (end_addr, List.rev rev_addrs)

(*val output_sec_composition_size_given_start_addr : natural -> allocated_sections_map -> list output_section_composition_element -> natural*)
(** Size of an output section made of [comp] when laid out from [start_addr]:
    the distance from [start_addr] to the first free address after layout. *)
let output_sec_composition_size_given_start_addr start_addr secs comp : Nat_big_num.num =
  let (end_addr, _comp_addrs) =
    do_output_section_layout_starting_at_addr start_addr secs comp
  in
  Nat_big_num.sub_nat end_addr start_addr

(*val sizeof : string -> allocated_sections_map -> natural*)
(** [sizeof secname1 secs] is the laid-out size of the output section recorded
    under [secname1] in [secs].
    @raise Failure if no section is recorded under that name, or if the
    recorded section has no start address. *)
let sizeof secname1 (AllocatedSectionsMap secs) : Nat_big_num.num =
  match Pmap.lookup secname1 secs with
  | None ->
    failwith ("error: sizeof applied to non-existent section name " ^ secname1)
  | Some (OutputSectionSpec (_, None, _, _), _) ->
    failwith "error: sizeof applied to section without defined start address"
  | Some (OutputSectionSpec (_, Some addr, _, comp), _) ->
    output_sec_composition_size_given_start_addr addr (AllocatedSectionsMap secs) comp

(*val alignof_output_section_composition_element : output_section_composition_element -> natural*)
(** Alignment required by a single composition element: the section alignment
    for an input section, the st_value of the symbol entry for a common
    symbol, and 1 for everything else. *)
let alignof_output_section_composition_element comp : Nat_big_num.num =
  match comp with
  | IncludeInputSection (_, irec) -> irec.isec.elf64_section_align
  | IncludeCommonSymbol (_, _, _, def, _) ->
    Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value
  | Hole _ | ProvideSymbol _ -> Nat_big_num.of_int 1 (* CHECK *)

(*val alignof_output_section : list output_section_composition_element ->
natural*)
(** Alignment of a whole output section: the least common multiple of the
    alignments of its elements, or 1 when there are none. *)
let alignof_output_section comps : Nat_big_num.num =
  List.fold_left
    (fun acc_lcm comp_el ->
       lcm acc_lcm (alignof_output_section_composition_element comp_el))
    (Nat_big_num.of_int 1)
    comps

(*val default_linker_control_script : natural -> address_expr_fn_map allocated_sections_map ->
abi any_abi_feature -> maybe natural -> maybe natural -> maybe natural ->
natural -> (natural * address_expr_fn_map allocated_sections_map * linker_control_script)*)letdefault_linker_control_scriptfreshalloc_mapauser_text_segment_startuser_data_segment_startuser_rodata_segment_startelf_headers_size:Nat_big_num.num*((Nat_big_num.num),(Nat_big_num.num->allocated_sections_map->Nat_big_num.num))Pmap.map*(script_element)list=(letsegment_startname1default=((matchname1with"ldata-segment"->(matchuser_data_segment_startwithNone->default|Someaddr->(* fun _ -> *)addr)|"text-segment"->(matchuser_text_segment_startwithNone->default|Someaddr->(* fun _ -> *)addr)))inletis_large_common=(funinp->(* FIXME: treat large commons separately *)false)inletis_common=(funisec1->(matchisec1withCommon(idx1,fname1,img2,def)->(*let _ = errln ("Common or large-common symbol: " ^ def.def_symname) in *)not(is_large_commonisec1)|_->false))inletalloc_fn1=(fun_->(fun_->Nat_big_num.add(segment_start"text-segment"(Nat_big_num.mul((Nat_big_num.of_int4))((Nat_big_num.of_int1048576))))elf_headers_size))inletalloc_fn1_ref=freshinletalloc_map=(Pmap.addalloc_fn1_refalloc_fn1alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn2=(funaddr->(fun_->Nat_big_num.sub_nat(* (align_up_to a.maxpagesize addr) - (natural_land (a.maxpagesize - addr) (a.maxpagesize - 1)) *)(*
FIXME: understand the intention of this assignment.
Evaluating a simple example of this (from true-static-uClibc)
(ALIGN (0x200000) - ((0x200000 - .) & 0x1fffff))
starting from 0x00000000004017dc
means
0x600000 - ((0x200000 - 0x4017dc) & 0x1fffff)
i.e.
0x600000 - (((-0x2017dc)) & 0x1fffff)
i.e.
0x600000 - ( -0x2017dc
& 0x1fffff )
which really does come to (according to bash) 0x4017dc
i.e. we subtract 0x1fe824 from 0x600000
and end up back where we started.
What does ANDing a negative number mean?
It doesn't seem to work for us.
Well, to take the negation we flip every bit and add one.
So if we don't want to do a subtraction that might go negative,
we can instead add the complement.
*)(align_up_toa.maxpagesizeaddr)(Nat_big_num.bitwise_and(Nat_big_num.adda.maxpagesize(compl64addr))(Nat_big_num.sub_nata.maxpagesize((Nat_big_num.of_int1))))))inlet(fresh,alloc_map,(address_zero_fn:address_expr_fn))=(address_zerofreshalloc_map)inletalloc_fn2_ref=freshinletalloc_map=(Pmap.addalloc_fn2_refalloc_fn2alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn3=(funpos->(funsecs->align_up_to(ifNat_big_num.equalpos((Nat_big_num.of_int0))then(Nat_big_num.div((Nat_big_num.of_int64))((Nat_big_num.of_int8)))else(Nat_big_num.of_int1))pos))inletalloc_fn3_ref=freshinletalloc_map=(Pmap.addalloc_fn3_refalloc_fn3alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn4=(funpos->(funsecs->align_up_to(Nat_big_num.div((Nat_big_num.of_int64))((Nat_big_num.of_int8)))pos))inletalloc_fn4_ref=freshinletalloc_map=(Pmap.addalloc_fn4_refalloc_fn4alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn5=(funpos->(funsecs->segment_start"ldata-segment"pos))inletalloc_fn5_ref=freshinletalloc_map=(Pmap.addalloc_fn5_refalloc_fn5alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn6=(funpos->funsecs->align_up_to(Nat_big_num.adda.maxpagesize(Nat_big_num.sub_nat(Nat_big_num.bitwise_andposa.maxpagesize)((Nat_big_num.of_int1))))pos)inletalloc_fn6_ref=freshinletalloc_map=(Pmap.addalloc_fn6_refalloc_fn6alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn7=(funpos->(funsecs->(ifnot(Nat_big_num.equalpos((Nat_big_num.of_int0)))thenNat_big_num.div((Nat_big_num.of_int64))((Nat_big_num.of_int8))else(Nat_big_num.of_int1))))inletalloc_fn7_ref=freshinletalloc_map=(Pmap.addalloc_fn7_refalloc_fn7alloc_map)inletfresh=(Nat_big_num.add((Nat_big_num.of_int1))fresh)inletalloc_fn8=(funpos->(funsecs->align_up_to(Nat_big_num.div((Nat_big_num.of_int64))((Nat_big_num.of_int8)))pos))inletalloc_fn8_ref=freshinletalloc_map=(Pmap.addalloc_fn8_refalloc_fn8alloc_map)inletfresh=(Nat_big_num.add((Nat_
big_num.of_int1))fresh)in(fresh,alloc_map,[(* For now, we base our script on the GNU bfd linker's scripts.
Here's the static -z combreloc one.
/* Script for -z combreloc: combine and sort reloc sections */
/* Copyright (C) 2014 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(_start)
SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");
SECTIONS
{
/* Read-only sections, merged into text segment: */
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.* )
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.* )
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.* )
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.* )
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.* )
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.* )
*(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.* )
*(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.* )
*(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.* )
*(.rela.ifunc)
}
.rela.plt :
{
*(.rela.plt)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.init :
{
KEEP ( *(SORT_NONE(.init)))
}
.plt : { *(.plt) *(.iplt) }
.plt.bnd : { *(.plt.bnd) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.* )
*(.text.exit .text.exit.* )
*(.text.startup .text.startup.* )
*(.text.hot .text.hot.* )
*(.text .stub .text.* .gnu.linkonce.t.* )
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.fini :
{
KEEP ( *(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.* ) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP ( *(.eh_frame)) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
.gcc_except_table.* ) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
.exception_ranges* ) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP ( *(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.* ) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges* ) }
/* Thread Local Storage sections */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.* ) }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.* ) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP ( *(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP ( *(SORT_BY_INIT_PRIORITY(.init_array.* ) SORT_BY_INIT_PRIORITY(.ctors.* )))
KEEP ( *(.init_array EXCLUDE_FILE ( *crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP ( *(SORT_BY_INIT_PRIORITY(.fini_array.* ) SORT_BY_INIT_PRIORITY(.dtors.* )))
KEEP ( *(.fini_array EXCLUDE_FILE ( *crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP ( *crtbegin.o(.ctors))
KEEP ( *crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP ( *(EXCLUDE_FILE ( *crtend.o *crtend?.o ) .ctors))
KEEP ( *(SORT(.ctors.* )))
KEEP ( *(.ctors))
}
.dtors :
{
KEEP ( *crtbegin.o(.dtors))
KEEP ( *crtbegin?.o(.dtors))
KEEP ( *(EXCLUDE_FILE ( *crtend.o *crtend?.o ) .dtors))
KEEP ( *(SORT(.dtors.* )))
KEEP ( *(.dtors))
}
.jcr : { KEEP ( *(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.* ) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.* ) }
.dynamic : { *(.dynamic) }
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
.data :
{
*(.data .data.* .gnu.linkonce.d.* )
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.* )
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we don't
pad the .data section. */
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
.lbss :
{
*(.dynlbss)
*(.lbss .lbss.* .gnu.linkonce.lb.* )
*(LARGE_COMMON)
}
. = ALIGN(64 / 8);
. = SEGMENT_START("ldata-segment", .);
.lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.lrodata .lrodata.* .gnu.linkonce.lr.* )
}
.ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.ldata .ldata.* .gnu.linkonce.l.* )
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
. = ALIGN(64 / 8);
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.* ) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.gnu.attributes 0 : { KEEP ( *(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_* ) }
}
*)(* function from
inputs and configuration
to
output sections-with-address-and-policy, output symbols-with-address-and-attributes,
discards, orphans
BUT
1. policy is not a property of output sections, but of *inputs within outputs*
i.e. KEEP( *(.init))
what's helpful for writing such functions?
e.g. only_if_ro (input_query) (output ):
i.e. ++ only_if_ro OutputSection(AlwaysOutput, Nothing, ".eh_frame", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".eh_frame"))])
want to take a bunch of outputs
and return a bunch of outputs?
if so, need to return a "current address"
*)(DefineSymbol(ProvideIfUsed,"__executable_start",default_symbol_spec));AdvanceAddress(AddressExprFnalloc_fn1_ref);OutputSection(AlwaysOutput,None,".interp",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".interp"))]);OutputSection(AlwaysOutput,None,".note.gnu.build-id",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".note.gnu.build-id"))]);OutputSection(AlwaysOutput,None,".hash",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".hash"))]);OutputSection(AlwaysOutput,None,".gnu.hash",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".gnu.hash"))]);OutputSection(AlwaysOutput,None,".dynsym",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynsym"))]);OutputSection(AlwaysOutput,None,".dynstr",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynstr"))]);OutputSection(AlwaysOutput,None,".gnu.version",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".gnu.version"))]);OutputSection(AlwaysOutput,None,".gnu.version_d",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".gnu.version_d"))]);OutputSection(AlwaysOutput,None,".gnu.version_r",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".gnu.version_r"))]);OutputSection(AlwaysOutput,None,".rela.dyn",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.init"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.text"s||(name_matches".rela.text.*"s||name_matches".rela.gnu.linkonce.t.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.rodata"s||(name_matches".rela.rodata.*"s||name_matches".rela.gnu.linkonce.r.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.data"s||(name_matches".rela.data.*"s||name_matches".rela.gnu.linkonce.d.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.tdata"s||(name_matches".rela.tdata.*"s||name_mat
ches".rela.gnu.linkonce.td.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.tbss"s||(name_matches".rela.tbss.*"s||name_matches".rela.gnu.linkonce.tb.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.ctors"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.got"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.bss"s||(name_matches".rela.bss.*"s||name_matches".rela.gnu.linkonce.b.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.ldata"s||(name_matches".rela.ldata.*"s||name_matches".rela.gnu.linkonce.l.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rela.lbss"s||(name_matches".rela.lbss.*"s||name_matches".rela.gnu.linkonce.lb.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.ifunc"))]);OutputSection(AlwaysOutput,None,".rela.plt",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.plt"));DefineSymbol(ProvideIfUsed,"__rela_iplt_start",((Nat_big_num.of_int0),make_symbol_infostb_localstt_notype(* FIXME *),make_symbol_otherstv_hidden));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".rela.iplt"));DefineSymbol(ProvideIfUsed,"__rela_iplt_end",((Nat_big_num.of_int0),make_symbol_infostb_localstt_notype(* FIXME 
*),make_symbol_otherstv_hidden))]);OutputSection(AlwaysOutput,None,".init",[InputQuery(KeepEvenWhenGC,SeenOrder,filter_and_concat(name_matches".init"))]);OutputSection(AlwaysOutput,None,".plt",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".plt"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".iplt"))]);OutputSection(AlwaysOutput,None,".plt.bnd",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".plt.bnd"))]);OutputSection(AlwaysOutput,None,".text",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".text.unlikely"s||(name_matches".text.*_unlikely"s||name_matches".text.unlikely.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".text.exit"s||name_matches".text.exit.*"s));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".text.startup"s||name_matches".text.startup.*"s));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".text.hot"s||name_matches".text.hot.*"s));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".text"s||(name_matches".stub"s||(name_matches".text.*"s||name_matches".gnu.linkonce.t.*"s))));InputQuery(DefaultKeep,DefaultSort,filter_and_concat((* ".gnu.warning sections are handled specially by elf32.em."
* GAH. That means that what we specify here is not (completely) what
* needs to happen with these sections. *)funs->name_matches".gnu_warning"s))]);OutputSection(AlwaysOutput,None,".fini",[InputQuery(KeepEvenWhenGC,SeenOrder,filter_and_concat(name_matches".fini"))]);DefineSymbol(ProvideIfUsed,"__etext",default_symbol_spec);DefineSymbol(ProvideIfUsed,"_etext",default_symbol_spec);DefineSymbol(ProvideIfUsed,"etext",default_symbol_spec);OutputSection(AlwaysOutput,None,".rodata",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".rodata"s||(name_matches".rodata.*"s||name_matches".gnu.linkonce.r.*"s)))]);OutputSection(AlwaysOutput,None,".eh_frame_hdr",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".eh_frame_hdr"))]);OutputSection(OnlyIfRo,None,".eh_frame",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".eh_frame"))]);OutputSection(OnlyIfRo,None,".gcc_except_table",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".gcc_except_table"s||name_matches".gcc_except_table.*"s))]);OutputSection(OnlyIfRo,None,".exception_ranges",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".exception_ranges"s||name_matches".exception_ranges*"s))]);AdvanceAddress(AddressExprFnalloc_fn2_ref);MarkAndAlignDataSegment(Nat_big_num.mul(Nat_big_num.mul((* a.maxpagesize *)(Nat_big_num.of_int2))((Nat_big_num.of_int1024)))((Nat_big_num.of_int1024))(* <-- for some reason binutils assumes 2MB max page size,
even if ABI says smaller *),a.commonpagesize);OutputSection(OnlyIfRw,None,".eh_frame",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".eh_frame"))]);OutputSection(OnlyIfRw,None,".gcc_except_table",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".gcc_except_table"s||name_matches".gcc_except_table.*"s))]);OutputSection(OnlyIfRw,None,".exception_ranges",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".exception_ranges"s||name_matches".exception_ranges*"s))]);OutputSection(AlwaysOutput,None,".tdata",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".tdata"s||(name_matches".tdata.*"s||name_matches".gnu.linkonce.td.*"s)))]);OutputSection(AlwaysOutput,None,".tbss",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".tbss"s||(name_matches".tbss.*"s||name_matches".gnu.linkonce.tb.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".tcommon"))]);OutputSection(AlwaysOutput,None,".preinit_array",[DefineSymbol(ProvideIfUsed,"__preinit_array_start",default_symbol_spec);InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->name_matches".preinit_array"s));DefineSymbol(ProvideIfUsed,"__preinit_array_end",default_symbol_spec)]);OutputSection(AlwaysOutput,None,".init_array",[DefineSymbol(ProvideIfUsed,"__init_array_start",default_symbol_spec);InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".init_array.*"s));InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".ctors.*"s));InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".init_array"s||(name_matches".ctors"s&¬(file_matches"*crtbegin.o"s||(file_matches"*crtbegin?.o"s||(file_matches"*crtend.o"s||file_matches"*crtend?.o 
"s))))));DefineSymbol(ProvideIfUsed,"__init_array_end",default_symbol_spec)]);OutputSection(AlwaysOutput,None,".fini_array",[DefineSymbol(ProvideIfUsed,"__fini_array_start",default_symbol_spec);InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".fini_array.*"s));InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".dtors.*"s));InputQuery(KeepEvenWhenGC,ByInitPriority,filter_and_concat(funs->name_matches".fini_array"s||(name_matches".dtors"s&¬(file_matches"*crtbegin.o"s||(file_matches"*crtbegin?.o"s||(file_matches"*crtend.o"s||file_matches"*crtend?.o "s))))));DefineSymbol(ProvideIfUsed,"__fini_array_end",default_symbol_spec)]);OutputSection(AlwaysOutput,None,".ctors",[InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->file_matches"*crtbegin.o"s&&name_matches".ctors"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->file_matches"*crtbegin?.o"s&&name_matches".ctors"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->not(file_matches"*crtend.o"s||file_matches"*crtend?.o"s)&&name_matches".ctors"s));InputQuery(KeepEvenWhenGC,ByName,filter_and_concat(funs->name_matches".ctors.*"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->(file_matches"*crtend.o"s||file_matches"*crtend?.o"s)&&name_matches".ctors"s))(* NOTE: this exclusion is implicit in the usual linker script,
* because it won't match an input section more than once. We should
* just replicate this behaviour, since other parts of the script might rely on it
* less obviously. *)]);OutputSection(AlwaysOutput,None,".dtors",[InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->file_matches"*crtbegin.o"s&&name_matches".dtors"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->file_matches"*crtbegin?.o"s&&name_matches".dtors"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->not(file_matches"*crtend.o"s||file_matches"*crtend?.o"s)&&name_matches".dtors"s));InputQuery(KeepEvenWhenGC,ByName,filter_and_concat(funs->name_matches".dtors.*"s));InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(funs->(file_matches"*crtend.o"s||file_matches"*crtend?.o"s)&&name_matches".dtors"s))]);OutputSection(AlwaysOutput,None,".jcr",[InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(name_matches".jcr"))]);OutputSection(AlwaysOutput,None,".data.rel.ro",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".data.rel.ro.local*"s||name_matches".gnu.linkonce.d.rel.ro.local.*"s));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".data.rel.ro"s||(name_matches".data.rel.ro.*"s||name_matches".gnu.linkonce.d.rel.ro.*"s)))]);OutputSection(AlwaysOutput,None,".dynamic",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynamic"))]);OutputSection(AlwaysOutput,None,".got",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".got"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".igot"))]);MarkDataSegmentRelroEnd(*(fun secs -> (if (sizeof ".got.plt" secs) >= 24 then 24 else 0, (fun pos -> pos)))*);OutputSection(AlwaysOutput,None,".got.plt",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".got.plt"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".igot.plt"))]);OutputSection(AlwaysOutput,None,".data",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".data"s||(name_matches".data.*"s||name_matches".gnu.linkonce.d.*"s)))(* the script also has SORT(CONSTRUCTORS) here, 
but it has no effect for ELF (I think) *)]);OutputSection(AlwaysOutput,None,".data1",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".data1"))]);DefineSymbol(AlwaysDefine,"_edata",default_symbol_spec);DefineSymbol(ProvideIfUsed,"edata",default_symbol_spec);(* . = .; <-- does this do anything? YES! It forces an output section to be emitted.
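Since it occurs *outside* any output section,
it is assumed to start the following output section: per the GNU ld manual,
an assignment to the location counter outside an output section statement
is grouped with the output section that follows it.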
*)DefineSymbol(AlwaysDefine,"__bss_start",default_symbol_spec);OutputSection(AlwaysOutput,None,".bss",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynbss"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".bss"s||(name_matches".bss.*"s||name_matches".gnu.linkonce.b.*"s)));InputQuery(DefaultKeep,DefaultSort,(funinputlist->(*let _ = errln "Looking for commons" in *)letresult=(filter_and_concatis_commoninputlist)in(*let _ = errln ("Got " ^ (show (length (result))) ^ " commons; sanity check: input list contains " ^
(show (length inputlist)) ^ " of which " ^
(show (length (List.filter (fun inp -> match inp with
Common _ -> true
| _ -> false
end) inputlist))) ^ " are commons."
)
in*)result))]);AdvanceAddress(AddressExprFnalloc_fn3_ref);OutputSection(AlwaysOutput,None,".lbss",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynlbss"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".dynlbss"));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".lbss"s||(name_matches".lbss.*"s||name_matches".gnu.linkonce.lb.*"s)));InputQuery(DefaultKeep,DefaultSort,filter_and_concat(is_large_common))]);AdvanceAddress(AddressExprFnalloc_fn4_ref);AdvanceAddress(AddressExprFnalloc_fn5_ref);OutputSection(AlwaysOutput,Some(AddressExprFnalloc_fn6_ref),".lrodata",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".lrodata"s||(name_matches".lrodata.*"s||name_matches".gnu.linkonce.lr.*"s)));AdvanceAddress(AddressExprFnalloc_fn7_ref)]);AdvanceAddress(AddressExprFnalloc_fn8_ref);DefineSymbol(AlwaysDefine,"_end",default_symbol_spec);DefineSymbol(ProvideIfUsed,"end",default_symbol_spec);MarkDataSegmentEnd;OutputSection(AlwaysOutput,Someaddress_zero_fn,".stab",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stab"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".stabstr",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stabstr"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".stab.excl",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stab.excl"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".stab.exclstr",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stab.exclstr"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".stab.index",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stab.index"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".stab.indexstr",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".stab.indexstr"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".comment",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".comment"))])(* DWARF 
debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. *)(* DWARF 1 *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".line",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".line"))])(* GNU DWARF 1 extensions *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_srcinfo",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_srcinfo"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_sfnames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_sfname"))])(* DWARF 1.1 and DWARF 2 *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_aranges",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_aranges"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_pubnames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_pubnames"))])(* DWARF 2 *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_info",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".debug_info"s||name_matches".gnu.linkonce.wi.*"s))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_abbrev",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_abbrev"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_line",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(funs->name_matches".debug_line"s||(name_matches".debug_line.*"s||name_matches".debug_line_end"s)))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_frame",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_frame"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_str",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_str"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_loc",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_loc"))]);OutputSection(Always
Output,Someaddress_zero_fn,".debug_macinfo",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_macinfo"))])(* SGI/MIPS DWARF 2 extensions *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_weaknames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_weaknames"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_funcnames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_funcnames"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_typenames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_typenames"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_varnames",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_varnames"))])(* DWARF 3 *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_pubtypes",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_pubtypes"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_ranges",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_ranges"))])(* DWARF Extension. *);OutputSection(AlwaysOutput,Someaddress_zero_fn,".debug_macro",[InputQuery(DefaultKeep,DefaultSort,filter_and_concat(name_matches".debug_macro"))]);OutputSection(AlwaysOutput,Someaddress_zero_fn,".gnu.attributes",[InputQuery(KeepEvenWhenGC,DefaultSort,filter_and_concat(name_matches".gnu.attributes"))]);DiscardInput(filter_and_concat(funs->name_matches".note.GNU-stack"s||(name_matches".gnu_debuglink"s||name_matches".gnu.lto_*"s)))(* NOTE: orphan sections are dealt with in the core linking logic,
not the script. *)]))

(** [interpret_guard guard comp name1] decides whether an output section
    whose composition is [comp] may be emitted under the output guard
    [guard].

    - [AlwaysOutput]: always [true].
    - [OnlyIfRo]: [true] iff no [IncludeInputSection] element of [comp] has
      the [shf_write] bit set in its [elf64_section_flags].
    - [OnlyIfRw]: [true] iff every [IncludeInputSection] element of [comp]
      has the [shf_write] bit set.

    Elements of [comp] that are not [IncludeInputSection] never cause a
    guard to fail, so an empty composition satisfies every guard.

    [name1] is the output section name; it is not consulted (it was only
    used by debug tracing that is disabled).

    NOTE: the first case used to be the lowercase pattern [always0], which
    is a variable pattern and therefore matched every guard; the
    [OnlyIfRo] / [OnlyIfRw] cases were unreachable and the function always
    returned [true]. It now matches the [AlwaysOutput] constructor.
    NOTE(review): this assumes the guard type has exactly the three
    constructors matched below -- confirm against its declaration. *)
let interpret_guard guard comp name1 : bool =
  (* Is the shf_write bit present in the given section flags? *)
  let has_shf_write flags =
    not
      (Nat_big_num.equal
         (Nat_big_num.of_int 0)
         (Nat_big_num.bitwise_and flags shf_write))
  in
  match guard with
  | AlwaysOutput -> true
  | OnlyIfRo ->
      List.for_all
        (fun comp_el ->
          match comp_el with
          | IncludeInputSection (_retainpol, irec) ->
              (* read-only means: shf_write is absent *)
              not (has_shf_write irec.isec.elf64_section_flags)
          | _ ->
              (* holes, common symbols and provided symbols
                 should not prevent ONLY_IF_RO *)
              true)
        comp
  | OnlyIfRw ->
      List.for_all
        (fun comp_el ->
          match comp_el with
          | IncludeInputSection (_retainpol, irec) ->
              has_shf_write irec.isec.elf64_section_flags
          | _ ->
              (* holes etc. should not prevent ONLY_IF_RW *)
              true)
        comp

(* Passes over the script:
*
* 1. assign input sections to output sections (or discard) and define symbols.
*
* 2. compute def-use and optionally GC, removing unwanted sections and symbols
*
* 3. build image, assigning addresses as we go.
*
* Some passes require matching/retrieving what a previous pass on the same node did.
* So we give each script element a natural "idx" label.
*)

(*val label_script_aux : natural -> linker_control_script -> labelled_linker_control_script*)
(* Pair every element of [script1] with a label computed as [start] plus the
   index that [mapi] supplies for that element (presumably its zero-based
   position in the list -- confirm against the definition of [mapi]). *)
let label_script_aux start script1 : (script_element * Nat_big_num.num) list =
  let label_for i = Nat_big_num.add start (Nat_big_num.of_int i) in
  mapi (fun i el -> (el, label_for i)) script1

(*val label_script : linker_control_script -> labelled_linker_control_script*)
(* Label a whole script, numbering from zero. *)
let label_script script1 : (script_element * Nat_big_num.num) list =
  let first_label = Nat_big_num.of_int 0 in
  label_script_aux first_label script1

(* Result of assigning inputs to outputs: a list of input specs paired with a
   list of output section specs, each tagged with a script index. *)
type input_output_assignment =
  input_spec list * (output_section_spec * Nat_big_num.num) list

(*val assign_inputs_to_output_sections :
input_output_assignment -> (* accumulator: list of discards, list of output compositions (these include symbols) *)
set (natural * natural) -> (* used sections *)
set (natural * natural * natural) -> (* used commons *)
list input_spec -> (* remaining inputs *)
maybe (output_section_spec * natural) -> (* cur_sec -- the current output section spec and its OutputSection script item idx *)
maybe input_spec -> (* last input section to be output -- might not have one *)
(input_spec -> input_spec -> Basic_classes.ordering) (* "seen ordering" *) ->
labelled_linker_control_script ->
input_output_assignment*)(* accumulated result *)letrecassign_inputs_to_output_sectionsaccused_sectionsused_commonsinputs(cur_output_sec:(output_section_spec*Nat_big_num.num)option)last_input_secseen_orderingscript1:(input_spec)list*(output_section_spec*Nat_big_num.num)list=(let(rev_discards,rev_outputs)=accinletflush_output_sec=(funmaybe_output_sec_and_idx->(match(maybe_output_sec_and_idx:(output_section_spec*Nat_big_num.num)option)withSome(OutputSectionSpec(guard,addr,name1,comp),script_idx)->(*let _ = errln ("Guardedly flushing output section named " ^ name ^ " with " ^ (
match addr with Nothing -> "no address yet" | Just a -> "address 0x" ^ (hex_string_of_natural a) end
) ^ " and composed of " ^ (show (length comp)) ^ " constituents.")
in*)(* evaluate the guard *)ifinterpret_guardguardcompname1then(* do it *)(rev_discards,(((OutputSectionSpec(guard,addr,name1,comp)),script_idx)::rev_outputs))else(* ignore it *)acc|None->(* for convenience, make this a no-op rather than error *)(* failwith "internal error: flushing output section with no current output section" *)acc))in(matchscript1with[]->flush_output_seccur_output_sec|(element1,idx1)::more_elements_and_idx->letdo_nothing=(acc,used_sections,used_commons,cur_output_sec,last_input_sec)inlet(new_acc,new_used_sections,new_used_commons,(new_cur_output_sec:(output_section_spec*Nat_big_num.num)option),new_last_input_sec)=((matchelement1withDefineSymbol(symdefpol,name1,(symsize,syminfo,symother))->(* Label the current section in the image
* with a new symbol definition. If there isn't
* a current section, use the ABS section (what is that labelling?). *)(acc,used_sections,used_commons,(match(cur_output_sec:(output_section_spec*Nat_big_num.num)option)withNone->(*let _ = errln ("FIXME: for defining `" ^ name ^ "': ABS symbol defs not yet supported") in*)None|Some((OutputSectionSpec(guard,maybe_addr,secname1,comp)),output_script_idx)->(*let _ = errln ("Including a symbol named `" ^ name ^ " in composition of output section `" ^ secname ^ "'") in*)Some((OutputSectionSpec(guard,maybe_addr,secname1,List.rev_append(List.revcomp)[ProvideSymbol(symdefpol,name1,(symsize,syminfo,symother))])),output_script_idx)),last_input_sec)|AdvanceAddress(AddressExprFnadvance_fn)->(* If we're inside a section, insert a hole,
* else just update the logical address *)(*let _ = errln ("Advancing location counter") in*)(matchcur_output_secwithNone->do_nothing(* This assignment is setting a new LMA. *)(* (acc, *)|Some(sec,idx1)->do_nothing)|MarkAndAlignDataSegment(maxpagesize1,commonpagesize1)->(* The "data segment end" is a distinguished label,
* so we can encode the whole thing into a conditional. *)(*let _ = errln ("Mark/aligning data segment") in*)do_nothing|MarkDataSegmentEnd->(*let _ = errln ("Marking data segment end") in*)do_nothing|MarkDataSegmentRelroEnd(*(fun_from_secs_to_something)*)->(*let _ = errln ("Marking data segment relro end") in*)do_nothing|OutputSection(outputguard,maybe_expr,name1,sub_elements)->(* If we have a current output section, finish it and add it to the image.
* Q. Where do guards ("ONLY_IF_RO" etc) get evaluated?
* A. Inside flush_output_sec. *)(*let _ = errln ("Recursively composing a new output section `" ^ name ^ "'...") in*)letacc_with_output_sec=(flush_output_seccur_output_sec)inletnew_cur_output_sec=(Some((OutputSectionSpec(outputguard,(* maybe_expr pos secs *)None,name1,[])),idx1))in(* Recurse down the list of input queries, assigning them to this output sec
* Note that output sections may not nest within other output sections.
* At the end of the list of sub_elements, we will flush the section we built up.
*)letfinal_acc=(assign_inputs_to_output_sectionsaccused_sectionsused_commonsinputsnew_cur_output_seclast_input_secseen_ordering(label_scriptsub_elements))in(* NOTE that this sub-accumulation will never add a new output section
* because output sections can't nest. *)(final_acc,used_sections,used_commons,(* cur_output_sec *)None,last_input_sec)|DiscardInput(selector)->letselected=(selectorinputs)inlet(rev_discards,rev_outputs)=accin(*let _ = Missing_pervasives.errln ("Processing discard rule; selected " ^ (show (length selected))
^ " inputs.")
in*)((List.rev_append(List.rev(List.rev(letx2=([])inList.fold_right(funix2->iftruetheni::x2elsex2)selectedx2)))rev_discards,rev_outputs),used_sections,used_commons,cur_output_sec,last_input_sec)|InputQuery(retainpol,sortpol,selector)->(* Input queries can only occur within an output section. *)(matchcur_output_secwithNone->failwith"linker script error: input query without output section"|Some((OutputSectionSpec(output_guard1,output_sec_addr,output_sec_name,output_composition)),output_script_idx)->(* Add them to the current output spec. We have to be careful about ordering:
* according to the GNU ld manual (and observed behaviour), by default
* "the linker will place files and sections matched by wildcards in the order
* in which they are seen during the link". For .o files on the command line,
* this means the command line order. But for members of archives, it means
* the order in which they were "pulled in" during input enumeration. We
* actually don't compute this here; it is passed in from our caller in link.lem. *)letsortfun=((matchsortpolwithDefaultSort->List.sortseen_ordering(* FIXME: pay attention to command line *)|SeenOrder->List.sortseen_ordering|ByName->List.sortcompareInputSpecByName|ByNameThenAlignment->List.sortcompareInputSpecByNameThenAlignment|ByAlignment->List.sortcompareInputSpecByAlignment|ByAlignmentThenName->List.sortcompareInputSpecByAlignmentThenName|ByInitPriority->List.sortcompareInputSpecByInitPriority))inletselected=(selectorinputs)inletselected_deduplicated=(List.filter(funinp->(matchinpwithInputSection(irec)->not(Pset.mem(irec.idx,irec.shndx)used_sections)|Common(idx1,fname1,img2,def)->not(Pset.mem(idx1,def.def_sym_scn,def.def_sym_idx)used_commons)))selected)in(*let _ = errln ("Evaluated an input query, yielding " ^
(show (length selected)) ^ " undeduplicated and " ^
(show (length selected_deduplicated)) ^
" deduplicated results, to be added to composition currently of " ^
(show (length output_composition)) ^ " items.") in*)(* Search input memory images for matching sections. *)letsorted_selected_inputs=(sortfunselected_deduplicated)inlet(sectionMatchList:input_section_reclist)=(Lem_list.mapMaybe(funinp->(matchinpwithInputSection(x)->(*let _ = errln ("Matched an input section named " ^ x.isec.elf64_section_name_as_string ^
" in a file " ^ x.fname ^ " with first 20 bytes " ^ (show (take 20
(let maybe_elname = elf_memory_image_element_coextensive_with_section x.shndx x.img
in
match maybe_elname with
Nothing -> failwith ("impossible: no such element (matching shndx " ^ (show x.shndx) ^ ")")
| Just idstr ->
match Map.lookup idstr x.img.elements with
Just el -> el.contents
| Nothing -> failwith "no such element"
end
end
))))
in*)Somex|_->None))sorted_selected_inputs)inletcommonMatchList=(Lem_list.mapMaybe(funinp->(matchinpwith|Common(idx1,fname1,img2,def)->Some(idx1,fname1,img2,def)|_->None))sorted_selected_inputs)in(acc,Pset.(union)used_sections(letx2=(Pset.from_list(pairCompareNat_big_num.compareNat_big_num.compare)[])inList.fold_right(funirecx2->iftruethenPset.add(irec.idx,irec.shndx)x2elsex2)sectionMatchListx2),Pset.(union)used_commons(letx2=(Pset.from_list(tripleCompareNat_big_num.compareNat_big_num.compareNat_big_num.compare)[])inList.fold_right(fun(idx1,fname1,img2,def)x2->iftruethenPset.add(idx1,def.def_sym_scn,def.def_sym_idx)x2elsex2)commonMatchListx2),(* new_cur_output_spec *)Some((OutputSectionSpec(output_guard1,output_sec_addr,output_sec_name,List.rev_append(List.rev(List.rev_append(List.revoutput_composition)(letx2=([])inList.fold_right(funinput_secx2->iftruethenIncludeInputSection(retainpol,(* input_sec.fname, input_sec.idx, input_sec.shndx, input_sec.isec, input_sec.img *)input_sec)::x2elsex2)sectionMatchListx2)))(letx2=([])inList.fold_right(fun(idx1,fname1,img2,def)x2->iftruethenIncludeCommonSymbol(DefaultKeep,fname1,idx1,def,img2)::x2elsex2)commonMatchListx2))),output_script_idx),last_input_sec))))in(*let _ = match new_cur_output_sec with
Just (OutputSectionSpec (guard, addr, name, comp), script_idx) ->
errln ("Now output section `" ^ name ^ "' is composed of " ^ (show (length comp)) ^ " elements.")
| Nothing -> ()
end in*)assign_inputs_to_output_sectionsnew_accnew_used_sectionsnew_used_commons(inputs:input_speclist)(new_cur_output_sec)(new_last_input_sec:input_specoption)seen_ordering(more_elements_and_idx:labelled_linker_control_script)))(* NOTE: this is also responsible for deleting any PROVIDEd symbols that
* were not actually referenced. BUT HOW, if we haven't built the image and
* hence haven't added the symbols yet? Symbols affect reachability, so
* we're going to have to figure this out. Really we want a memory image that
* does not yet have addresses assigned, but does have the symbols inserted.
* BUT even that is not right, because we want to be able to remove some
* sections (GC them). So the section composition is not yet fixed. So we have
* a problem.
*
* Note that the only symbols we have to remove are ones that were PROVIDEd
* in our output composition. So doing the GC on output compositions seems
* sane. We can get the graph's edge list by inspecting the constituent memory
* images from which each output section composition element is drawn.
* Collecting sections and collecting symbols seems fair. Note that symbols
* can never be placed mid-section (I don't think?? they can use arbitrary
* expressions, but not that depend on whether an input section is included
* or not) so removing a section should never imply the removal of a symbol.
*
* So that implies we need not yet build a memory image.
*)
(*val compute_def_use_and_gc : allocated_sections_map -> allocated_sections_map*)
(** [compute_def_use_and_gc outputs_by_name] is currently the identity on the
    allocated-sections map: no def-use analysis or section GC is performed
    yet. *)
let compute_def_use_and_gc outputs_by_name : allocated_sections_map =
  outputs_by_name (* FIXME: implement GC *)

(** [output_section_type comp] chooses the section type of an output section
    from its composition [comp]: [sht_nobits] when every included input
    section has type [sht_nobits] (all other kinds of element count as
    nobits-compatible), and [sht_progbits] otherwise. *)
let output_section_type comp : Nat_big_num.num =
  (* are we composed entirely of nobits sections and common symbols? *)
  let all_nobits =
    List.for_all
      (fun comp_el ->
        match comp_el with
        | IncludeInputSection (_, (* fname, linkable_idx, shndx, isec, img *) irec) ->
            Nat_big_num.equal irec.isec.elf64_section_type sht_nobits
        | IncludeCommonSymbol _ -> true
        | _ -> (* padding and symdefs can be nobits *) true)
      comp
  in
  if all_nobits then sht_nobits else sht_progbits

(** [output_section_flags comp] computes the flags word of an output section
    by OR-ing together [shf_write], [shf_execinstr], [shf_alloc] and
    [shf_tls], according to the elements of the composition [comp].

    Thread-locality is decided by a per-element vote: [Some true] (positively
    thread-local), [Some false] (positively not thread-local) or [None]
    (don't care).

    @raise Failure if at least one element is positively thread-local and at
    least one is positively not thread-local, in whichever order they appear
    in [comp]. *)
let output_section_flags comp : Nat_big_num.num =
  let writable =
    List.exists
      (fun comp_el ->
        match comp_el with
        | IncludeInputSection (_, (* fname, linkable_idx, shndx, isec, img *) irec) ->
            flag_is_set shf_write irec.isec.elf64_section_flags
        | IncludeCommonSymbol _ -> (* assume common symbols are writable *) true
        | _ -> (* padding and symdefs do not make a section writable *) false)
      comp
  in
  let executable =
    List.exists
      (fun comp_el ->
        match comp_el with
        | IncludeInputSection (_, (* fname, linkable_idx, shndx, isec, img *) irec) ->
            flag_is_set shf_execinstr irec.isec.elf64_section_flags
        | IncludeCommonSymbol _ ->
            (* assume common symbols are not executable, since they're zeroed *)
            false
        | _ -> (* padding and symdefs do not make a section executable -- HMM *) false)
      comp
  in
  let alloc =
    List.exists
      (fun comp_el ->
        match comp_el with
        | IncludeInputSection (_, (* fname, linkable_idx, shndx, isec, img *) irec) ->
            flag_is_set shf_alloc irec.isec.elf64_section_flags
        | IncludeCommonSymbol _ -> (* common symbols are allocatable *) true
        | ProvideSymbol _ -> (* symbols make a section allocatable? HMM *) true
        | _ -> (* padding does not make a section allocatable *) false)
      comp
  in
  (* Yes / no / don't-care vote of a single element on thread-locality. *)
  let is_thread_local_yesnomaybe comp_el =
    match comp_el with
    | IncludeInputSection (_, (* fname, linkable_idx, shndx, isec, img *) irec) ->
        Some (flag_is_set shf_tls irec.isec.elf64_section_flags)
    | IncludeCommonSymbol _ -> (* FIXME: support tcommon *) Some false
    | ProvideSymbol _ ->
        (* linker script symbols shouldn't be defined here, unless they can be
           declared thread-local (FIXME: can they?) *)
        Some false
    | _ -> (* padding does not make a section thread-local, or non-. *) None
  in
  (* Fold step merging the running vote with the next element's vote.
     Don't-care votes are ignored; two definite votes are merged with
     [combine]. This match is total: the previous hand-enumerated version
     listed (Some true, Some false) twice and omitted (Some false, Some true),
     so a non-thread-local element followed by a thread-local one raised
     Match_failure instead of reaching the intended error below. *)
  let merge_votes combine acc_ynm comp_el =
    match (acc_ynm, is_thread_local_yesnomaybe comp_el) with
    | (None, None) -> None
    | (None, Some x) | (Some x, None) -> Some x
    | (Some a, Some b) -> Some (combine a b)
  in
  (* Is any element positively thread-local? *)
  let any_thread_local = List.fold_left (merge_votes ( || )) None comp in
  (* Are *all* elements either don't-care or positively thread-local? *)
  let all_thread_local = List.fold_left (merge_votes ( && )) None comp in
  let thread_local =
    if Lem.option_equal (=) any_thread_local (Some true)
       && not (Lem.option_equal (=) (Some true) all_thread_local)
    then failwith "error: section mixes thread-local and non-thread-local inputs"
    else (match any_thread_local with None -> false | Some x -> x)
  in
  let flag_if cond flag = if cond then flag else Nat_big_num.of_int 0 in
  Nat_big_num.bitwise_or
    (flag_if thread_local shf_tls)
    (Nat_big_num.bitwise_or
       (flag_if executable shf_execinstr)
       (Nat_big_num.bitwise_or
          (flag_if writable shf_write)
          (flag_if alloc shf_alloc)))

(** [symbol_def_for_provide_symbol name1 size2 info other control_script_linkable_idx]
    builds the symbol definition record for a symbol PROVIDEd by the linker
    script. The name, size, info and other fields come from the arguments;
    the symbol-table name index, section index and value are all set to zero
    here, as are [def_sym_scn] and [def_sym_idx]. The definition is attributed
    to the linkable index [control_script_linkable_idx]. *)
let symbol_def_for_provide_symbol name1 size2 info other control_script_linkable_idx : symbol_definition =
  let zero = Nat_big_num.of_int 0 in
  { def_symname =
      (*let _ = errln ("Linker script is defining symbol called `" ^ name ^ "'") in*)
      name1
  ; def_syment =
      { elf64_st_name = Uint32_wrapper.of_bigint zero (* ignored *)
      ; elf64_st_info = info
      ; elf64_st_other = other
      ; elf64_st_shndx = Uint32_wrapper.of_bigint zero
      ; elf64_st_value = Uint64_wrapper.of_bigint zero (* ignored *)
      ; elf64_st_size = Uint64_wrapper.of_bigint size2
      }
  ; def_sym_scn = zero
  ; def_sym_idx = zero
  ; def_linkable_idx = control_script_linkable_idx
  }

(*val assign_dot_to_itself : natural -> address_expr_fn_map allocated_sections_map -> (natural * address_expr_fn_map allocated_sections_map * address_expr_fn)*)
(** [assign_dot_to_itself fresh alloc_map] registers, under the key [fresh],
    an address-expression function that returns its first argument (the
    location counter) unchanged and ignores its second. Returns the next
    fresh key ([fresh + 1]), the extended map, and an [AddressExprFn]
    reference to the newly registered key. *)
let assign_dot_to_itself fresh alloc_map : Nat_big_num.num * ((Nat_big_num.num), (Nat_big_num.num -> allocated_sections_map -> Nat_big_num.num)) Pmap.map * address_expr_fn =
  let dot_identity = fun dot -> fun _ -> dot in
  let alloc_map' = Pmap.add fresh dot_identity alloc_map in
  let fresh' = Nat_big_num.add (Nat_big_num.of_int 1) fresh in
  (fresh', alloc_map', AddressExprFn fresh)

(*val build_image :
abi any_abi_feature ->
address_expr_fn_map allocated_sections_map -> (* global dictionary of address_expr_fn_ref -> address_expr_fn *)
elf_memory_image -> (* accumulator *)
natural -> (* location counter *)
allocated_sections_map -> (* outputs constructed earlier *)
(Map.map string (list (natural * binding))) -> (* bindings_by_name *)
labelled_linker_control_script ->
natural -> (* control_script_linkable_idx *)
(Map.map string (list symbol_definition)) -> (* linker_defs_by_name *)
(elf_memory_image * allocated_sections_map)*)(* accumulated result *)letrecbuild_imageaalloc_mapaccpos(AllocatedSectionsMapoutputs_by_name)bindings_by_namescript1control_script_linkable_idxlinker_defs_by_name:(any_abi_feature)annotated_memory_image*allocated_sections_map=(let(add_output_section:(Nat_big_num.num*elf_memory_image)->output_section_spec->(Nat_big_num.num*elf_memory_image*Nat_big_num.num*output_section_spec))=(fun((*scn_idx, *)pos,acc_img)->(fun(OutputSectionSpec(guard,addr,secname1,comp))->(*let _ = errln ("Computing composition of output section `" ^ secname ^ "' from " ^ (show (length comp)) ^ " elements")
in*)letunaligned_start_addr=((matchaddrwithSomea->failwith("internal error: section "^(secname1^": did not expect address to be assigned yet"))|None->pos))inletalign=(alignof_output_sectioncomp)in(*let _ = errln ("Aligning start of output section " ^ secname ^ " up to a " ^ (show align) ^ "-byte address boundary")
in*)letoutput_section_start_addr=(align_up_toalignunaligned_start_addr)inlet(end_addr,comp_addrs)=(do_output_section_layout_starting_at_addroutput_section_start_addr(AllocatedSectionsMapoutputs_by_name)comp)inletsize2=(Nat_big_num.sub_natend_addroutput_section_start_addr)in(*let _ = Missing_pervasives.outln (
if List.null comp then secname else (
((space_padded_and_maybe_newline 16 secname) ^
("0x" ^ (left_zero_padded_to 16 (hex_string_of_natural output_section_start_addr))) ^ " " ^
(left_space_padded_to 10 ("0x" ^ (hex_string_of_natural size))))
)
)
in*)let(concatenated_content,final_addr,new_range_tag_pairs)=(List.fold_left(fun(accum_pat,accum_current_addr,accum_meta)->(fun(comp_el,comp_addr)->(*let _ = errln ("Adding an element to composition of output section `" ^ secname ^ "', current address 0x" ^ (hex_string_of_natural accum_current_addr))
in*)letmake_line=(funnamestr->(funaddrstr->(funszstr->(funrhs->((space_padded_and_maybe_newline((Nat_big_num.of_int16))(" "^namestr))^(("0x"^(left_zero_padded_to((Nat_big_num.of_int16))addrstr))^(" "^((left_space_padded_to((Nat_big_num.of_int10))("0x"^szstr))^(" "^rhs)))))))))inlet(sz,comp_el_pat,this_el_meta)=((matchcomp_elwith|IncludeInputSection(retainpolicy,(* fname, linkable_idx, shndx, isec, img *)irec)->(* We want to get the input section as a byte pattern *)(*let _ = errln ("Processing inclusion of input section `" ^ irec.isec.elf64_section_name_as_string
^ "' from file `" ^ irec.fname
^ "' into output section `" ^ secname
^ "'")
in*)letmaybe_secname=(elf_memory_image_element_coextensive_with_sectionirec.shndxirec.img)in(matchmaybe_secnamewithNone->failwith("impossible: no such section"(*(matching irec.shndx " ^ (show irec.shndx) ^ ")""*))|Someidstr->(*let _ = errln ("Found element named " ^ idstr ^ " coextensive with section named " ^
irec.isec.elf64_section_name_as_string ^ " in file " ^ irec.fname)
in*)(matchPmap.lookupidstrirec.img.elementswithSomeel->(*let _ = Missing_pervasives.outln (make_line irec.isec.elf64_section_name_as_string
(hex_string_of_natural comp_addr) (hex_string_of_natural irec.isec.elf64_section_size)
irec.fname)
in*)letsection_el_name=(get_unique_name_for_section_from_indexirec.shndxirec.isecirec.img)in(*let _ = errln ("Copying metadata for output section `" ^ section_el_name ^ "'") in*)letrange_or_sym_is_in_this_sec=(funmaybe_range->(funtag->(* is it within the section we're outputting?
* first we needs its element name. *)(* filter out ones that don't overlap *)(matchmaybe_rangewithSome(el_name,(start,len))->(* img and shndx came as a unit, so they're definitely
* talking about the same file *)(* shndx = sym_shndx *)section_el_name=el_name|None->(* ABS symbols have this property *)(matchtagwithSymbolDef(def)->(* don't match section symbols, or we'll be inundated *)letsym_shndx=(Uint32_wrapper.to_bigintdef.def_syment.elf64_st_shndx)inifnot(Nat_big_num.equalsym_shndxshn_abs)||(not(Nat_big_num.equal(get_elf64_symbol_typedef.def_syment)stt_section))thenfalseelse(letabs_address=(Ml_bindings.nat_big_num_of_uint64def.def_syment.elf64_st_value)in(* check it against our section *)letsection_end_addr=(Nat_big_num.addaccum_current_addrirec.isec.elf64_section_size)in(Nat_big_num.greater_equalabs_addressaccum_current_addr&&Nat_big_num.lessabs_addresssection_end_addr)(* FIXME: argument that this should be <=, i.e. can mark end addr *)(* PROBLEM: this is all very well, but there's no reason why
* ABS symbols need to point at an address within some output
* section. They can just be arbitrary values. This is a bit of an
* abuse if we do it within the C language (to get the value, you
* have to do "(int) &sym", i.e. create a meaningless pointer
* intermediate) but arguably is okay in an impl-def way.
*
* WHAT to do? well, just always output the ABS symbols, for now.
*
* The example that provoked this is in glibc's
* locale/lc-address.c, which compiles down to create
* the following ABS symbol:
*
* 0000000000000001 g *ABS* 0000000000000000 _nl_current_LC_ADDRESS_used
*
* ... i.e. the _nl_current_LC_ADDRESS_used appears to be just a flag.
*
* Where can we handle this? We don't see ABS symbols since they
* aren't associated with sections. We simply need to copy over
* all the ABS symbols appearing in included input objects.
* That means there's no point doing anything with them here
* while we're fiddling with sections. Do it later in a whole-
* -image pass.
*)&&false(* ... at least until we see a better way *))|_->false))))inletranges_and_tags=(letx2=([])inList.fold_right(fun(maybe_range,tag)x2->ifrange_or_sym_is_in_this_secmaybe_rangetagthen(maybe_range,tag)::x2elsex2)(Pset.elementsirec.img.by_range)x2)inletincluded_defs=(letx2=([])inList.fold_right(fun(maybe_range,def)x2->ifrange_or_sym_is_in_this_secmaybe_range(SymbolDef(def))thendef::x2elsex2)(elf_memory_image_defined_symbols_and_rangesirec.img)x2)inletincluded_global_defs=(letx2=([])inList.fold_right(fundefx2->ifnot(Nat_big_num.equal((* filter out locals *)get_elf64_symbol_bindingdef.def_syment)stb_local)thendef::x2elsex2)included_defsx2)in(* What symbol defs are being included? *)(* For each global symbol defined in the section, output a line. *)(*let _ = Missing_pervasives.outs (List.foldl (^) "" (
List.map (fun def -> (make_line ""
(hex_string_of_natural (comp_addr + (natural_of_elf64_addr def.def_syment.elf64_st_value)))
(hex_string_of_natural (natural_of_elf64_xword def.def_syment.elf64_st_size))
(" " ^ def.def_symname)) ^ "\n"
) included_global_defs
))
in*)let(new_ranges_and_tags:((element_rangeoption)*(any_abi_featurerange_tag))Pset.set)=(Lem_set.setMapMaybe(instance_Basic_classes_SetType_tup2_dict(instance_Basic_classes_SetType_Maybe_maybe_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_var_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_Num_natural_dictinstance_Basic_classes_SetType_Num_natural_dict)))instance_Basic_classes_SetType_var_dict)(instance_Basic_classes_SetType_tup2_dict(instance_Basic_classes_SetType_Maybe_maybe_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_var_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_Num_natural_dictinstance_Basic_classes_SetType_Num_natural_dict)))instance_Basic_classes_SetType_var_dict)(fun(maybe_range,tag)->(* How do we update existing metadata? In general,
* we get a new range. *)letnew_range=((matchmaybe_rangewithNone->None|Some(el_name,(start,len))->Some(secname1,((* FIXME: pass this through a section-to-element gensym.
We can just (for now) define output element names
to equal the section names, since we have no unnamed
output sections and no output common symbols. *)letnew_start_off=(Nat_big_num.addstart(Nat_big_num.sub_natcomp_addroutput_section_start_addr))in(*let _ = errln ("Calculated element offset 0x" ^ (hex_string_of_natural new_start_off) ^
" in element " ^ secname ^ " for tag at address 0x" ^ (hex_string_of_natural accum_current_addr) ^
" , start offset 0x" ^ (hex_string_of_natural start) ^ ", output section start addr 0x" ^
(hex_string_of_natural output_section_start_addr) ^ ", comp_addr 0x" ^ (hex_string_of_natural comp_addr))
in*)(new_start_off,len)))))in(matchtagwith(* If it's a section, we discard it.
* We will add a new section record at the end. (FIXME) *)|FileFeature(ElfSection(idx1,isec1))->None(* If it's a symbol def, we propagate it.
* We record its linkable idx, so we can
* match it later with the bindings we formed
* earlier.
* FIXME: this is a bit nasty. Perhaps we
* should replace syment with a minimal structure
* that avoids duplication. Same for isecs. *)|SymbolDef(def)->(* if get_elf64_symbol_type def.def_syment = stt_section
then Nothing FIXME: also re-create the section symbol when we create the ElfSection
else *)(* This doesn't work -- some refs might be bound to this symbol.
Instead, strip the symbol when we generate the output symtab (FIXME). *)(*let _ = errln ("Copying symbol named `" ^ def.def_symname ^ "'")
in*)Some(new_range,SymbolDef({def_symname=(def.def_symname);def_syment=(def.def_syment);def_sym_scn=(def.def_sym_scn);def_sym_idx=(def.def_sym_idx);def_linkable_idx=(irec.idx)}))|AbiFeature(x)->Some(new_range,AbiFeature(x))(* If it's a symbol ref with no reloc site, we discard it? *)|SymbolRef(r)->(*let _ = if r.ref.ref_symname = "_start" then errln ("Saw ref to _start, "
^ "in section " ^ irec.isec.elf64_section_name_as_string ^ " of linkable " ^ (show irec.idx))
else ()
in*)letget_binding_for_ref=(funsymref->(funlinkable_idx->(funfname1->letname_matches1=((matchPmap.lookupsymref.ref_symnamebindings_by_namewithSomex->x|None->[]))in(matchList.filter(fun(bi,((r_idx,r,r_item),m_d))->Nat_big_num.equalr_idxlinkable_idx&&(r=symref))name_matches1with[(b_idx,b)]->(b_idx,b)|[]->failwith"no binding found"|_->failwith("ambiguous binding found for symbol `"^(symref.ref_symname^("' in file "^fname1)))))))inlet(bi,b)=(get_binding_for_refr.refirec.idxirec.fname)inlet((ref_idx,ref1,ref_linkable),maybe_def)=bin(matchr.maybe_relocwithNone->None(* If it's a reloc site, we need to somehow point it
* at the *definition* that it was bound to. YES.
* reloc_sites are
type reloc_site = <|
ref_relent : elf64_relocation_a
; ref_rel_scn : natural --the relocation section idx
; ref_rel_idx : natural --the index of the relocation rec
; ref_src_scn : natural --the section *from which* the reference logically comes
|>
type elfNN_relocation_a =
<| elfNN_ra_offset : elf32_addr --Address at which to relocate
; elfNN_ra_info : elf32_word --Symbol table index/type of relocation to apply
; elfNN_ra_addend : elf32_sword --Addend used to compute value to be stored
|>
* ... of which ref_src_scn, ref_rel_idx,
* ref_rel_scn and elfNN_ra_offset can be ignored.
*
* What *is* important is that we somehow point at
* the symbol definition (or perhaps *un*definition,
* if we're generating a shared library) that it
* refers to.
*
* For that, we update ra_info use the 1 + binding_idx,
* i.e. consider that there is a fresh symbol table
* and that it has a distinct entry for each binding.
*
* FIXME: we also need to account for
* reloc decisions -- MakePIC etc.
*)|Some(rs)->let(rel_type1,_)=(a.parse_reloc_infors.ref_relent.elf64_ra_info)inSome(new_range,SymbolRef({ref=({(* This is not the place to be fixing up
* symbol references. We can't yet patch the element content,
* because we haven't yet decided on the address of everything.
*
* That said, we *do* need to represent the old ref in the new
* linked-image context. That's *all* we should be doing, right now.
*
*)ref_symname=(ref1.ref_symname);ref_syment=({elf64_st_name=(Uint32_wrapper.of_bigint((Nat_big_num.of_int0)))(* unused *);elf64_st_info=(ref1.ref_syment.elf64_st_info);elf64_st_other=(ref1.ref_syment.elf64_st_other);elf64_st_shndx=(Uint32_wrapper.of_bigint((* shn_abs *)(Nat_big_num.of_int0)));elf64_st_value=(Uint64_wrapper.of_bigint((Nat_big_num.of_int0)));elf64_st_size=(Uint64_wrapper.of_bigint((Nat_big_num.of_int0)))});ref_sym_scn=((Nat_big_num.of_int0));ref_sym_idx=((Nat_big_num.of_int0))(* match maybe_def with Just _ -> 1+bi | Nothing -> 0 end *)});maybe_reloc=(Some{ref_relent=({elf64_ra_offset=(Uint64_wrapper.of_bigint((Nat_big_num.of_int0)))(* ignored *);elf64_ra_info=(Uint64_wrapper.logor(* HACK: use bi as the symbol index. *)(Uint64_wrapper.of_bigintrel_type1)(Uint64_wrapper.shift_left(* ... actually, don't, now we have maybe_def_bound_to *)(Uint64_wrapper.of_bigint((* (1+bi) *)(Nat_big_num.of_int0)))32));elf64_ra_addend=(rs.ref_relent.elf64_ra_addend)});ref_rel_scn=((Nat_big_num.of_int0));ref_rel_idx=((Nat_big_num.of_int0));ref_src_scn=((Nat_big_num.of_int0))});maybe_def_bound_to=((* Re-search the bindings list for a match, because we might have
* re-bound this symbol since we created the image. FIXME: since
* we do this, is there anything gained from populating this field
* earlier? Probably best not to. *)let(possible_bindings:(Nat_big_num.num*binding)list)=((matchPmap.lookupref1.ref_symnamebindings_by_namewithSomel->ifref1.ref_symname="__fini_array_end"then(*let _ = errln ("Found " ^ (show (length l)) ^ " bindings for __fini_array_end, of which " ^
(show (length (List.filter (fun (bi, (r, maybe_d)) -> maybe_d <> Nothing) l))) ^
" are with definition")
in*)lelsel|None->[]))in(* what's the actual binding? *)(matchr.maybe_def_bound_towithNone->failwith("at this stage, all references must have a decision: `"^(ref1.ref_symname^"'"))|Some(decision,_)->(* Search the list of bindings for a possibly-updated
* binding for this reference. *)letmatching_possibles=(List.filter(fun(bi,((ref_idx,ref1,ref_item),maybe_d))->(matchmaybe_dwithNone->false|Some(def_idx,def,def_item)->Nat_big_num.equal(* match the *reference*, whose linkable we're processing now *)irec.idxref_idx&&(Nat_big_num.equalr.ref.ref_sym_scnref1.ref_sym_scn&&Nat_big_num.equalr.ref.ref_sym_idxref1.ref_sym_idx)(*
def.def_syment = sd.def_syment
&& def.def_sym_scn = sd.def_sym_scn
&& def.def_sym_idx = sd.def_sym_idx
&& def_idx = sd.def_linkable_idx *)))possible_bindings)in(*let _ = errln ("For a ref to `" ^ ref.ref_symname ^
"', possibles list is: " ^ (
List.foldl (fun x -> fun y -> x ^ ", " ^ y) "" (List.map (fun (bi, ((_, _, _), maybe_d)) ->
match maybe_d with
Just(def_idx, def, def_item) ->
"`" ^ def.def_symname ^ "' " ^
"in linkable " ^ (show def_idx) ^
", section " ^ (show def.def_sym_scn) ^
", sym idx " ^ (show def.def_sym_idx)
| _ -> failwith "impossible: just filtered out no-def bindings"
end
) matching_possibles)
))
in*)letnew_bound_to=((matchmatching_possibleswith[]->Some(ApplyReloc,None)|[(bi,((rl,r,ri),maybe_d))]->Some(decision,(matchmaybe_dwithSome(def_idx,def,def_item)->Some{def_symname=(def.def_symname);def_syment=(def.def_syment);def_sym_scn=(def.def_sym_scn);def_sym_idx=(def.def_sym_idx);def_linkable_idx=def_idx}|None->None))|_->failwith("After linker script, ambiguous bindings for `"^(ref1.ref_symname^"'"))))inifnot((Lem.option_equal(Lem.pair_equal(=)(Lem.option_equal(=)))new_bound_tor.maybe_def_bound_to))then(*let _ = errln ("Changed binding for reference to `" ^ ref.ref_symname ^
"' in linkable " ^ (show irec.idx))
in*)new_bound_toelseif(Lem.option_equal(Lem.pair_equal(=)(Lem.option_equal(=)))new_bound_toNone)thenfailwith"really need a decision by now"elsenew_bound_to))(* if irec.fname = "libc.a(__uClibc_main.os)"
&& irec.isec.elf64_section_name_as_string = ".data.rel.local"
then
let _ = errln ("Saw the bugger: " ^ (match r.maybe_def_bound_to with
Just(decision, Just(sd)) -> show sd.def_syment
| _ -> "(not complete)"
end))
in r.maybe_def_bound_to
else r.maybe_def_bound_to
*)})))(* match maybe_reloc *))(* match tag *))((Pset.from_list(pairCompare(maybeCompare(pairComparecompare(pairCompareNat_big_num.compareNat_big_num.compare)))compare)ranges_and_tags)))(* end mapMaybe fn *)inletisec_sz=(irec.isec.elf64_section_size)inletmaybe_el_sz=(el.length1)inletcontents_sz=(lengthel.contents)inlet(actual_sz,padded_contents)=((matchmaybe_el_szwithSomeel_sz->letdiff=(Nat_big_num.sub_natel_szcontents_sz)inifNat_big_num.lessdiff((Nat_big_num.of_int0))then(* contents greater than what the el says, so chop the end off *)(*let _ = Missing_pervasives.errln ("Warning: size mismatch for section " ^ irec.isec.elf64_section_name_as_string ^
" from " ^ irec.fname)
in*)(el_sz,take0el_szel.contents)else(el_sz,List.rev_append(List.revel.contents)(replicate0diffNone))|None->ifnot(Nat_big_num.equal(lengthel.contents)isec_sz)thenfailwith"input section size not equal to its content pattern length"else(isec_sz,el.contents)))in(*let _ = errln ("Saw first 20 bytes of section " ^ irec.isec.elf64_section_name_as_string ^
" from " ^ irec.fname ^ " as " ^ (show (take 20 padded_contents)))
in*)(actual_sz,padded_contents,new_ranges_and_tags)|_->failwith"impossible: no such element")(* match Map.lookup idstr img.elements *))(* match maybe_secname *)|IncludeCommonSymbol(retain_pol,fname1,linkable_idx,def,img2)->(*let _ = errln ("Including common symbol called `" ^ def.def_symname ^ "'")
in*)(* We want to get the common symbol as a byte pattern *)letsz=(Ml_bindings.nat_big_num_of_uint64def.def_syment.elf64_st_size)inletcontent=(Missing_pervasives.replicate0sz(Some(Char.chr(Nat_big_num.to_int((Nat_big_num.of_int0))))))in(*let _ = Missing_pervasives.outln (make_line "COMMON" (hex_string_of_natural comp_addr)
(hex_string_of_natural sz) fname)
in*)(sz,content,(Pset.from_list(pairCompare(maybeCompare(pairComparecompare(pairCompareNat_big_num.compareNat_big_num.compare)))compare)[(Some(secname1,(Nat_big_num.sub_natcomp_addroutput_section_start_addr,sz)),SymbolDef({def_symname=(def.def_symname);def_syment=(def.def_syment);def_sym_scn=(def.def_sym_scn);def_sym_idx=(def.def_sym_idx);def_linkable_idx=linkable_idx}))]))(* | Hole(AddressExprFn f) ->
let next_addr = f addr (AllocatedSectionsMap outputs_by_name)
in
let n = next_addr - addr
in
let content = Missing_pervasives.replicate n Nothing
in
let _ = Missing_pervasives.outln (make_line "*fill*" (hex_string_of_natural comp_addr)
(hex_string_of_natural n)
"")
in
(next_addr - addr, content, {}) *)|ProvideSymbol(pol,name1,(size2,info,other))->(*let _ = errln ("Creating symbol definition named `" ^ name ^ "' in output section `" ^ secname ^ "'")
in*)letsymaddr=accum_current_addr(* FIXME: support others *)in(*let _ = Missing_pervasives.outln (make_line "" (hex_string_of_natural symaddr) "" ("PROVIDE (" ^ name ^ ", .)"))
in*)((* sz *)(Nat_big_num.of_int0),(* comp_el_pat *)[],(Pset.from_list(pairCompare(maybeCompare(pairComparecompare(pairCompareNat_big_num.compareNat_big_num.compare)))compare)[(Some(secname1,((Nat_big_num.sub_natsymaddroutput_section_start_addr),(Nat_big_num.of_int0))),SymbolDef(symbol_def_for_provide_symbolname1size2infoothercontrol_script_linkable_idx))]))))(* match comp_el_pat *)in(*let _ = errln ("Appending byte pattern to section " ^ secname ^ ", first 20 bytes: " ^
(show (take 20 comp_el_pat)))
in*)letnew_content=(append_to_byte_pattern_at_offset(Nat_big_num.sub_natcomp_addroutput_section_start_addr)accum_patcomp_el_pat)inletnew_addr=(Nat_big_num.addcomp_addrsz)inletnew_meta=(Pset.(union)accum_metathis_el_meta)in(new_content,new_addr,new_meta)))([],output_section_start_addr,(Pset.from_list(pairCompare(maybeCompare(pairComparecompare(pairCompareNat_big_num.compareNat_big_num.compare)))compare)[]))(list_combinecompcomp_addrs))inletconcat_sec_el=({Memory_image.startpos=(Some(output_section_start_addr));Memory_image.length1=(Some(size2));Memory_image.contents=concatenated_content})in(*let _ = Missing_pervasives.outln "" in*)(* Make a new element in the image, also transferring metadata from input elements
* as appropriate. *)letnew_by_range_list=((Some(secname1,((Nat_big_num.of_int0),size2)),FileFeature(ElfSection((* We don't yet konw where this'll come in the output file, so ... *)(* scn_idx *)(Nat_big_num.of_int0),{elf64_section_name=((Nat_big_num.of_int0))(* ignored *);elf64_section_type=(output_section_typecomp);elf64_section_flags=(output_section_flagscomp);elf64_section_addr=((Nat_big_num.of_int0))(* ignored -- covered by element *);elf64_section_offset=((Nat_big_num.of_int0))(* ignored -- will be replaced when file offsets are assigned *);elf64_section_size=((Nat_big_num.of_int0))(* ignored *);elf64_section_link=((Nat_big_num.of_int0))(* HMM *);elf64_section_info=((Nat_big_num.of_int0))(* HMM *);elf64_section_align=(alignof_output_sectioncomp);elf64_section_entsize=((Nat_big_num.of_int0))(* HMM *);elf64_section_body=Byte_sequence.empty(* ignored *);elf64_section_name_as_string=secname1(* can't rely on this being ignored *)})))::Pset.elementsnew_range_tag_pairs)in(*let _ = errln ("Metadata for new section " ^ secname ^ " consists of " ^ (show (length new_by_range_list)) ^ " tags.")
in*)letnew_by_range=(List.fold_left(funm->fun(maybe_range,tag)->letnew_s=(Pset.add(maybe_range,tag)m)in(* let _ = errln ("Inserting an element into by_range; before: " ^ (show (Set.size m)) ^ "; after: " ^ (show (Set.size new_s)))
in *)new_s)acc_img.by_rangenew_by_range_list)inletnew_by_tag=(by_tag_from_by_range(instance_Basic_classes_SetType_Maybe_maybe_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_var_dict(instance_Basic_classes_SetType_tup2_dictinstance_Basic_classes_SetType_Num_natural_dictinstance_Basic_classes_SetType_Num_natural_dict)))instance_Basic_classes_SetType_var_dictnew_by_range)inlet_=(letsection_tags_bare=(List.filter(fun(maybe_range,tag)->(matchtagwith|FileFeature(ElfSection(idx1,isec1))->true|_->false))(Pset.elementsnew_by_range))in(* errln ("Total metadata now includes " ^ (show (length section_tags_bare)) ^ " sections; are by_range and "
^ "by_tag consistent? " ^ (show (new_by_tag = by_tag_from_by_range new_by_range))) *)())in(* this expression is the return value of add_output_section *)(Nat_big_num.add(* new_pos *)output_section_start_addrsize2,(* new_acc *){elements=(Pmap.addsecname1concat_sec_elacc_img.elements)(* tag it as a section, and transfer any tags *);by_range=(* let _ = errln ("Returning from add_output_section a by_range with " ^
(show (Set.size new_by_range))) in *)new_by_range;by_tag=new_by_tag},(* sec_sz *)size2,(* replacement_output_sec *)(OutputSectionSpec(guard,Some(output_section_start_addr),secname1,comp)))))(* end add_output_section *)in(matchscript1with[]->(acc,(AllocatedSectionsMapoutputs_by_name))|(element1,el_idx)::more_elements_and_idx->letdo_nothing=(acc,pos,(AllocatedSectionsMapoutputs_by_name))inlet(new_acc,new_pos,new_outputs_by_name)=((matchelement1withDefineSymbol(symdefpol,name1,(symsize,syminfo,symother))->(* We've already added this to the output composition. *)do_nothing|AdvanceAddress(AddressExprFnadvance_fn_ref)->letadvance_fn=((matchPmap.lookupadvance_fn_refalloc_mapwith|Somem->m|None->failwith"alloc_map invariant failure"))inletnew_pos=(advance_fnpos(AllocatedSectionsMapoutputs_by_name))in(acc,new_pos,(AllocatedSectionsMapoutputs_by_name))(* FIXME: the allocated sections map is the subset of the outputs_by_name map
* that has been allocated -- meaning *both* sized *and* placed.
* Since we're a multi-pass interpreter, we've sized everything already, but
* only a subset has been placed. So we need to weed out all elements from
* outputs_by_name that don't correspond to a section in the accumulated image.
* We should probably include the section's range_tag in the allocated_sections_map,
* which would force us to do this, but at the moment neither of these is done. *)|MarkAndAlignDataSegment(maxpagesize1,commonpagesize1)->(* GNU linker manual says:
"DATA_SEGMENT_ALIGN(MAXPAGESIZE, COMMONPAGESIZE)
is equivalent to either
(ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - 1)))
or
(ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - COMMONPAGESIZE)))
depending on whether the latter uses fewer COMMONPAGESIZE sized
pages for the data segment (area between the result of this
expression and `DATA_SEGMENT_END') than the former or not. If the
latter form is used, it means COMMONPAGESIZE bytes of runtime
memory will be saved at the expense of up to COMMONPAGESIZE wasted
bytes in the on-disk file."
In other words, we're marking the beginning of the data segment
by aligning our position upwards by an amount that
- guarantees we're on a new page...
- ... but (option 1) at an address that's congruent, modulo the max page size
(e.g. for 64kB maxpage, 4kB commonpage, we AND with 0xffff)
- ... (option 2) at an offset that's at the commonpagesize boundary
immediately preceding the lowest congruent address
(e.g. for 64kB maxpage, 4kB commonpage, we AND with 0xf000,
so if we're at pos 0x1234, we bump up to 0x11000).
FIXME:
The GNU linker seems to bump up to 0x12000 here, not 0x11000.
Specifically,
DATA_SEGMENT_ALIGN (0x200000, 0x1000)
bumps 0x4017dc up to 0x602000.
This is indeed better, because it allows the next section
to be output without a big gap in the file.
LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000
0x00000000000017dc 0x00000000000017dc R E 200000
LOAD 0x0000000000002000 0x0000000000602000 0x0000000000602000
0x0000000000000120 0x0000000000000ce8 RW 200000
... whereas if the second LOAD began at address 0x601000,
the file offset of its first section would have to be 0x11000.
So what *should* the formula be?
It needs to calculate the next address which
- is a commonpagesize boundary;
- is minimally >= the current address, modulo the commonpagesize
- is minimally >= the current address, modulo the maxpagesize.
The AND operation gives us something that is minimally *below*
the commonpagesize boundary. I think we need to add COMMONPAGESIZE.
The code does this (in ldexp.c around line 478 as of binutils 2.25):
expld.result.value = align_n (expld.dot, maxpage);
/* omit relro phase */
if (expld.dataseg.phase == exp_dataseg_adjust)
{
if (commonpage < maxpage)
expld.result.value += ((expld.dot + commonpage - 1)
& (maxpage - commonpage));
}
else
{
expld.result.value += expld.dot & (maxpage - 1);
Which amounts to:
1. first, align up to maxpage. So for our example, we're now 0x10000.
or for our real example, we're now 0x600000
THEN since the first phase (expld_dataseg_none)
hits the final "else" case,
we immediately restore the modulus of the address,
giving 0x60188c.
or 0x6019ac the second time around (FIXME: why two?)
2. next, on the relevant phase (pass) of the script interpreter,
i.e. OPTION 2
if commonpage < maxpage,
bump up the *non-maxpage-aligned non-modulo-restored* address
by
(. + commonpage - 1) & (maxpage - commonpage)
i.e. for our example earlier
(0x01234 + 0x1000 - 1) & (0xf000)
=
0x02233 & 0xf000
=
0x02000
i.e. for our real example
(0x4019ac + 0x1000 - 1) & (0x1ff000)
=
0x4029ab & 0x1ff000
=
0x002000
3. OPTION 1 is implemented by the trailing "else {"
-- it restores the modulus.
So the problem with our original logic (below) was that
it did what the manual says, not what the code does.
Specifically, the code for option 2 does
(. + commonpagesize - 1) & (maxpagesize - commonpagesize)
and NOT simply
. & (maxpagesize - commonpagesize).
FIXME: report this bug.
Note that intervening commands can do arbitrary things to the location
counter, so we can't do any short-cut arithmetic based on section sizes;
we actually have to run the layout procedure til we hit the end of the
data segment, and then see how we do.
We run this function *forward* with the first option on a subset
of the script ending with the end of the data segment.
We then see what comes back.
*)(* let num_pages_used *)(*let _ = errln ("Option 1 congruence add-in from pos 0x" ^ (hex_string_of_natural pos) ^ ", maxpagesize 0x" ^
(hex_string_of_natural maxpagesize) ^ " is 0x" ^ (hex_string_of_natural (natural_land pos (maxpagesize - 1))))
in*)letoption1=(Nat_big_num.add(align_up_tomaxpagesize1pos)(Nat_big_num.bitwise_andpos(Nat_big_num.sub_natmaxpagesize1((Nat_big_num.of_int1)))))in(*let _ = errln ("Mark/align data segment: option 1 is to bump pos to 0x" ^ (hex_string_of_natural option1))
in*)letoption2=(Nat_big_num.add(align_up_tomaxpagesize1pos)(Nat_big_num.bitwise_and(Nat_big_num.sub_nat(Nat_big_num.addposcommonpagesize1)((Nat_big_num.of_int1)))(Nat_big_num.sub_natmaxpagesize1commonpagesize1)))in(*let _ = errln ("Mark/align data segment: option 2 is to bump pos to 0x" ^ (hex_string_of_natural option2))
in*)letdata_segment_endpos=(funstartpos1->(* run forward from here until MarkDataSegmentEnd,
* accumulating the actually-made outputs by name and their sizes *)let(endpos,_)=(List.fold_left(fun(curpos,seen_end)->fun(new_script_item,new_script_item_idx)->(*let _ = errln ("Folding at pos 0x" ^ (hex_string_of_natural curpos))
in*)ifseen_endthen(curpos,true)elselet(newpos,new_seen)=((matchnew_script_itemwith|MarkDataSegmentEnd->(*let _ = errln "data segment end"
in*)(* break the loop early here *)(curpos,true)|OutputSection(outputguard,maybe_expr,name1,sub_elements)->(*let _ = errln ("output section " ^ name)
in*)letmaybe_found=(Pmap.lookupname1outputs_by_name)inlet(OutputSectionSpec(guard,addr,secname1,comp),seen_script_el_idx)=((matchmaybe_foundwithSome(f,seen_script_el_idx)->(f,seen_script_el_idx)|None->failwith"internal error: output section not found"))in(* Sometimes a given output section name, say .eh_frame, can come from multiple
* script elements with disjoint guard conditions (only_if_ro and only_if_rw, say).
* Only one of them will actually be selected when the guard is being evaluated.
* So when we "replay" the sections' output here, we want to skip the ones whose
* guards were false. The way we implement this is to store the originating script
* element idx in the allocated_output_sections map. We can test that against our
* current script element_idx here *)letreplay_output=(Nat_big_num.equalseen_script_el_idxel_idx)inifreplay_outputthen(letunaligned_start_addr=curposinletstart_addr=(align_up_to(alignof_output_sectioncomp)unaligned_start_addr)inlet(end_addr,comp_addrs)=(do_output_section_layout_starting_at_addrstart_addr(AllocatedSectionsMapoutputs_by_name)comp)inletsize2=(Nat_big_num.sub_natend_addrstart_addr)in(end_addr,(* seen_end *)false))else(curpos,(* seen_end *)false)|AdvanceAddress(AddressExprFnadvance_fn_ref)->(*let _ = errln "Advance address"
in*)letadvance_fn=((matchPmap.lookupadvance_fn_refalloc_mapwith|Somem->m|None->failwith"alloc_map invariant failed"))inletnew_pos=(advance_fncurpos(AllocatedSectionsMapoutputs_by_name))in(new_pos,false)|_->(curpos,seen_end)))inifNat_big_num.lessnewposcurposthenfailwith"went backwards"else(newpos,new_seen))(startpos1,false)more_elements_and_idx)inendpos)inletendpos_option1=(data_segment_endposoption1)inletendpos_option2=(data_segment_endposoption2)in(*let _ = errln ("Mark/align data segment: option 1 gives an endpos of 0x" ^ (hex_string_of_natural endpos_option1))
in*)(*let _ = errln ("Mark/align data segment: option 2 gives an endpos of 0x" ^ (hex_string_of_natural endpos_option2))
in*)letnpages=(funstartpos1->(funendpos->Nat_big_num.div(Nat_big_num.sub_nat(align_up_tocommonpagesize1endpos)(round_down_tocommonpagesize1startpos1))commonpagesize1))inletnpages_option1=(npagesoption1endpos_option1)inletnpages_option2=(npagesoption2endpos_option1)in(*let _ = errln ("Mark/align data segment: option 1 uses " ^ (show npages_option1) ^ " COMMONPAGESIZE-sized pages")
in*)(*let _ = errln ("Mark/align data segment: option 2 uses " ^ (show npages_option2) ^ " COMMONPAGESIZE-sized pages")
in*)ifNat_big_num.lessnpages_option1npages_option2then(*let _ = errln "Choosing option 1" in*)(acc,option1,(AllocatedSectionsMapoutputs_by_name))else(*let _ = errln "Choosing option 2" in*)(acc,option2,(AllocatedSectionsMapoutputs_by_name))|MarkDataSegmentEnd->do_nothing|MarkDataSegmentRelroEnd(*(fun_from_secs_to_something)*)->do_nothing|OutputSection(outputguard,maybe_expr,name1,sub_elements)->(* Get the composition we computed earlier, and actually put it in
* the image, assigning an address to it. *)letmaybe_found=(Pmap.lookupname1outputs_by_name)inlet(found,seen_script_el_idx)=((matchmaybe_foundwithSome(f,saved_idx)->(f,saved_idx)|None->failwith"internal error: output section not found"))inlet(OutputSectionSpec(guard,addr,secname1,comp))=foundin(* let next_free_section_idx = 1 + naturalFromNat (Map.size outputs_by_name)
in *)letcount_sections_in_image=(funimg2->(let(section_tags,section_ranges)=(elf_memory_image_section_rangesimg2)inletsection_tags_bare=(Lem_list.map(funtag->(matchtagwith|FileFeature(ElfSection(idx1,isec1))->true|_->false))section_tags)inlengthsection_tags_bare))in(* Do we actually want to add an output section? Skip empty sections.
* CARE: we actually want to heed the proper ld semantics for empty sections
* (e.g. ". = ." will force output). From the GNU ld manual:
The linker will not normally create output sections with no contents.
This is for convenience when referring to input sections that may or
may not be present in any of the input files. For example:
.foo : { *(.foo) }
will only create a `.foo' section in the output file if there is a
`.foo' section in at least one input file, and if the input sections
are not all empty. Other link script directives that allocate space in
an output section will also create the output section. So too will
assignments to dot even if the assignment does not create space, except
for `. = 0', `. = . + 0', `. = sym', `. = . + sym' and `. = ALIGN (. !=
0, expr, 1)' when `sym' is an absolute symbol of value 0 defined in the
script. This allows you to force output of an empty section with `. =
.'.
The linker will ignore address assignments ( *note Output Section
Address::) on discarded output sections, except when the linker script
defines symbols in the output section. In that case the linker will
obey the address assignments, possibly advancing dot even though the
section is discarded.
* It follows that we might discard the output section,
* but *retain* the symbol definitions within it,
* and keep the dot-advancements that it makes.
* In other words, we care about two things:
*
* -- whether there are any non-empty input sections, *or*
* non-excluded assignments to dot, inside the composition:
* this controls whether the section is output
* -- whether the script defines symbols in the section; if so
* then *even if the section is discarded*
* we must honour the address assignments,
* which means using the ending address of do_output_section_layout_starting_at_addr,
* *and*
* we must retain the symbol definitions (which now could
* end up going in some other section? HMM...)
*)letcomp_element_allocates_space=(funcomp_el->(matchcomp_elwithIncludeInputSection(_,irec)->Nat_big_num.greater(*let _ = errln ("Saw an input section named `" ^ irec.isec.elf64_section_name_as_string ^
"' of size " ^ (show irec.isec.elf64_section_size))
in*)irec.isec.elf64_section_size((Nat_big_num.of_int0))|IncludeCommonSymbol(retain_pol,fname1,idx1,def,img2)->Nat_big_num.greater(Ml_bindings.nat_big_num_of_uint64def.def_syment.elf64_st_size)((Nat_big_num.of_int0))|ProvideSymbol(pol,name1,spec)->true(* HACK: what else makes sense here? *)|Hole(AddressExprFn(address_fn_ref))->letaddress_fn=((matchPmap.lookupaddress_fn_refalloc_mapwith|Somem->m|None->failwith"alloc_map invariant failed"))inletassignment_is_excluded=(funf->(* really makes you wish you were programming in Lisp *)letalways_gives_0=(Nat_big_num.equal(f((Nat_big_num.of_int0))(AllocatedSectionsMapoutputs_by_name))((Nat_big_num.of_int0))&&Nat_big_num.equal(f((Nat_big_num.of_int42))(AllocatedSectionsMapoutputs_by_name))((Nat_big_num.of_int0)))(* FIXME: this is wrong *)inletalways_gives_dot=(Nat_big_num.equal(f((Nat_big_num.of_int0))(AllocatedSectionsMapoutputs_by_name))((Nat_big_num.of_int0))&&Nat_big_num.equal(f((Nat_big_num.of_int42))(AllocatedSectionsMapoutputs_by_name))((Nat_big_num.of_int42)))(* FIXME: this is wrong *)in(* FIXME: what are the semantics of function equality in Lem? *)always_gives_0||(always_gives_dot(*&& (AddressExprFn(f)) <> assign_dot_to_itself*)(* FIXME DPM: almost certainly not what is meant... *)))innot(assignment_is_excludedaddress_fn)))inletsection_contains_non_empty_inputs=(List.existscomp_element_allocates_spacecomp)in(* See note in MarkDataSegmentEnd case about script element idx. Short version:
* multiple output section stanzas, for a given section name, may be in the script,
* but only one was activated by the section composition pass. Ignore the others. *)letdo_output=((Nat_big_num.equalseen_script_el_idxel_idx)&§ion_contains_non_empty_inputs)inifnotdo_outputthen(*let _ = errln ("At pos 0x" ^ (hex_string_of_natural pos) ^ ", skipping output section " ^ name ^
" because " ^ (if not section_contains_non_empty_inputs
then "it contains no non-empty inputs"
else "it was excluded by its output guard"))
in*)(acc,pos,(AllocatedSectionsMapoutputs_by_name))else((* let _ = errln ("Before adding output section, we have " ^ (show (count_sections_in_image acc))
^ " sections.")
in *)let(new_pos,new_acc,sec_sz,replacement_output_sec)=(add_output_section((* next_free_section_idx, *)pos,acc)found)in(*let _ = errln ("At pos 0x" ^ (hex_string_of_natural pos) ^ ", adding output section " ^ name ^
" composed of " ^ (show (length comp)) ^ " items, new pos is 0x" ^ (hex_string_of_natural new_pos))
in*)(* let _ = errln ("Received from add_output_section a by_range with " ^ (show (Set.size new_acc.by_range))
^ " metadata records of which " ^ (show (Set.size {
(r, t)
| forall ((r, t) IN new_acc.by_range)
| match t with FileFeature(ElfSection(x)) -> true | _ -> false end
}
)) ^ " are ELF sections; one more time: " ^ (show (Set.size {
(t, r)
| forall ((t, r) IN new_acc.by_tag)
| match t with FileFeature(ElfSection(x)) -> true | _ -> false end
}
)) ^ "; count_sections_in_image says " ^ (show (
length (Multimap.lookupBy Memory_image_orderings.tagEquiv (FileFeature(ElfSection(0, null_elf64_interpreted_section))) new_acc.by_tag)
))
)
in *)(* let _ = errln ("After adding output section, we have " ^ (show (count_sections_in_image new_acc))
^ " sections.")
in *)(new_acc,new_pos,(AllocatedSectionsMap(Pmap.addname1(replacement_output_sec,el_idx)(Pmap.removename1outputs_by_name)))))|DiscardInput(selector)->do_nothing|InputQuery(retainpol,sortpol,selector)->do_nothing))in(* recurse *)build_imageaalloc_mapnew_accnew_posnew_outputs_by_namebindings_by_namemore_elements_and_idxcontrol_script_linkable_idxlinker_defs_by_name))(*
let rec consecutive_commons rev_acc l =
match l with
[] -> reverse rev_acc
| IncludeCommonSymbol(pol, fname, def, img) :: rest ->
consecutive_commons ((pol, fname, def, img) :: rev_acc) rest
| _ -> reverse rev_acc
end
*)

(*val default_place_orphans : input_output_assignment -> list input_spec -> input_output_assignment*)
(** [default_place_orphans (discards, outputs) inputs] assigns every orphan
    input section to an output section, or discards it.

    An orphan is an [InputSection] in [inputs] whose record does not occur as
    an [IncludeInputSection] element in the composition of any entry of
    [outputs].  [Common] inputs are never treated as orphans.

    Orphans are processed in the order they appear in [inputs].  For each one
    a target output section is chosen from the orphan's name, type and flags
    (see the comment in the body).  If a target index is found, an
    [IncludeInputSection (DefaultKeep, irec)] element is appended to the end
    of that output section's composition.  If no target is found, or the
    orphan is named ".note.GNU-stack", the input is appended to [discards].

    Returns the updated [(discards, outputs)] pair.  The length and order of
    [outputs] are unchanged; only compositions grow. *)
let default_place_orphans (discards, outputs) inputs
    : (input_spec) list * (output_section_spec * Nat_big_num.num) list =
  (* Try to emulate the GNU linker.
   * Its docs say:
       "It attempts to place orphan sections after
       non-orphan sections of the same attribute, such as code vs data,
       loadable vs non-loadable, etc. If there is not enough room to do this
       then it places at the end of the file.
       For ELF targets, the attribute of the section includes section type
       as well as section flag."
   * It places the .tm_clone_table orphan
       [ 9] .tm_clone_table PROGBITS 0000000000000000 00000160
       0000000000000000 0000000000000000 WA 0 0 8
     as
       .data 0x0000000000602120 0x0 crtend.o
       .data 0x0000000000602120 0x0 crtn.o
       .tm_clone_table
       0x0000000000602120 0x0
       .tm_clone_table
       0x0000000000602120 0x0 crtbeginT.o
       .tm_clone_table
       0x0000000000602120 0x0 crtend.o
       .data1
       *(.data1)
       0x0000000000602120 _edata = .
     i.e. between .data and .data1. In the script:
       .got.plt : { *(.got.plt) *(.igot.plt) }
       .data :
       {
       *(.data .data.* .gnu.linkonce.d.* )
       SORT(CONSTRUCTORS)
       }
       .data1 : { *(.data1) }
       _edata = .; PROVIDE (edata = .);
       . = .;
       __bss_start = .;
     i.e. no clear reason for why between .data and .data1. In the code:
     (see elf32em.c line 1787 in binutils 2.25)
     ... the key bit of code is as follows.
       place = NULL;
       if ((s->flags & (SEC_ALLOC | SEC_DEBUGGING)) == 0)
         place = &hold[orphan_nonalloc];
       else if ((s->flags & SEC_ALLOC) == 0)
         ;
       else if ((s->flags & SEC_LOAD) != 0
                && ((iself && sh_type == SHT_NOTE)
                    || (!iself && CONST_STRNEQ (secname, ".note"))))
         place = &hold[orphan_interp];
       else if ((s->flags & (SEC_LOAD | SEC_HAS_CONTENTS | SEC_THREAD_LOCAL)) == 0)
         place = &hold[orphan_bss];
       else if ((s->flags & SEC_SMALL_DATA) != 0)
         place = &hold[orphan_sdata];
       else if ((s->flags & SEC_THREAD_LOCAL) != 0)
         place = &hold[orphan_tdata];
       else if ((s->flags & SEC_READONLY) == 0)
         place = &hold[orphan_data];
       else if (((iself && (sh_type == SHT_RELA || sh_type == SHT_REL))
                 || (!iself && CONST_STRNEQ (secname, ".rel")))
                && (s->flags & SEC_LOAD) != 0)
         place = &hold[orphan_rel];
       else if ((s->flags & SEC_CODE) == 0)
         place = &hold[orphan_rodata];
       else
         place = &hold[orphan_text];
     .. we replicate it here.
   *)
  (* Every input section record already claimed by some output section. *)
  let output_irecs =
    List.fold_left
      (fun claimed (OutputSectionSpec (_guard, _maybe_addr, _name, comp), _script_el_idx) ->
        List.fold_left
          (fun claimed_so_far comp_el ->
            match comp_el with
            | IncludeInputSection (_, irec) -> Pset.add irec claimed_so_far
            | _ -> claimed_so_far)
          claimed comp)
      (Pset.from_list compare []) outputs
  in
  (* Orphans are the input sections nobody claimed; commons are never orphans. *)
  let (orphans : input_spec list) =
    List.filter
      (function
        | InputSection irec -> not (Pset.mem irec output_irecs)
        | Common _ -> false)
      inputs
  in
  (* Fold step: place (or discard) a single orphan. *)
  let place_one_orphan (discards, outputs) input =
    let irec =
      match input with
      | InputSection irec -> irec
      | Common _ -> failwith "impossible: orphan section is not a section"
    in
    let flag_set flags = Pset.from_list Nat_big_num.compare flags in
    (* Index (as reported by find_index0) of an output section satisfying all
     * the given constraints; None for maybe_name / maybe_type means
     * "unconstrained". *)
    let find_output maybe_name maybe_type flags_must_have flags_must_not_have =
      Missing_pervasives.find_index0
        (fun (OutputSectionSpec (_guard, _maybe_addr, name1, comp), _script_el_idx) ->
          let flags = output_section_flags comp in
          (match maybe_name with Some n -> n = name1 | None -> true)
          && (match maybe_type with
              | Some t -> Nat_big_num.equal (output_section_type comp) t
              | None -> true)
          && Pset.for_all (fun x -> flag_is_set x flags) flags_must_have
          && Pset.for_all (fun x -> not (flag_is_set x flags)) flags_must_not_have)
        outputs
    in
    (* All candidate placements are looked up eagerly, in this order, so that
     * any failure inside the lookups surfaces regardless of which candidate
     * ends up being selected. *)
    let place_after_nonalloc =
      find_output None None (flag_set []) (flag_set [shf_alloc]) in
    let place_after_interp =
      find_output (Some ".interp") (Some sht_progbits) (flag_set [shf_alloc]) (flag_set []) in
    let place_after_bss =
      find_output (Some ".bss") (Some sht_nobits) (flag_set [shf_alloc; shf_write]) (flag_set []) in
    let place_after_rodata =
      find_output (Some ".rodata") (Some sht_progbits) (flag_set [shf_alloc]) (flag_set [shf_write]) in
    (* Looked up for parity with the GNU code above, but not selected below. *)
    let _place_after_rel =
      find_output (Some ".rela.dyn") (Some sht_rela) (flag_set []) (flag_set []) in
    let place_after_data =
      find_output (Some ".data") (Some sht_progbits) (flag_set [shf_alloc; shf_write]) (flag_set []) in
    let place_after_text =
      find_output (Some ".text") (Some sht_progbits) (flag_set [shf_alloc; shf_execinstr]) (flag_set []) in
    let isec = irec.isec in
    let has_flag f = flag_is_set f isec.elf64_section_flags in
    let (place_after : Nat_big_num.num option) =
      (* HACK: simulates GNU linker, but this logic ought to go elsewhere *)
      if isec.elf64_section_name_as_string = ".note.GNU-stack" then None
      else if not (has_flag shf_alloc) then
        (* no debugging, for now *)
        place_after_nonalloc
      (* FIXME: reinstate alloc-debugging case *)
      else if Nat_big_num.equal isec.elf64_section_type sht_note
              (* FIXME: replicate iself logic *)
              || isec.elf64_section_name_as_string = ".note"
      then place_after_interp
      else if Nat_big_num.equal isec.elf64_section_type sht_nobits then place_after_bss
      else
        (* FIXME: implement thread-local case *)
        let writable = has_flag shf_write in
        let executable = has_flag shf_execinstr in
        (match (writable, executable) with
         | (false, false) -> place_after_rodata
         | (true, false) -> place_after_data
         | (_, true) -> place_after_text)
    in
    match place_after with
    | Some idx1 ->
      (* The section exists and has the flags we expected, and is at output idx *)
      (* FIXME: also fix up flags, alignment etc. *)
      let append_orphan i ((OutputSectionSpec (guard, maybe_addr, name1, comp), script_el_idx) as output) =
        if Nat_big_num.equal (Nat_big_num.of_int i) idx1 then
          (OutputSectionSpec
             (guard, maybe_addr, name1,
              List.rev_append (List.rev comp) [IncludeInputSection (DefaultKeep, irec)]),
           script_el_idx)
        else output
      in
      (discards, mapi append_orphan outputs)
    | None ->
      (* No suitable output section: the orphan joins the discard list. *)
      (List.rev_append (List.rev discards) [input], outputs)
  in
  List.fold_left place_one_orphan (discards, outputs) orphans

(*val interpret_linker_control_script :
address_expr_fn_map allocated_sections_map ->
linker_control_script
-> linkable_list
-> natural (* control_script_linkable_idx *)
-> abi any_abi_feature
-> list input_spec
-> (input_spec -> input_spec -> ordering) (* seen ordering *)
-> (input_output_assignment -> list input_spec -> input_output_assignment) (* place orphans *)
-> (Map.map string (list (natural * binding))) (* initial_bindings_by_name *)
-> (elf_memory_image * Map.map string (list (natural * binding)))*)letinterpret_linker_control_scriptalloc_mapscript1linkablescontrol_script_linkable_idxainputsseen_orderingplace_orphansinitial_bindings_by_name:(any_abi_feature)annotated_memory_image*((string),((Nat_big_num.num*binding)list))Pmap.map=(letlabelled_script=(label_scriptscript1)in(*let _ = List.mapi (fun i -> fun input ->
errln ("Input " ^ (show i) ^ " is " ^
match input with
InputSection(inp) ->
"input section, name `" ^ inp.secname ^
"', from file `" ^ inp.fname ^ "' (linkable idx " ^ (show inp.idx) ^ ")"
| Common(idx, symname, img, def) ->
"common symbol `" ^ symname ^ "'"
end
)
) inputs
in*)let(discards_before_orphans,outputs_before_orphans)=(assign_inputs_to_output_sections([],[])(Pset.from_list(pairCompareNat_big_num.compareNat_big_num.compare)[])(Pset.from_list(tripleCompareNat_big_num.compareNat_big_num.compareNat_big_num.compare)[])inputsNoneNoneseen_orderinglabelled_script)in(* place orphans *)let(discards,outputs)=(place_orphans(discards_before_orphans,outputs_before_orphans)inputs)in(* In assigning inputs to outputs, we may also have defined some symbols. These affect the
* bindings that are formed. So, we rewrite the bindings here. Note that we have to do so here,
* not in the caller, because these extra bindings can affect the reachability calculation
* during GC. *)let(linker_defs_by_name,(bindings_by_name:((string,((Nat_big_num.num*binding)list))Pmap.map)))=(let(script_defs_by_name:(string,((symbol_definition*symbol_def_policy)list))Pmap.map)=(List.fold_left(funacc->(fun((OutputSectionSpec(guard,maybe_addr,secname1,comp)),script_el_idx)->List.fold_left(funinner_acc->funcomp_el->((matchcomp_elwithProvideSymbol(pol,name1,(size2,info,other))->(*let _ = errln ("Linker script defining symbol `" ^ name ^ "'")
in*)letdef=(symbol_def_for_provide_symbolname1size2infoothercontrol_script_linkable_idx)inletv=((matchPmap.lookupname1inner_accwithNone->[(def,pol)]|Somel->(def,pol)::l))inPmap.addname1vinner_acc|_->inner_acc)))(acc:(string,((symbol_definition*symbol_def_policy)list))Pmap.map)comp))(Pmap.emptycompare)outputs)in(* Now that we've made these definitions, what bindings are affected?
* We also use this opportunity to bind references to linker-generated symbols,
* such as _GLOBAL_OFFSET_TABLE_, since any definitions of these should now be merged
* into our inputs. *)(* bit of a HACK: reconstruct the linkable img and idx from the input items *)letidx_to_img=(List.fold_left(funacc_m->funitem->(matchitemwithCommon(idx1,_,img2,symdef)->Pmap.addidx1img2(Pmap.removeidx1acc_m)|InputSection(irec)->Pmap.addirec.idxirec.img(Pmap.removeirec.idxacc_m)))(Pmap.emptyNat_big_num.compare)inputs)inlet(lowest_idx:Nat_big_num.num)=((matchPset.min_elt_opt(Pmap.domainidx_to_img)withSomex->x|None->failwith"internal error: no linkable items"))inletfirst_linkable_item=((matchlinkableswithx::more->x|_->failwith"internal error: no linkables"))inlet(control_script_input_item:input_item)=("(built-in control script)",ControlScript,(BuiltinControlScript,[Builtin]))inlet(control_script_linkable_item:linkable_item)=(ControlScriptDefs,control_script_input_item,{item_fmt="";item_check_sections=false;item_copy_dt_needed=false;item_force_output=true})inletupdated_bindings_and_new_defs=(Pmap.map(funb_list_initial->Lem_list.map(fun(b_idx,b_initial)->let((iref_idx,iref,iref_item),maybe_idef)=b_initialin(*let _ = errln ("Looking for linker script or linker-generated defs of symbol `" ^ iref.ref_symname ^ "'")
in*)letpossible_script_defs=((matchPmap.lookupiref.ref_symnamescript_defs_by_namewithSomel->l|None->[]))inlet(possible_linker_generated_def:symbol_definitionoption)=(ifa.symbol_is_generated_by_linkeriref.ref_symnamethen(* can we find a definition by this name? *)((matchPmap.lookuplowest_idxidx_to_imgwithNone->failwith"no lowest idx found"|Someimg2->(matchList.filter(fundef->def.def_symname=iref.ref_symname)(defined_symbolsinstance_Basic_classes_Ord_Abis_any_abi_feature_dictinstance_Abi_classes_AbiFeatureTagEquiv_Abis_any_abi_feature_dictimg2)with[]->None|[def]->Some(def)|_->failwith("first linkable has multiple defs of name `"^(iref.ref_symname^"'")))))elseNone)in(* If the binding has no def, we always use the def we have.
* If the binding has a def, we use our def only if the policy is AlwaysDefine. *)(*let _ = errs ("Do we override binding " ^ (show b_idx) ^ ", symbol named `" ^
iref.ref_symname ^ "'? ")
in*)(* FIXME: check real semantics of defining symbols like '_GLOBAL_OFFSET_TABLE_' in linker script or input objects.
* This is really just a guess. *)letnew_b_and_maybe_new_def=((match(maybe_idef,possible_script_defs,possible_linker_generated_def)with|(_,[],None)->(*let _ = errln "no" in *)(((iref_idx,iref,iref_item),maybe_idef),None)|(None,[],Some(def))->(*let _ = errln "yes (was undefined)" in*)(((iref_idx,iref,iref_item),Some(lowest_idx,def,first_linkable_item)),Some(def))|(_,[(def,AlwaysDefine)],_)->(*let _ = errln "yes (linker script provides unconditional def)" in*)(((iref_idx,iref,iref_item),Some(control_script_linkable_idx,def,control_script_linkable_item)),Some(def))|(Someexisting_def,([(def,ProvideIfUsed)]),_)->(*let _ = errln "no" in*)(((iref_idx,iref,iref_item),Someexisting_def),None)|(None,[(def,ProvideIfUsed)],_)->(*let _ = errln "yes (linker script provides if-used def)" in*)(((iref_idx,iref,iref_item),Some(control_script_linkable_idx,def,control_script_linkable_item)),Some(def))|(_,pair1::pair2::more,_)->(*let _ = errln "error" in*)failwith"ambiguous symbol binding in linker control script"))in(b_idx,new_b_and_maybe_new_def))b_list_initial)initial_bindings_by_name)inlet(new_symbol_defs_map:(string,((symbol_definitionoption)list))Pmap.map)=(Pmap.map(funb_pair_list->Lem_list.map(fun(b_idx,(new_b,maybe_new_def))->maybe_new_def)b_pair_list)updated_bindings_and_new_defs)inlet(new_symbol_defs_by_name:(string,(symbol_definitionlist))Pmap.map)=(Pmap.map(funv->Lem_list.mapMaybeid0v)new_symbol_defs_map)in(* { List.mapMaybe id maybe_def_list | forall ((_, maybe_def_list) IN (Map.toSet new_symbol_defs_map)) | true }
in*)(*let new_symbol_defs = List.concat (Set_extra.toList new_symbol_def_list_set)
in*)letupdated_bindings=(Pmap.map(funb_pair_list->Lem_list.map(fun(b_idx,(new_b,maybe_new_def))->(b_idx,new_b))b_pair_list)updated_bindings_and_new_defs)in(new_symbol_defs_by_name,updated_bindings))in(*let _ = errln ("For __fini_array_end, we have " ^
(let all_bs = match Map.lookup "__fini_array_end" bindings_by_name with
Just l -> l
| Nothing -> []
end
in
((show (length all_bs)) ^
" bindings, of which " ^
(show (length (List.filter (fun (bi, ((ref_idx, ref, ref_item), maybe_def)) ->
match maybe_def with
Just _ -> true
| _ -> false
end
) all_bs))) ^ " have defs")))
in*)letoutputs_by_name=(letinsert_fun=(funm->(fun(OutputSectionSpec(guard,maybe_addr,name1,compos),script_idx)->Pmap.addname1((OutputSectionSpec(guard,maybe_addr,name1,compos)),script_idx)m))inList.fold_leftinsert_fun(Pmap.emptycompare)outputs)in(* Print the link map's "discarded input sections" output. *)(*let _ = Missing_pervasives.outln "\nDiscarded input sections\n"
in*)letdiscard_line=(funi->((matchiwithInputSection(s)->letlpadded_secname=(" "^s.secname)inlpadded_secname^((space_padding_and_maybe_newline((Nat_big_num.of_int16))lpadded_secname)^("0x0000000000000000"(* FIXME *)^(" 0x"^((hex_string_of_naturals.isec.elf64_section_size)^(" "^(s.fname^"\n"))))))|Common(idx1,fname1,img2,def)->""(* don't print discard lines for discarded commons *))))in(*let _ = Missing_pervasives.outs (List.foldl (fun str -> (fun input -> (str ^ (discard_line input)))) "" (reverse discards))
in*)letoutputs_by_name_after_gc=(compute_def_use_and_gc(AllocatedSectionsMapoutputs_by_name))in(*let _ = Missing_pervasives.outs "\nMemory Configuration\n\nName Origin Length Attributes\n*default* 0x0000000000000000 0xffffffffffffffff\n"
in
let _ = Missing_pervasives.outln "\nLinker script and memory map\n"
in*)(* FIXME: print LOAD and START_GROUP trace *)let(img2,outputs_by_name_with_position)=(build_imageaalloc_mapempty_elf_memory_image((Nat_big_num.of_int0))outputs_by_name_after_gcbindings_by_namelabelled_scriptcontrol_script_linkable_idxlinker_defs_by_name)in(*let _ = errln ("Final image has " ^ (show (Map.size img.elements)) ^ " elements and "
^ (show (Set.size img.by_tag)) ^ " metadata tags, of which " ^ (
let (section_tags, section_ranges) = elf_memory_image_section_ranges img
in
let section_tags_bare = List.map (fun tag ->
match tag with
| FileFeature(ElfSection(idx, isec)) -> (idx, isec)
| _ -> failwith "not section tag"
end) section_tags
in
show (length section_tags_bare)
) ^ " are sections.")
in*)(* The link map output for the section/address assignment basically mirrors our notion of
* output section composition. In the following:
0x0000000000400000 PROVIDE (__executable_start, 0x400000)
0x0000000000400190 . = (0x400000 + SIZEOF_HEADERS)
.interp
*(.interp)
.note.ABI-tag 0x0000000000400190 0x20
.note.ABI-tag 0x0000000000400190 0x20 crt1.o
.note.gnu.build-id
0x00000000004001b0 0x24
*(.note.gnu.build-id)
.note.gnu.build-id
0x00000000004001b0 0x24 crt1.o
.hash
*(.hash)
.gnu.hash
*(.gnu.hash)
... we can see that
- symbol provision, holes and output sections all get lines
- each output section appears with its name left-aligned, and its address,
if any, appearing afterwards; if so, the section's total size also follows.
- each input query is printed verbatim, e.g. "*(.note.gnu.build-id)"
- underneath this, a line is printed for each input section that was included,
with its address and size. This can spill onto a second line in the usual way.
- holes are shown as "*fill*"
- provided symbols are shown as in the linker script source.
PROBLEM: we don't have the script in source form, so we can't print the queries verbatim.
Each query should really be annotated with its source form; when the script is parsed from
source, this annotation can be inserted automatically. Until then, one option is to annotate
each script element manually. For the moment, for diffing purposes, filter out lines with asterisks.
*)(img2,bindings_by_name))