123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)openPrintf(** The goal of this file is to inline binary files in a HTML file,
where the binary file will be transformed into a Javascript
Uint8Array. This simplifies UI design, as no web server, request,
protocol etc. is needed to access the data, and it works with
relatively large files (up to 2GB on Chromium).
The general strategy is the following:
- We generate the binary data as chunks of base64 text inside script tags, e.g.
<script id="binary-data0" type="application/octet-stream">aXhHSU0vNVp4Uno5d3...V2aFJKSUpkT</script>
<script id="binary-data1" type="application/octet-stream">QCBAgYBEQAmEQEAwAA...AQQoIIIAASk</script>
The fact that we put it inside script tags means that it won't be displayed.
- We generate some code that takes all these chunks and fills a
variable containing a javascript array from them, in another script
tag, so that the array is available from the browser.
<script>const dataArray = ... load array ...</script>
The performance of doing so is OK; e.g., we need 30s to load 2GB of binary data into Firefox.
Note that Chrome has a limitation that prevents it from allocating arrays of size > 2GB.
Ideally, we could use wasm to do the same thing more efficiently. *)(* Note: we could have a more efficient encoding for text files, where
it suffices to escape the text instead of doing a base64
encoding. But we can just consider everything a binary file. *)

(* Number of bytes of the input file carried by each base64 chunk:
   1 megabyte. This chunk size was chosen arbitrarily. *)
let chunk_size = 1024 * 1024

(** Javascript helpers, wrapped in a script tag, that turn the binary
    chunks into the array.
    - [FILE2HTML__setBase64(targetArray, offset, id)] decodes the base64
      text of the element named [id], copies the resulting bytes into
      [targetArray] starting at [offset], then removes the element.
    - [FILE2HTML__CreateArray(totalSize, id)] allocates a Uint8Array of
      size [totalSize] and fills it from the chunks labeled [id]
      followed by their index, chunk number [i] landing at offset
      [i * chunk_size].
    Note: unfortunately, the decoding needs an intermediary conversion
    pass to javascript strings, where each character is represented as
    a short. *)
let preamble =
  Printf.sprintf {|
<script>
function FILE2HTML__setBase64(targetArray, offset = 0, id) {
const element = document.getElementById(id);
const base64String = element.textContent;
const binaryString = atob(base64String);
const length = binaryString.length;
for (let i = 0; i < length; i++) {
targetArray[offset + i] = binaryString.charCodeAt(i);
}
element.remove();
}
function FILE2HTML__CreateArray(totalSize, id){
let max_offset = totalSize / %d;
const array = new Uint8Array(totalSize);
for(let i = 0; i < max_offset; i++) FILE2HTML__setBase64(array,i*%d,id.concat(i));
return array;
}
</script>
|} chunk_size chunk_size

(* Write in [outc] a sequence of <script id=%idnum>base64</script>
corresponding to the content of filename. Also return the length
of the file/string as it will be useful for the javascript part.
   Chunk number [i] holds the bytes starting at offset [i * chunk_size]
   and is written in an element whose id is [id] followed by [i].
   The input channel is closed before returning, even when reading or
   writing raises. Nothing is written to stdout, so [outc] may itself
   be stdout. *)
let generate_data outc filename id =
  let inc = open_in_bin filename in
  Fun.protect
    ~finally:(fun () -> close_in_noerr inc)
    (fun () ->
      let len = in_channel_length inc in
      (* Scratch buffer shared by all the full-sized chunks. *)
      let buffer = Bytes.create chunk_size in
      let rec loop i offset =
        if offset < len then begin
          let bytes_to_read = min chunk_size (len - offset) in
          (* The last chunk may be shorter than [chunk_size]: give it a
             buffer of exactly its size, since the whole buffer gets
             encoded. *)
          let buffer =
            if bytes_to_read = chunk_size then buffer
            else Bytes.create bytes_to_read
          in
          really_input inc buffer 0 bytes_to_read;
          (* [unsafe_to_string] avoids a copy: the string is only read by
             the encoder, before [buffer] is filled again. *)
          Printf.fprintf outc
            "<script id=\"%s%d\" type=\"application/octet-stream\">%s</script>\n"
            id i
            (Base64.encode_string (Bytes.unsafe_to_string buffer));
          loop (i + 1) (offset + chunk_size)
        end
      in
      loop 0 0;
      len)

(** Generate Javascript that loads (and delete) the data in chunks
named [id], and put it in variable [varid], which is a Uint8Array
of length [len]. The emitted script calls [FILE2HTML__CreateArray],
    which is defined by [preamble]. *)
let generate_js_loading_data outc ~varid ~id len =
  [ "<script>const "; varid; " = FILE2HTML__CreateArray(";
    string_of_int len; ",\""; id; "\");</script>" ]
  |> String.concat ""
  |> output_string outc

(** Generate HTML script in [outc] such that the content of [filename]
will be placed in the variable [varid], which is a Uint8Array.
    The data chunks are labeled [__file2html__] followed by [varid].
    [preamble] is not written by this function; the generated script
    calls a function that [preamble] defines. *)
let file_to_html outc ~filename ~varid =
  let id = "__file2html__" ^ varid in
  let len = generate_data outc filename id in
  generate_js_loading_data outc ~varid ~id len

(* Sample code that shows how to use file_to_html *)
let%test_module "File_to_HTML" =
  (module struct
    (* Generate the full HTML page: the page header, the preamble, the
       content of [filename] inlined in the variable dataArray, and a
       small script that counts some of its bytes. *)
    let generate_html outc filename =
      output_string outc {|
<!DOCTYPE html>
<html lang="en">
<head>
<title>File_to_html test</title>
</head>
<body>
|};
      output_string outc preamble;
      file_to_html outc ~filename ~varid:"dataArray";
      output_string outc {|
<h1>File_to_html test</h1>
<p id="result"></p> <!-- Make room for the display of the result. -->
<script>
// Traverse the ArrayBuffer to count bytes with the value 11 (0x0B)
let count = 0;
for (let i = 0; i < dataArray.length; i++) {
if (dataArray[i] === 11) {
count++;
}
}
// Display the result
document.getElementById("result").textContent =
`The concatenated ArrayBuffer contains ${count} bytes with the value 11 (0x0B).`;
</script>
</body>
</html>
|}

    (* Main program *)
    let () =
      let input_file = "file_to_html.mli" in
      let output_file = "file_to_html.mli.html" in
      (* Generate HTML *)
      let outc = open_out output_file in
      (* On success the channel is closed by [close_out], so that a
         failing flush is still reported; [finally] only matters when
         the generation raises, and does nothing on a closed channel. *)
      Fun.protect
        ~finally:(fun () -> close_out_noerr outc)
        (fun () ->
          generate_html outc input_file;
          close_out outc);
      Printf.printf "HTML file '%s' generated successfully.\n" output_file
  end)