Source file frontend.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
(*
 * Copyright (c) 2010-2013 Anil Madhavapeddy <anil@recoil.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *)

open Lwt.Infix

module Gntref = Xen_os.Xen.Gntref
module Export = Xen_os.Xen.Export

(* Log source for this driver; every message in this file goes through the
   [Log] module derived from it. *)
let src = Logs.Src.create "net-xen frontend" ~doc:"Mirage's Xen netfront"
module Log = (val Logs.src_log src : Logs.LOG)

(* Local shorthand for [Lwt.return], used throughout this file. *)
let return = Lwt.return

(* [allocate_ring ~domid] takes one fresh I/O page and one grant reference,
   zeroes the page, grants [domid] writable access to it, and returns the
   grant reference together with the page viewed as a [Cstruct.t]. *)
let allocate_ring ~domid =
  let page = Io_page.get 1 in
  let x = Io_page.to_cstruct page in
  Export.get ()
  >>= fun gnt ->
  (* Clear the whole page in one call, before access is granted. *)
  Cstruct.memset x 0;
  Export.grant_access ~domid ~writable:true gnt page;
  return (gnt, x)

(* [create_ring ~domid ~idx_size name] allocates a shared page through
   [allocate_ring], lays out a ring called [name] on it, and returns the
   page's grant reference, the front end of the ring, and an [Lwt_ring]
   client wrapped around that front end.  The same code serves both the
   receive and the transmit ring. *)
let create_ring ~domid ~idx_size name =
  allocate_ring ~domid >>= fun (gnt, buf) ->
  let sring = Ring.Rpc.of_buf ~buf ~idx_size ~name in
  let front = Ring.Rpc.Front.init ~sring in
  let client = Lwt_ring.Front.init string_of_int front in
  return (gnt, front, client)

(* Build the receive ring of interface [id], shared with domain [domid]. *)
let create_rx (id, domid) =
  let name = Printf.sprintf "Netif.RX.%d" id in
  create_ring ~domid ~idx_size:RX.total_size name
(* Build the transmit ring of interface [id], shared with domain [domid]. *)
let create_tx (id, domid) =
  let name = Printf.sprintf "Netif.TX.%d" id in
  create_ring ~domid ~idx_size:TX.total_size name

module Make(C: S.CONFIGURATION) = struct
  (* The error type and its printer are re-exported unchanged from
     [Mirage_net.Net]. *)
  type error = Mirage_net.Net.error
  let pp_error = Mirage_net.Net.pp_error

  (* Everything needed to drive one connected interface.  Values of this type
     are built by [plug_inner]. *)
  type transport = {
    vif_id: int;          (* Interface number; also the key used in [devices] *)
    backend_id: int;      (* Backend identifier from the backend configuration;
                             used as the domain id for grants and the event channel *)
    backend: string;      (* Path in XenStore *)
    mac: Macaddr.t;       (* Obtained from [C.read_frontend_mac] *)
    mtu: int;             (* Obtained from [C.read_mtu] *)

    (* To transmit, we take half-pages from [Shared_page_pool], copy the data to them,
       and push the ref to the ring. *)
    tx_client: (TX.Response.t,int) Lwt_ring.Front.t;
    tx_gnt: Gntref.t;      (* Grant reference of the TX ring page *)
    tx_mutex: Lwt_mutex.t; (* Held to avoid signalling between fragments *)
    tx_pool: Shared_page_pool.t;

    (* To receive, we share a set of whole pages with the backend. We put the details of
       these grants in the rx_ring and wait to be notified that they've been used. *)
    rx_fring: (RX.Response.t,int) Ring.Rpc.Front.t;
    rx_client: (RX.Response.t,int) Lwt_ring.Front.t;
    (* Outstanding receive requests: request id -> (grant, page).  Entries are
       added by [refill_requests] and removed by [pop_rx_page]. *)
    rx_map: (int, Gntref.t * Io_page.t) Hashtbl.t;
    rx_gnt: Gntref.t;      (* Grant reference of the RX ring page *)
    (* Next candidate request id; [refill_requests] advances it modulo 2^16. *)
    mutable rx_id: Cstruct.uint16;
    (* Pages available for receive requests; consumed by [take_pages] and
       replenished by [return_page]. *)
    mutable free_pages: Io_page.t list;

    evtchn: Xen_os.Eventchn.t; (* Event channel bound towards [backend_id] *)
    features: Features.t;      (* Features advertised by the backend *)
    stats : Mirage_net.stats;  (* Traffic counters updated on rx and tx *)
  }

  (* Handle given to users of the device.  [t] holds the current transport
     and is mutable.
     NOTE(review): [l] and [c] are created in [connect] but are not used
     anywhere else in the code visible here - presumably reserved for
     suspend/resume handling; confirm before relying on them. *)
  type t = {
    mutable t: transport;
    l : Lwt_mutex.t;
    c : unit Lwt_condition.t;
  }

  (* Event-channel handle created once per functor application; it is used
     for binding, unmasking and notifying the event channel of every device. *)
  let h = Xen_os.Eventchn.init ()

  (* Given a VIF ID, construct a netfront record for it.

     Steps, in order: read the backend configuration and the frontend MAC,
     allocate the RX and TX rings, bind an event channel towards the backend,
     publish the ring references and the event channel through
     [C.write_frontend_configuration], call [C.connect], wait until the
     backend reports connected, unmask the event channel, then read the MTU
     and assemble the [transport] record. *)
  let plug_inner vif_id =
    let id = `Client vif_id in
    (* Read details about the device *)
    C.read_backend id >>= fun backend_conf ->
    let backend_id = backend_conf.S.backend_id in
    Log.info (fun f -> f "create: id=%d domid=%d" vif_id backend_id);
    let features = backend_conf.S.features_available in
    Log.info Features.(fun f -> f " sg:%b gso_tcpv4:%b rx_copy:%b rx_flip:%b smart_poll:%b"
      features.sg features.gso_tcpv4 features.rx_copy features.rx_flip features.smart_poll);
    C.read_frontend_mac id >>= fun mac ->
    Log.info (fun f -> f "MAC: %s" (Macaddr.to_string mac));
    (* Allocate a transmit and receive ring, and event channel *)
    create_rx (vif_id, backend_id)
    >>= fun (rx_gnt, rx_fring, rx_client) ->
    create_tx (vif_id, backend_id)
    >>= fun (tx_gnt, _tx_fring, tx_client) ->
    let tx_mutex = Lwt_mutex.create () in
    let evtchn = Xen_os.Eventchn.bind_unbound_port h backend_id in
    let evtchn_port = Xen_os.Eventchn.to_int evtchn in
    (* Write Xenstore info and set state to Connected *)
    let front_conf = { S.
      tx_ring_ref = Gntref.to_int32 tx_gnt;
      rx_ring_ref = Gntref.to_int32 rx_gnt;
      event_channel = string_of_int (evtchn_port);
      (* The requested feature set is fixed here; it does not depend on the
         [features] value read from the backend above. *)
      feature_requests = { Features.
        rx_copy = true;
        rx_flip = false;
        rx_notify = true;
        sg = true;
        gso_tcpv4 = false;
        smart_poll = false;
      };
    } in
    C.write_frontend_configuration id front_conf >>= fun () ->
    C.connect id >>= fun () ->
    (* Wait for backend to accept connection *)
    let rx_map = Hashtbl.create 1 in
    C.wait_until_backend_connected backend_conf >>= fun () ->
    Xen_os.Eventchn.unmask h evtchn;
    let stats = Stats.create () in
    (* TX blocks are granted to the backend without write access. *)
    let grant_tx_page = Export.grant_access ~domid:backend_id ~writable:false in
    let tx_pool = Shared_page_pool.make grant_tx_page in
    (* Collect the remaining fields and build the record *)
    let backend = backend_conf.S.backend in
    C.read_mtu id >>= fun mtu ->
    return { vif_id; backend_id; tx_client; tx_gnt; tx_mutex; tx_pool;
             rx_gnt; rx_fring; rx_client; rx_map; rx_id = 0 ; stats;
             evtchn; mac; mtu; backend; features;
             free_pages = Io_page.to_pages (Io_page.get 256); (* Allocate 256*4kB=1MB of free_pages*)
           }

  (** Set of active network devices, keyed by VIF id.  [connect] adds
      entries and [disconnect] removes them. *)
  let devices : (int, t) Hashtbl.t = Hashtbl.create 1

  (* Send a notification on the event channel of the given transport.  The
     trailing unit argument lets [notify nf] be handed around as a callback. *)
  let notify { evtchn; _ } () = Xen_os.Eventchn.notify h evtchn

  (* [take_pages t n] removes [n] pages from the front of [t.free_pages] and
     returns them (in reverse pool order; callers treat them as
     interchangeable).

     Raises [Failure] when the pool holds fewer than [n] pages; in that case
     [t.free_pages] is left untouched. *)
  let take_pages t n =
    (* Move [remaining] pages from [pool] onto [taken]. *)
    let rec split remaining pool taken =
      match remaining, pool with
      | 0, _ -> taken, pool
      | _, [] ->
        (* Report the size of the whole request, not the remaining shortfall. *)
        failwith (Printf.sprintf "Frontend wants %d pages, this is too much, fail." n)
      | _, page :: rest -> split (remaining - 1) rest (page :: taken)
    in
    let pages, new_free_pages = split n t.free_pages [] in
    t.free_pages <- new_free_pages ;
    pages

  (* Direct binding to the C stub named caml_fill_bigstring.
     NOTE(review): judging by its only use in [return_page], the arguments
     are presumably buffer, start offset, length and fill byte, with no
     bounds checking - confirm against the stub before adding new callers. *)
  external unsafe_fill_bigstring : Io_page.t -> int -> int -> int -> unit = "caml_fill_bigstring" [@@noalloc]

  (* Give page [p] back to the pool of [t], wiping it first so that nothing
     from the previous use survives into the next one. *)
  let return_page t p =
    let () = unsafe_fill_bigstring p 0 Io_page.page_size 0 in
    t.free_pages <- p :: t.free_pages

  (* Fill every free slot of the receive ring with a new request.  Each
     request gets a page from the free pool, a new grant giving the backend
     write access to that page, and an id that is not currently outstanding.
     The backend is notified only if the ring says a notification is needed. *)
  let refill_requests nf =
    (* Return the next id absent from [rx_map], advancing [rx_id] modulo 2^16
       past every candidate that was examined. *)
    let rec fresh_id () =
      let candidate = nf.rx_id in
      nf.rx_id <- (succ nf.rx_id) mod (1 lsl 16) ;
      if Hashtbl.mem nf.rx_map candidate then fresh_id () else candidate
    in
    (* Grant one page, remember it under a fresh id and write the request. *)
    let enqueue (gref, page) =
      let id = fresh_id () in
      Export.grant_access ~domid:nf.backend_id ~writable:true gref page;
      Hashtbl.add nf.rx_map id (gref, page);
      let slot_id = Ring.Rpc.Front.next_req_id nf.rx_fring in
      let slot = Ring.Rpc.Front.slot nf.rx_fring slot_id in
      ignore(RX.Request.(write {id; gref = Gntref.to_int32 gref}) slot)
    in
    let num = Ring.Rpc.Front.get_free_requests nf.rx_fring in
    if num <= 0 then return ()
    else begin
      Export.get_n num >>= fun grefs ->
      let pages = take_pages nf num in
      List.iter enqueue (List.combine grefs pages);
      let must_notify =
        Ring.Rpc.Front.push_requests_and_check_notify nf.rx_fring in
      if must_notify then notify nf ();
      return ()
    end

  (* Look up the outstanding receive request [id], forget it, end the
     backend's access to its grant (releasing the reference) and return the
     page.  Raises [Not_found] if [id] is not outstanding. *)
  let pop_rx_page nf id =
    let gref, page = Hashtbl.find nf.rx_map id in
    Hashtbl.remove nf.rx_map id;
    Export.end_access ~release_ref:true gref >|= fun () -> page

  (* Process every response currently waiting on the receive ring.

     The responses are first collected, then grouped into frames by
     [Recv.group_frames].  For a well-formed frame the fragments are copied
     into one freshly allocated buffer, which is passed to [fn]; for a frame
     reported as an error the pages are simply recycled.  In both cases every
     page goes back to the free pool through [return_page].

     Raises [Failure] if a response cannot be decoded. *)
  let rx_poll nf fn =
    let module Recv = Assemble.Make(RX.Response) in
    (* Responses are accumulated newest-first and reversed below. *)
    let q = ref [] in
    Ring.Rpc.Front.ack_responses nf.rx_fring (fun slot ->
      match RX.Response.read slot with
      | Error msg -> failwith msg
      | Ok req -> q := req :: !q
    );
    List.rev !q
    |> Recv.group_frames
    |> Lwt_list.iter_s (function
      | Error (e, msgs) ->
          (* Nothing is delivered for this frame; just reclaim its pages. *)
          Log.err (fun f -> f "received error: %d" e);
          msgs |> Lwt_list.iter_s (fun msg ->
            pop_rx_page nf msg.RX.Response.id >>= fun (page : Io_page.t) ->
            return_page nf page ;
            Lwt.return_unit
          )
      | Ok frame ->
          let data = Cstruct.create frame.Recv.total_size in
          (* Write position inside [data] for the next fragment. *)
          let next = ref 0 in
          frame.Recv.fragments |> Lwt_list.iter_s (fun {Recv.size; msg} ->
            let {RX.Response.id; size = _; flags = _; offset} = msg in
            pop_rx_page nf id >|= fun page ->
            let buf = Io_page.to_cstruct page in
            (* Copy out before the page is wiped by [return_page]. *)
            Cstruct.blit buf offset data !next size;
            return_page nf page ;
            next := !next + size
          ) >|= fun () ->
          (* The fragment sizes must add up to the advertised total. *)
          assert (!next = Cstruct.length data);
          (* The callback runs detached, so polling does not wait for it. *)
          Lwt.async (fun () ->
            Stats.rx nf.stats (Int64.of_int (Cstruct.length data));
            fn data)
    )

  (* Let the transmit client consume pending responses.  Each response is
     decoded and handed back paired with its id. *)
  let tx_poll nf =
    let decode slot =
      let resp = TX.Response.read slot in
      (resp.TX.Response.id, resp)
    in
    Lwt_ring.Front.poll nf.tx_client decode

  (* Service the device forever: deliver received frames to
     [receive_callback], replenish the receive ring, collect transmit
     responses, then sleep until the next event on the device's channel.
     [header_size] is accepted and ignored. *)
  let listen nf ~header_size:_ receive_callback =
    let rec serve last_event =
      rx_poll nf.t receive_callback >>= fun () ->
      refill_requests nf.t >>= fun () ->
      tx_poll nf.t;
      Xen_os.Activations.after nf.t.evtchn last_event >>= serve
    in
    serve Xen_os.Activations.program_start

  (* [connect id] returns the device whose VIF number is written in [id].

     A device that is already registered in [devices] is returned as is;
     otherwise the interface is plugged, registered and returned.  If [id]
     is not an integer the resulting promise fails with [Failure], and the
     message enumerates the available interfaces. *)
  let connect id =
    match int_of_string_opt id with
    | Some vif_id -> begin
        match Hashtbl.find_opt devices vif_id with
        | Some dev -> return dev
        | None ->
          Log.info (fun f -> f "connect %d" vif_id);
          plug_inner vif_id >>= fun t ->
          let l = Lwt_mutex.create () in
          let c = Lwt_condition.create () in
          (* packets are dropped until listen is called *)
          let dev = { t; l; c } in
          Hashtbl.add devices vif_id dev;
          return dev
      end
    | None ->
      C.enumerate () >>= fun all ->
      let msg =
        Printf.sprintf "device %s not found (available = [ %s ])"
          id (String.concat ", " all)
      in
      Lwt.fail_with msg

  (* Shut down the transmit page pool of the device and drop it from
     [devices].  Nothing here blocks, although a complete implementation
     might have to because of Xenstore (XXX). *)
  let disconnect t =
    Log.info (fun f -> f "disconnect");
    let transport = t.t in
    (* TODO: free pages still in [t.rx_map] *)
    Shared_page_pool.shutdown transport.tx_pool;
    Hashtbl.remove devices transport.vif_id;
    return ()

  (* [await_tx_ack replied] waits for the backend's response to a transmit
     request and resolves to unit when its status is OKAY.  For any other
     status the resulting promise fails with [Failure].  Shared by
     [write_request] and [write_already_locked]. *)
  let await_tx_ack replied =
    replied >>= fun reply ->
    let open TX.Response in
    match reply.status with
    | DROPPED -> failwith "Netif: backend dropped our frame"
    | NULL -> failwith "Netif: NULL response"
    | ERROR -> failwith "Netif: ERROR response"
    | OKAY -> return ()

  (* Push up to one block's worth of data to the ring, but without sending an
   * event notification. Once the data has been added to the ring, returns the
   * remaining (unsent) data and a thread which will return when the data has
   * been ack'd by netback.
   *
   * [size], when given, is the value written in the request's size field and
   * added to the transmit statistics; it defaults to the number of bytes
   * copied into the block. *)
  let write_request ?size ~flags nf datav =
    Shared_page_pool.use nf.t.tx_pool (fun ~id gref shared_block ->
      let len, datav = Cstruct.fillv ~src:datav ~dst:shared_block in
      (* [size] includes extra pages to follow later *)
      let size = match size with |None -> len |Some s -> s in
      Stats.tx nf.t.stats (Int64.of_int size);
      let request = { TX.Request.
        id;
        gref = Gntref.to_int32 gref;
        offset = shared_block.Cstruct.off;
        flags;
        size
      } in
      Lwt_ring.Front.write nf.t.tx_client
          (fun slot -> TX.Request.write request slot; id) >>= fun replied ->
      (* request has been written; [replied] resolves once we have a reply *)
      return (datav, await_tx_ack replied)
    )

  (* Transmit a packet applying fillf
   * The buffer's data must fit in a single block.
   *
   * The block is zeroed, [fillf] writes the packet into its first [size]
   * bytes and returns the length actually used, the request is written and
   * the backend is notified.  The result is a thread which returns when the
   * request has been ack'd.  Raises [Failure] if [fillf] reports more than
   * [size] bytes. *)
  let write_already_locked nf ~size fillf =
    Shared_page_pool.use nf.t.tx_pool (fun ~id gref shared_block ->
        Cstruct.memset shared_block 0;
        let len = fillf (Cstruct.sub shared_block 0 size) in
        if len > size then failwith "length exceeds size" ;
        Stats.tx nf.t.stats (Int64.of_int len);
        let request = { TX.Request.
          id;
          gref = Gntref.to_int32 gref;
          offset = shared_block.Cstruct.off;
          flags = Flags.empty;
          size = len
        } in
        Lwt_ring.Front.write nf.t.tx_client
          (fun slot -> TX.Request.write request slot; id) >>= fun replied ->
        (* request has been written; [replied] resolves once we have a reply *)
        return ((), await_tx_ack replied)) >>= fun ((), th) ->
    Lwt_ring.Front.push nf.t.tx_client (notify nf.t);
    return th

  (* Transmit one packet of at most [size] bytes, whose contents are produced
     by [fillf].  [fillf] is given a buffer and returns the number of bytes
     it actually wrote.

     The transmit mutex is held while the requests are queued, and the ring
     is waited on until it has room for every block the packet needs.  The
     outer promise resolves once the requests are on the ring; the inner
     promise it carries resolves once the backend has acknowledged all of
     them.  Raises [Failure] if [fillf] reports more bytes than [size]. *)
  let write_no_retry nf ~size fillf =
    let numneeded = Shared_page_pool.blocks_needed size in
    Lwt_mutex.with_lock nf.t.tx_mutex
      (fun () ->
         Lwt_ring.Front.wait_for_free nf.t.tx_client numneeded >>= fun () ->
         match numneeded with
         (* Nothing to send: hand back an already-resolved acknowledgement. *)
         | 0 -> return (return ())
         | 1 ->
           (* If there is only one block, then just write it normally *)
           write_already_locked nf ~size fillf
         | n ->
           (* Several blocks: build the packet in a temporary buffer first,
              then copy it out block by block. *)
           let datav = Cstruct.create size in
           let len = fillf datav in
           if len > size then failwith "length exceeds total size" ;
           let datav = Cstruct.sub datav 0 len in
           (* For Xen Netfront, the first fragment contains the entire packet
            * length, which the backend will use to consume the remaining
            * fragments until the full length is satisfied *)
           write_request ~flags:Flags.more_data ~size:len nf [datav]
           >>= fun (datav, first_th) ->
           (* Queue the remaining fragments.  Every fragment but the last
              carries the more_data flag; the last one must use up all the
              data that is left. *)
           let rec xmit datav = function
             | 0 -> return []
             | 1 ->
                 write_request ~flags:Flags.empty nf datav
                 >>= fun (datav, th) ->
                 assert (Cstruct.lenv datav = 0);
                 return [ th ]
             | n ->
                 write_request ~flags:Flags.more_data nf datav
                 >>= fun (datav, next_th) ->
                 xmit datav (n - 1)
                 >>= fun rest ->
                 return (next_th :: rest) in
           xmit datav (n - 1)
           >>= fun rest_th ->
           (* All fragments are now written, we can now notify the backend *)
           Lwt_ring.Front.push nf.t.tx_client (notify nf.t);
           return (Lwt.join (first_th :: rest_th))
      )

  (* Queue a packet for transmission and return [Ok ()] as soon as it has
     been queued; the backend's acknowledgement is not waited for.

     If the ring is shut down, either while queueing or while waiting for the
     acknowledgement, the whole write is started again and the result of that
     retry is discarded.
     NOTE(review): the retry reads [nf.t] afresh, so it presumably relies on
     the transport having been replaced in the meantime - confirm. *)
  let rec write nf ~size fillf =
    Lwt.catch
      (fun () -> write_no_retry nf ~size fillf)
      (function
        (* Turn a shutdown during queueing into a failed acknowledgement, so
           that both cases are handled by the same handler below. *)
        | Lwt_ring.Shutdown -> return (Lwt.fail Lwt_ring.Shutdown)
        | e -> Lwt.fail e)
    >>= fun released ->
    Lwt.on_failure released (function
        | Lwt_ring.Shutdown -> ignore (write nf ~size fillf)
        (* Any other acknowledgement failure is re-raised from inside the
           callback rather than reported through the returned promise. *)
        | ex -> raise ex
      );
    return (Ok ())

  (* Accessors for the current transport of a device.  The MAC and MTU are
     the values recorded when the interface was plugged (the Xenstore MAC
     address is colon separated, very helpfully). *)
  let mac { t = transport; _ } = transport.mac
  let mtu { t = transport; _ } = transport.mtu

  (* Traffic counters of the current transport. *)
  let get_stats_counters { t = transport; _ } = transport.stats

  (* Reset the traffic counters of the current transport. *)
  let reset_stats_counters { t = transport; _ } = Stats.reset transport.stats
end