Source file true_dual_port_ram.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
open Base
open Hardcaml
open Signal
module Tdpram = Xpm.Xpm_memory_tdpram

(* Block RAM - address collision behaviour.  UG573, table 1-3, common clocks.

   For a given mode on port a and b, and read/write enables on each port, what is the
   resulting value on the data out ports, and stored in memory?

   {v
   port a     port b   wea       web        doa   dob   mem
  RF/WF/NC | RF/WF/NC | 0       | 0       | OLD | OLD | NC
  RF       | RF/WF/NC | 1 (DIA) | 0       | OLD | OLD | DIA
  WF       | RF/WF/NC | 1 (DIA) | 0       | DIA | X   | DIA
  NC       | RF/WF/NC | 1 (DIA) | 0       | NC  | X   | DIA
  RF/WF/NC | RF       | 0       | 1 (DIB) | OLD | OLD | DIB
  RF/WF/NC | WF       | 0       | 1 (DIB) | X   | DIB | DIB
  RF/WF/NC | NC       | 0       | 1 (DIB) | X   | NC  | DIB
  RF/WF/NC | RF/WF/NC | 1       | 1       | X   | X   | X
v}

   RF     = Read first
   WF     = Write first
   NC     = No change
   OLD    = Old values stored in memory
   DIA/B  = Data in A or B
   X      = Invalid
   we[ab] = write when [1], read when [0]
   In all cases the addresses on ports a and b are the same value.
*)

(* Ultra RAM

   These work differently. They have 2 ports, [a] and [b]. The RAM is "double pumped" -
   that is, it works at twice the nominal clock rate and performs the [a] operation
   followed by the [b] operation.

   On a [write] operation, the output data on the same port is unchanged. Somewhat similar
   to [no_change] mode.

   Across ports, the behavior depends on the ordering of the ports, i.e. a write on [a]
   will be reflected by a read on [b], but not the other way round.
*)

(* [any t] is a single bit that is high when at least one bit of [t] is high. The bits of
   [t] are OR-ed together through a tree of 2-input reductions. *)
let any t = Signal.bits_msb t |> tree ~arity:2 ~f:(reduce ~f:( |: ))

(* Collision (write) mode implied by a RAM architecture: distributed RAM is modelled as
   read-before-write, block RAM uses whichever mode it was configured with, and UltraRAM
   is treated as no-change. *)
let collision_mode : Ram_arch.t -> Collision_mode.t = function
  | Ultraram -> No_change
  | Blockram mode -> mode
  | Distributed -> Read_before_write
;;

(* Build the RAM from the Xilinx XPM true dual port macro ([Xpm.Xpm_memory_tdpram]) and
   return the read data of port a and port b as a pair.

   - [read_latency] is applied to both ports and must be greater than 0.
   - [arch] selects the memory primitive and, via [collision_mode], the write mode that is
     applied to both ports.
   - [size] is the number of words; both address ports must be exactly
     [Bits.address_bits_for size] bits wide.
   - [byte_write_width] sets the granularity of the write enables. The data width must be
     a multiple of it and each write enable must have one bit per such group.
   - [cascade_height] and [memory_optimization] override the XPM defaults when provided.

   Violations of the width and latency requirements fail with [assert]. *)
let create_xpm
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~byte_write_width
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
      ~cascade_height:arg_cascade_height
      ~memory_optimization:arg_memory_optimization
  =
  (* Byte write width in bits. [Full] means one write enable covers the whole word. *)
  let byte_write_width =
    match byte_write_width with
    | Byte_write_width.B8 -> 8
    | B9 -> 9
    | Full -> width port_a.data
  in
  (* XPM parameters: start from the defaults in [Tdpram.P] and override what this RAM
     needs. *)
  let module Params = struct
    include Tdpram.P

    (* [None] keeps the value inherited from [Tdpram.P]. *)
    let memory_optimization =
      match arg_memory_optimization with
      | None -> memory_optimization
      | Some false -> "false"
      | Some true -> "true"
    ;;

    (* [None] keeps the value inherited from [Tdpram.P]. *)
    let cascade_height =
      match arg_cascade_height with
      | None -> cascade_height
      | Some arg_cascade_height -> Cascade_height.to_xpm_args arg_cascade_height
    ;;

    (* From here on, inside [Params], [width] is the data width of port a as an integer
       and shadows the [width] function. *)
    let width = width port_a.data
    let addr_bits = Bits.address_bits_for size

    (* Both ports share the same data, byte-write and address geometry. *)
    let write_data_width_a = width
    let write_data_width_b = width
    let byte_write_width_a = byte_write_width
    let byte_write_width_b = byte_write_width
    let read_data_width_a = width
    let read_data_width_b = width
    let addr_width_a = addr_bits
    let addr_width_b = addr_bits

    (* Data width multiplied by the number of words. *)
    let memory_size = width * size
    let memory_primitive = Ram_arch.to_xpm_parameter arch
    let read_latency_a = read_latency
    let read_latency_b = read_latency

    (* NOTE(review): presumably 0 disables memory initialisation - confirm against the XPM
       documentation. *)
    let use_mem_init = 0

    (* The same write mode, derived from [arch], is used on both ports. *)
    let write_mode_a = Collision_mode.to_xpm_parameter (collision_mode arch)
    let write_mode_b = write_mode_a
  end
  in
  (* One write enable bit per [byte_write_width] group of data bits. *)
  let write_enable_width =
    assert (width port_a.data % byte_write_width = 0);
    width port_a.data / byte_write_width
  in
  (* Sanity checks on the latency and on the port widths. *)
  assert (read_latency > 0);
  assert (width port_a.data = width port_b.data);
  assert (Params.addr_bits = width port_a.address);
  assert (Params.addr_bits = width port_b.address);
  assert (write_enable_width = width port_a.write_enable);
  assert (write_enable_width = width port_b.write_enable);
  let module RAM = Tdpram.Make (Params) in
  (* A port is enabled when it is reading or when any of its write enable bits is set. *)
  let ena = any port_a.write_enable |: port_a.read_enable in
  let enb = any port_b.write_enable |: port_b.read_enable in
  (* Value driven onto the [regce] inputs. With a read latency of 1 it is tied high;
     otherwise it is the read enable passed through [pipeline] with [read_latency - 1]
     always-enabled stages.
     NOTE(review): presumably this lines the output register enable up with the data
     moving through the XPM output stages - confirm against the XPM documentation. *)
  let regce clock en =
    let spec = Reg_spec.create () ~clock in
    match read_latency with
    | 1 -> vdd
    | n -> pipeline spec ~enable:vdd ~n:(n - 1) en
  in
  (* Instantiate the macro. Error injection and sleep are tied low. *)
  let ram : _ RAM.O.t =
    RAM.create
      { RAM.I.clka (* Port A *) = clock_a
      ; rsta = clear_a
      ; regcea = regce clock_a port_a.read_enable
      ; ena
      ; wea = port_a.write_enable
      ; dina = port_a.data
      ; addra = port_a.address
      ; injectsbiterra = gnd
      ; injectdbiterra = gnd (* Port B *)
      ; clkb = clock_b
      ; rstb = clear_b
      ; regceb = regce clock_b port_b.read_enable
      ; enb
      ; web = port_b.write_enable
      ; dinb = port_b.data
      ; addrb = port_b.address
      ; injectsbiterrb = gnd
      ; injectdbiterrb = gnd
      ; sleep = gnd
      }
  in
  ram.douta, ram.doutb
;;

(* Delay [d] by [latency] register stages clocked by [clock].

   - A [latency] of 0 returns [d] untouched.
   - Every stage but the last is an always-enabled register without clear.
   - The last stage is cleared by [clear] and enabled by [enable], which by then has been
     delayed by one register per stage so that it lines up with the data. *)
let output_pipe ~clock ~clear ~latency ~enable d =
  let plain_spec = Reg_spec.create () ~clock in
  let clearing_spec = Reg_spec.create () ~clock ~clear in
  (* Unconditional one-cycle delay, used for the enable and for intermediate data. *)
  let delay s = reg plain_spec ~enable:vdd s in
  let rec go stages ~enable d =
    match stages with
    | 0 -> d
    | 1 -> reg clearing_spec ~enable:(delay enable) d
    | _ -> go (stages - 1) ~enable:(delay enable) (delay d)
  in
  go latency ~enable d
;;

(* Core simulation model of the RAM, returned as a 2 element array of read data (port
   [a] at index 0, port [b] at index 1). It is very similar to the rams built with
   [Ram.create].

   Each port has one register, enabled by the port's capture enable, which sits either
   on the read address or on the memory output depending on [arch]:

   - Block RAM in [Write_before_read] mode registers the read addresses.
   - Distributed RAM and block RAM in the other modes register the memory outputs.
   - Ultra RAM is the special case: to get the correct behaviour for a write on one port
     and a read on the other, port [a] registers its output (as for [Read_before_write])
     while port [b] registers its address (as for [Write_before_read]). *)
let create_base_rtl_ram
      ~(arch : Ram_arch.t)
      ~clock_a
      ~clock_b
      ~size
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  let enabled_reg clock enable = reg (Reg_spec.create ~clock ()) ~enable in
  (* In [No_change] mode the register only updates on a read without a write; in the
     other modes it updates on a read or a write. *)
  let capture_enable (port : _ Ram_port.t) =
    match collision_mode arch with
    | Read_before_write | Write_before_read -> port.read_enable |: port.write_enable
    | No_change -> port.read_enable &: ~:(port.write_enable)
  in
  let capture_a = enabled_reg clock_a (capture_enable port_a) in
  let capture_b = enabled_reg clock_b (capture_enable port_b) in
  (* Per port: what to apply to the read address, and what to apply to the memory
     output. *)
  let address_stages, output_stages =
    match arch with
    | Blockram Write_before_read -> [| capture_a; capture_b |], [| Fn.id; Fn.id |]
    | Ultraram -> [| Fn.id; capture_b |], [| capture_a; Fn.id |]
    | Distributed | Blockram (Read_before_write | No_change) ->
      [| Fn.id; Fn.id |], [| capture_a; capture_b |]
  in
  let through stages signals =
    Array.map2_exn stages signals ~f:(fun stage signal -> stage signal)
  in
  let port_b_write_enable =
    match arch with
    | Blockram _ | Ultraram -> port_b.write_enable
    (* Distributed RAM will not write on port B. *)
    | Distributed -> gnd
  in
  let read_addresses = through address_stages [| port_a.address; port_b.address |] in
  Signal.multiport_memory
    size
    ~write_ports:
      [| { write_clock = clock_a
         ; write_enable = port_a.write_enable
         ; write_address = port_a.address
         ; write_data = port_a.data
         }
       ; { write_clock = clock_b
         ; write_enable = port_b_write_enable
         ; write_address = port_b.address
         ; write_data = port_b.data
         }
      |]
    ~read_addresses
  |> through output_stages
;;

(* RTL model of the RAM without byte enables: the base RAM from [create_base_rtl_ram]
   followed, on each port, by an [output_pipe] of [read_latency - 1] stages driven by that
   port's clock, clear and read enable. Returns the read data of port a and port b.

   [read_latency] must be greater than 0; this is checked with [assert]. *)
let create_rtl'
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  assert (read_latency > 0);
  (* The base RAM already provides one cycle of latency. *)
  let extra_latency = read_latency - 1 in
  let raw_q = create_base_rtl_ram ~arch ~clock_a ~clock_b ~size ~port_a ~port_b in
  let pipelined ~clock ~clear ~(port : _ Ram_port.t) q =
    output_pipe ~clock ~clear ~latency:extra_latency ~enable:port.read_enable q
  in
  ( pipelined ~clock:clock_a ~clear:clear_a ~port:port_a raw_q.(0)
  , pipelined ~clock:clock_b ~clear:clear_b ~port:port_b raw_q.(1) )
;;

(* RTL model with byte enable support. The core RTL RAM is instantiated once per write
   enable bit: each port is cut into lanes of 8 or 9 data bits (least significant lane
   first), each lane paired with its own write enable bit, and the lane outputs are
   concatenated back together. With [Full] there is a single lane, which is the port
   itself. *)
let create_rtl
      ~read_latency
      ~arch
      ~clock_a
      ~clock_b
      ~clear_a
      ~clear_b
      ~size
      ~(byte_write_width : Byte_write_width.t)
      ~(port_a : _ Ram_port.t)
      ~(port_b : _ Ram_port.t)
  =
  let lanes_of_port (port : _ Ram_port.t) =
    let slice ~part_width =
      let lane_data = split_lsb ~part_width port.data in
      let lane_enables = bits_lsb port.write_enable in
      (* Raises if the number of data lanes and write enable bits differ. *)
      List.map2_exn lane_data lane_enables ~f:(fun data write_enable ->
        { port with data; write_enable })
    in
    match byte_write_width with
    | B8 -> slice ~part_width:8
    | B9 -> slice ~part_width:9
    | Full -> [ port ]
  in
  let build_lane port_a port_b =
    create_rtl' ~read_latency ~arch ~clock_a ~clock_b ~clear_a ~clear_b ~size ~port_a ~port_b
  in
  let qa, qb =
    List.map2_exn (lanes_of_port port_a) (lanes_of_port port_b) ~f:build_lane
    |> List.unzip
  in
  concat_lsb qa, concat_lsb qb
;;

(* Entry point: choose the RAM implementation from [build_mode].

   - [Synthesis] instantiates the XPM macro via [create_xpm].
   - [Simulation] builds the RTL model via [create_rtl]; [memory_optimization] and
     [cascade_height] are not used in this mode.

   Defaults: [read_latency] is 1, [arch] is block RAM in [No_change] mode and
   [byte_write_width] is [Full]. The result still expects the clocks, clears, size and
   the two ports. *)
let create
      ?(read_latency = 1)
      ?(arch = Ram_arch.Blockram No_change)
      ?(byte_write_width = Byte_write_width.Full)
      ?memory_optimization
      ?cascade_height
      ~(build_mode : Build_mode.t)
      ()
  =
  match build_mode with
  | Synthesis ->
    create_xpm ~read_latency ~arch ~byte_write_width ~cascade_height ~memory_optimization
  | Simulation -> create_rtl ~read_latency ~arch ~byte_write_width
;;