Source file segment.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708

(* Log source for this module; [Log] is the logger bound to that source. *)
let src = Logs.Src.create "tcp.segment" ~doc:"TCP segment"
module Log = (val Logs.src_log src : Logs.LOG)

(* Size in bytes of a TCP header without options: the minimal length accepted
   by [decode] and the offset at which options start. *)
let header_size = 20

(* [guard cond err] is [Ok ()] when [cond] holds and [Error err] otherwise.
   Meant to be chained with [let*] for validation steps. *)
let guard f e =
  match f with
  | true -> Ok ()
  | false -> Error e

(* Binding operator on [result]: a chain of [let*] bindings stops at the first
   [Error] and returns it. *)
let ( let* ) = Result.bind

(* looks like a good use case for gmap ;) *)
(* TCP options handled by this module. Option kinds without a dedicated
   constructor are kept as [Unknown (kind, value)], where [value] is the option
   data without the kind and length octets (see [decode_option]). *)
type tcp_option =
  | MaximumSegmentSize of int
  | WindowScale of int
  | Unknown of int * Cstruct.t

(* Equality on TCP options: same constructor and equal arguments. The value of
   an [Unknown] option is compared with [Cstruct.equal]. *)
let equal_option a b =
  match a with
  | MaximumSegmentSize x ->
    (match b with
     | MaximumSegmentSize y -> x = y
     | WindowScale _ | Unknown _ -> false)
  | WindowScale x ->
    (match b with
     | WindowScale y -> x = y
     | MaximumSegmentSize _ | Unknown _ -> false)
  | Unknown (kind, value) ->
    (match b with
     | Unknown (kind', value') -> kind = kind' && Cstruct.equal value value'
     | MaximumSegmentSize _ | WindowScale _ -> false)

(* Pretty-printer for a single TCP option. Unknown options are shown with their
   kind in hexadecimal followed by a hexdump of the value. *)
let pp_option ppf opt =
  match opt with
  | Unknown (typ, v) -> Fmt.pf ppf "typ %X value %a" typ Cstruct.hexdump_pp v
  | WindowScale f -> Fmt.pf ppf "window scale %d" f
  | MaximumSegmentSize mss -> Fmt.pf ppf "MSS %d" mss

(* [encode_option buf off opt] writes [opt] into [buf] starting at offset [off]
   and returns the offset right after the encoded option.

   Every option is written as kind octet, length octet, data. The length octet
   counts the whole option, including the kind and length octets themselves;
   this is what [decode_option] expects when it reads an option back. *)
let encode_option buf off = function
  | MaximumSegmentSize mss ->
    Cstruct.set_uint8 buf off 2;
    Cstruct.set_uint8 buf (off + 1) 4;
    Cstruct.BE.set_uint16 buf (off + 2) mss;
    off + 4
  | WindowScale f ->
    Cstruct.set_uint8 buf off 3;
    Cstruct.set_uint8 buf (off + 1) 3;
    Cstruct.set_uint8 buf (off + 2) f;
    off + 3
  | Unknown (typ, data) ->
    let data_len = Cstruct.length data in
    (* total option length: kind octet + length octet + data *)
    let total_len = data_len + 2 in
    Cstruct.set_uint8 buf off typ;
    Cstruct.set_uint8 buf (off + 1) total_len;
    Cstruct.blit data 0 buf (off + 2) data_len;
    off + total_len

(* Number of bytes needed to encode [opts], rounded up to the next multiple of
   four. *)
let length_options opts =
  let option_size = function
    | MaximumSegmentSize _ -> 4
    | WindowScale _ -> 3
    | Unknown (_, data) -> Cstruct.length data + 2
  in
  let needed =
    List.fold_left (fun total opt -> option_size opt + total) 0 opts
  in
  (* round up to a multiple of 4 *)
  ((needed + 3) lsr 2) lsl 2

(* Writes all [opts] back to back into [buf], starting at [off]; returns the
   offset after the last option written. No padding is added here. *)
let encode_options buf off opts =
  let rec write off = function
    | [] -> off
    | opt :: rest -> write (encode_option buf off opt) rest
  in
  write off opts

(* [decode_option data] decodes the option at the start of [data], which must
   not be empty. The result is [Ok (opt, consumed)] where [consumed] is the
   number of bytes used by the option. [opt] is [None] for the end-of-options
   and no-operation kinds. End-of-options consumes all remaining bytes; their
   content is not inspected. *)
let decode_option data =
  let available = Cstruct.length data in
  match Cstruct.get_uint8 data 0 with
  | 0 ->
    (* End of option, all remaining bytes must be 0 *)
    Ok (None, available)
  | 1 ->
    (* No operation *)
    Ok (None, 1)
  | 3 ->
    let* () =
      guard (available >= 3) (`Msg "window scale shorter than 3 bytes")
    in
    let* () =
      guard (Cstruct.get_uint8 data 1 = 3) (`Msg "window scale length not 3")
    in
    let shift = Cstruct.get_uint8 data 2 in
    Ok (Some (WindowScale shift), 3)
  | kind ->
    let* () = guard (available >= 2) (`Msg "option shorter than 2 bytes") in
    (* the length octet counts the kind and length octets as well *)
    let l = Cstruct.get_uint8 data 1 in
    let* () = guard (available >= l) (`Msg "option too short") in
    let* () =
      if l >= 2 then
        Ok ()
      else
        Error
          (`Msg
             (Fmt.str "option length must be >= 2, got %u, full option %a"
                l Cstruct.hexdump_pp data))
    in
    match kind with
    | 2 ->
      let* () =
        guard (l = 4) (`Msg "maximum segment size must be at least 4 bytes")
      in
      Ok (Some (MaximumSegmentSize (Cstruct.BE.get_uint16 data 2)), 4)
    | _ ->
      let value = Cstruct.sub data 2 (l - 2) in
      Ok (Some (Unknown (kind, value)), l)

(* Decodes all options contained in [data]. Stops with the first error reported
   by [decode_option]. Note that the resulting list is in reverse order of
   appearance in [data], since each decoded option is prepended to the
   accumulator and the accumulator is returned as is. *)
let decode_options data =
  let total = Cstruct.length data in
  let rec go idx acc =
    if idx <> total then
      match decode_option (Cstruct.shift data idx) with
      | Error e -> Error e
      | Ok (None, consumed) -> go (idx + consumed) acc
      | Ok (Some opt, consumed) -> go (idx + consumed) (opt :: acc)
    else
      Ok acc
  in
  go 0 []

(* Encoding and decoding of the TCP flags octet. Only FIN, SYN, RST, PSH and
   ACK are handled. Of FIN, SYN and RST at most one may be set; it is
   represented as a polymorphic variant option. *)
module Flag = struct
  type flags =
    | FIN
    | SYN
    | RST
    | PSH
    | ACK

  (* Bit mask of a flag within the flags octet. *)
  let number = function
    | FIN -> 0x01
    | SYN -> 0x02
    | RST -> 0x04
    | PSH -> 0x08
    | ACK -> 0x10

  (* [decode byte] is [Ok (ack, flag, psh)], or an error if more than one of
     FIN, SYN, RST is set. *)
  let decode byte =
    let is_set f = number f land byte <> 0 in
    let* flag =
      match is_set FIN, is_set SYN, is_set RST with
      | false, false, false -> Ok None
      | true, false, false -> Ok (Some `Fin)
      | false, true, false -> Ok (Some `Syn)
      | false, false, true -> Ok (Some `Rst)
      | _ -> Error (`Msg (Fmt.str "invalid flag combination: %02X" byte))
    in
    Ok (is_set ACK, flag, is_set PSH)

  (* Inverse of [decode]: builds the flags octet from [(ack, flag, psh)]. *)
  let encode (ack, flag, psh) =
    let bit_if cond f = if cond then number f else 0 in
    let control =
      match flag with
      | Some `Rst -> number RST
      | Some `Syn -> number SYN
      | Some `Fin -> number FIN
      | None -> 0
    in
    bit_if ack ACK + bit_if psh PSH + control

  (* Equality on the optional FIN/SYN/RST flag. *)
  let eq a b =
    match a, b with
    | None, None
    | Some `Syn, Some `Syn
    | Some `Fin, Some `Fin
    | Some `Rst, Some `Rst -> true
    | _ -> false

  (* Prints a one letter abbreviation of the flag, nothing for [None]. *)
  let pp ppf flag =
    let letter =
      match flag with
      | Some `Syn -> "S"
      | Some `Fin -> "F"
      | Some `Rst -> "R"
      | None -> ""
    in
    Fmt.string ppf letter
end

(* A TCP segment: the header fields handled by this module, plus the payload. *)
type t = {
  src_port : int ;
  dst_port : int ;
  seq : Sequence.t ;
  ack : Sequence.t option ; (* [None] when the ACK flag is not set *)
  flag : [ `Syn | `Fin | `Rst ] option ; (* at most one of SYN, FIN, RST *)
  push : bool ; (* the PSH flag *)
  window : int ;
  options : tcp_option list ;
  payload : string list ; (* payload chunks, in order *)
  payload_len : int ; (* total number of payload bytes *)
}

(* Equality on segments. Options must be pairwise equal and in the same order.
   Payloads are compared by content, independent of how they are chunked.
   [payload_len] is not compared. *)
let equal a b =
  let same_ports () = a.src_port = b.src_port && a.dst_port = b.dst_port in
  let same_numbers () =
    Sequence.equal a.seq b.seq && Option.equal Sequence.equal a.ack b.ack
  in
  let same_control () =
    Flag.eq a.flag b.flag && a.push = b.push && a.window = b.window
  in
  let same_options () = List.equal equal_option a.options b.options in
  let same_payload () =
    String.equal (String.concat "" a.payload) (String.concat "" b.payload)
  in
  same_ports () && same_numbers () && same_control () && same_options ()
  && same_payload ()

(* Largest value that fits into the 16 bit window field of the header. *)
let max_win = 0xFFFF

(* The maximum segment size option of [t], if any. If the option occurs more
   than once, the last occurrence in [t.options] wins. *)
let mss t =
  let rec last_mss found = function
    | [] -> found
    | MaximumSegmentSize x :: rest -> last_mss (Some x) rest
    | (WindowScale _ | Unknown _) :: rest -> last_mss found rest
  in
  last_mss None t.options

(* The window scale option of [t], if any. If the option occurs more than
   once, the last occurrence in [t.options] wins. *)
let ws t =
  let rec last_ws found = function
    | [] -> found
    | WindowScale x :: rest -> last_ws (Some x) rest
    | (MaximumSegmentSize _ | Unknown _) :: rest -> last_ws found rest
  in
  last_ws None t.options

(* we always take our IP as source, thus to_id -- to be used for a
   received segment -- needs to swap: the result is
   (dst, t.dst_port, src, t.src_port), i.e. the destination address and port
   of the segment come first. *)
let to_id ~src ~dst t = (dst, t.dst_port, src, t.src_port)

(* Pretty-printer for a segment: flags, sequence and acknowledgement numbers,
   window, options and the payload length. Ports and payload content are not
   printed. *)
let pp ppf t =
  let pp_ack = Fmt.option ~none:(Fmt.any "no") Sequence.pp in
  let pp_opts = Fmt.list ~sep:(Fmt.any ";@ ") pp_option in
  let push = if t.push then "P" else "" in
  Fmt.pf ppf "%a%s@ seq %a@ ack %a@ window %d@ opts %a, %d bytes data"
    Flag.pp t.flag push
    Sequence.pp t.seq
    pp_ack t.ack
    t.window
    pp_opts t.options
    t.payload_len

(* Sequence space taken by the flag: 1 for FIN and SYN, 0 for anything else. *)
let count_flags flag =
  match flag with
  | Some `Fin | Some `Syn -> 1
  | Some _ | None -> 0

(* auxFns:1520 *)
(* Builds a RST segment for the connection [(src, src_port, dst, dst_port)],
   with sequence number [snd_nxt] and acknowledging [rcv_nxt] of the control
   block [cb]. Returns [(src, dst, segment)]. *)
let make_rst_from_cb cb (src, src_port, dst, dst_port) =
  let seq = cb.State.snd_nxt in
  let ack = Some cb.State.rcv_nxt in
  let rst =
    { src_port ; dst_port ; seq ; ack ;
      flag = Some `Rst ; push = false ;
      window = 0 ; options = [] ;
      payload = [] ; payload_len = 0 }
  in
  src, dst, rst

(* auxFns:2219 *)
(* Computes the RST segment to send in reply to [seg], with ports swapped.
   No reply is produced if [seg] itself is a RST. If [seg] carries an ACK, the
   reply uses that number as its sequence number and has no ACK. Otherwise the
   reply has sequence number zero and acknowledges everything [seg] occupies
   in sequence space (payload plus FIN or SYN). *)
let dropwithreset seg =
  match seg.flag with
  | Some `Rst -> None
  | Some (`Fin | `Syn) | None ->
    let reply_ack, reply_seq =
      match seg.ack with
      | None ->
        let after_data = Sequence.addi seg.seq seg.payload_len in
        let after_flags = Sequence.addi after_data (count_flags seg.flag) in
        Some after_flags, Sequence.zero
      | Some acked ->
        (* never ACK an ACK *)
        None, acked
    in
    Some { src_port = seg.dst_port ;
           dst_port = seg.src_port ;
           seq = reply_seq ; ack = reply_ack ;
           flag = Some `Rst ; push = false ;
           window = 0 ; options = [] ;
           payload = [] ; payload_len = 0 }

(* auxFns:2331 *)
(* The RST to emit when dropping the connection [conn] identified by [id]:
   nothing in state [Syn_sent], otherwise the segment built by
   [make_rst_from_cb] from the control block of [conn]. *)
let drop_and_close id conn =
  match conn.State.tcp_state with
  | Syn_sent -> None
  | _ -> Some (make_rst_from_cb conn.control_block id)
 (* timed out and error handling (if err = timedout then cb.t_softerror ) *)

(* auxFns:1625 *)
(* Decides whether a segment should be output for [conn] at time [now].
   Returns [(do_output, persist_fun)]: [do_output] tells whether a segment
   should be sent; [persist_fun], if present, is a function to apply to the
   control block to adjust the persist timer (and, when the window shrunk,
   to reset [snd_nxt] to [snd_una]). This function itself does not modify
   [conn]. *)
let tcp_output_required now conn =
  let cb = conn.State.control_block in
  let snd_cwnd =
    let rxtcur = Subr.computed_rxtcur cb.State.t_rttinf in
    (* [than] is the idle time plus the current retransmission timeout; not
       being able to represent it is considered impossible here *)
    let than = match Mtime.add_span cb.State.t_idletime (Mtime.Span.of_uint64_ns rxtcur) with
      | None -> assert false
      | Some ms -> ms
    in
    if Sequence.equal cb.State.snd_max cb.State.snd_una && Mtime.is_later ~than now then
      (*: The connection is idle and has been for >= 1RTO :*)
      (*: Reduce [[snd_cwnd]] to commence slow start :*)
      cb.State.t_maxseg * Params.ss_fltsz
    else
      cb.State.snd_cwnd
  in
  (*: Calculate the amount of unused send window :*)
  let win = Int.min cb.State.snd_wnd snd_cwnd in
  let snd_wnd_unused = win - (Sequence.window cb.State.snd_nxt cb.State.snd_una) in
  (*: Is it possible that a FIN may need to be sent? :*)
  let fin_required =
    conn.State.cantsndmore &&
    match conn.State.tcp_state with Fin_wait_2 | Time_wait -> false | _ -> true
  in
  (*: Under BSD, we may need to send a [[FIN]] in state [[SYN_SENT]] or [[SYN_RECEIVED]], so we may
      effectively still have a [[SYN]] on the send queue. :*)
  let syn_not_acked = match conn.State.tcp_state with Syn_sent | Syn_received -> true | _ -> false in
  (*: Is there data or a FIN to transmit? :*)
  let last_sndq_data_seq = Sequence.addi cb.State.snd_una (Rope.length conn.State.sndq) in
  let last_sndq_data_and_fin_seq =
    Sequence.(addi (addi last_sndq_data_seq (if fin_required then 1 else 0))
                (if syn_not_acked then 1 else 0))
  in
  let have_data_to_send = Sequence.less cb.snd_nxt last_sndq_data_seq in
  let have_data_or_fin_to_send = Sequence.less cb.snd_nxt last_sndq_data_and_fin_seq in
  (*: The amount by which the right edge of the advertised window could be moved :*)
  let window_update_delta =
    (Int.min (Params.tcp_maxwin lsl cb.State.rcv_scale))
       (conn.rcvbufsize - Rope.length conn.rcvq) -
    Sequence.window cb.State.rcv_adv cb.State.rcv_nxt
  in
  (*: Send a window update? This occurs when (a) the advertised window can be increased by at
      least two maximum segment sizes, or (b) the advertised window can be increased by at least
      half the receive buffer size. See |tcp_output.c:322ff|. :*)
  let need_to_send_a_window_update =
    window_update_delta >= 2 * cb.State.t_maxseg || 2 * window_update_delta >= conn.rcvbufsize
  in
  (*: Note that silly window avoidance and [[max_sndwnd]] need to be dealt with
     here; see |tcp_output.c:309| :*)
  (*: Can a segment be transmitted? :*)
  let do_output =
    (*: Data to send and the send window has some space, or a FIN can be sent :*)
    (have_data_or_fin_to_send &&
     (if have_data_to_send then snd_wnd_unused > 0 else true)) || (* don't need space if only sending FIN *)
    (*: Can send a window update :*)
    need_to_send_a_window_update ||
    (*: An ACK should be sent immediately (e.g. in reply to a window probe) :*)
    cb.State.tf_shouldacknow
  in
  let persist_fun =
    (* NOTE(review): the comment on the [cant_send] branch below speaks of a
       non-empty send queue, but this condition requires the send queue to be
       EMPTY ([Rope.length conn.sndq = 0]). One of the two looks wrong --
       confirm against the model (auxFns:1625) before relying on either. *)
    let cant_send = not do_output && Rope.length conn.sndq = 0 && cb.State.tt_rexmt = None in
    let window_shrunk = win = 0 && snd_wnd_unused < 0 in  (*: [[win = 0]] if in [[SYN_SENT]], but still may send FIN :*)
                                                     (* (bsd_arch arch ==> tcp_sock.st <> SYN_SENT)) in *)
    if cant_send then  (* takes priority over window_shrunk; note this needs to be checked *)
      (*: Can not transmit a segment despite a non-empty send queue and no running persist or
          retransmit timer. Must be the case that the receiver's advertised window is now zero, so
          start the persist timer. Normal: |tcp_output.c:378ff| :*)
      Some (fun cb -> { cb with State.tt_rexmt = Subr.start_tt_persist now 0 cb.State.t_rttinf })
    else if window_shrunk then
        (*: The receiver's advertised window is zero and the receiver has retracted window space
            that it had previously advertised. Reset [[snd_nxt]] to [[snd_una]] because the data
            from [[snd_una]] to [[snd_nxt]] has likely not been buffered by the receiver and should
            be retransmitted. Bizzarely (on FreeBSD 4.6-RELEASE), if the persist timer is running
            reset its shift value :*)
        (* Window shrunk: |tcp_output.c:250ff| *)
      Some (fun cb ->
          let tt_rexmt = match cb.State.tt_rexmt with
            | Some ((Persist, _), d) -> Some ((State.Persist, 0), d)
            | _ -> Subr.start_tt_persist now 0 cb.t_rttinf
          in
          { cb with tt_rexmt ; snd_nxt = cb.snd_una })
    else
      (*: Otherwise, leave the persist timer alone :*)
      None
  in
  do_output, persist_fun

(* auxFns:1774 no ts and arch, though *)
(* Builds one outgoing segment for [conn] at time [now] and computes the
   connection state after sending it. The first tuple gives the addresses and
   ports to put on the segment. If [window_probe] is true and the usable send
   window is zero, a window of one byte is assumed.

   Returns [(conn', (src, dst, seg), more_data_could_be_sent)]: the updated
   connection, the segment together with its addresses, and a flag telling
   whether both the remaining queued data and the usable window exceed one
   maximum segment size (which [tcp_output_perhaps] uses to keep sending). *)
let tcp_output_really_helper now (src, src_port, dst, dst_port) window_probe conn =
  let cb = conn.State.control_block in
  let snd_cwnd =
    let rxtcur = Subr.computed_rxtcur cb.State.t_rttinf in
    let than = match Mtime.add_span cb.State.t_idletime (Mtime.Span.of_uint64_ns rxtcur) with
      | None -> assert false
      | Some ms -> ms
    in
    if Sequence.equal cb.State.snd_max cb.State.snd_una && Mtime.is_later ~than now then
      (*: The connection is idle and has been for >= 1RTO :*)
      (*: Reduce [[snd_cwnd]] to commence slow start :*)
      cb.State.t_maxseg * Params.ss_fltsz
    else
      cb.State.snd_cwnd
  in
  (* [win0] is the usable window before taking [window_probe] into account *)
  let win0 = Int.min cb.State.snd_wnd snd_cwnd in
  let win = if window_probe && win0 = 0 then 1 else win0 in
  let snd_wnd_unused = win - (Sequence.window cb.State.snd_nxt cb.State.snd_una) in
  let fin_required =
    conn.State.cantsndmore &&
    match conn.State.tcp_state with State.Fin_wait_2 | State.Time_wait -> false | _ -> true
  in
  let last_sndq_data_seq = Sequence.addi cb.State.snd_una (Rope.length conn.sndq) in
  (*: The data to send in this segment (if any) :*)
  let data_to_send, more_data_could_be_sent =
    (* skip the part of the send queue between [snd_una] and [snd_nxt] *)
    let data' =
      Rope.shift conn.State.sndq
        (Int.max 0
           (Int.min (Rope.length conn.State.sndq)
              (Sequence.window cb.State.snd_nxt cb.State.snd_una)))
        (* taking the minimum to avoid exceeding the sndq *)
    in
    let dlen = Rope.length data' in
    let len_could_be_sent = Int.max 0 snd_wnd_unused in
    (* limit to what the window allows and to one maximum segment size *)
    let data' =
      let len = Int.min dlen (Int.min len_could_be_sent cb.State.t_maxseg) in
      Rope.chop data' len in
    data', dlen > cb.t_maxseg && len_could_be_sent > cb.t_maxseg
  in
  let dlen = Rope.length data_to_send in
  (*: Should [[FIN]] be set in this segment? :*)
  let fin = fin_required && Sequence.(greater_equal (addi cb.State.snd_nxt dlen) last_sndq_data_seq) in
  (*: If this socket has previously sent a [[FIN]] which has not yet been acked, and [[snd_nxt]]
      is past the [[FIN]]'s sequence number, then [[snd_nxt]] should be set to the sequence number
      of the [[FIN]] flag, i.e. a retransmission. Check that [[snd_una <> iss]] as in this case no
      data has yet been sent over the socket  :*)
  let snd_nxt =
    if fin &&
       (Sequence.equal (Sequence.addi cb.State.snd_nxt dlen) (Sequence.incr last_sndq_data_seq) &&
        not (Sequence.equal cb.State.snd_una cb.State.iss) ||
        Sequence.window cb.State.snd_nxt cb.State.iss = 2)
    then
      Sequence.addi cb.State.snd_nxt (-1)
    else
      cb.State.snd_nxt
  in
  (*: The BSD way: set [[PSH]] whenever sending the last byte of data in the send queue :*)
  let push = dlen > 0 && Sequence.equal (Sequence.addi cb.State.snd_nxt dlen) last_sndq_data_seq in
  (*: Calculate size of the receive window (based upon available buffer space) :*)
  let rcv_wnd' =
    let window_size = Sequence.window cb.State.rcv_adv cb.State.rcv_nxt in
    match conn.State.tcp_state with
    | Time_wait -> window_size
    | _ ->
      let rcv_wnd'' = Subr.calculate_bsd_rcv_wnd conn in
      Int.max window_size
        (Int.min (Params.tcp_maxwin lsl cb.State.rcv_scale)
           (if rcv_wnd'' < conn.rcvbufsize / 4 && rcv_wnd'' < cb.State.t_maxseg
            then 0  (*: Silly window avoidance: shouldn't advertise a tiny window :*)
            else rcv_wnd''))
  in
  (*: Advertise an appropriately scaled receive window :*)
  (*: Assert the advertised window is within a sensible range :*)
  let flag = if fin then Some `Fin else None in
  let seg =
    { src_port ; dst_port ; seq = snd_nxt;
      ack = Some cb.State.rcv_nxt ; flag ; push ;
      window = Int.min (rcv_wnd' lsr cb.rcv_scale) max_win ;
      options = [] ; payload_len = Rope.length data_to_send ;
      payload = Rope.to_strings data_to_send
    }
  in
  (*: If emitting a [[FIN]] for the first time then change TCP state :*)
  let tcp_state =
    if fin then
      match conn.State.tcp_state with
      | Established -> State.Fin_wait_1
      | Close_wait -> State.Last_ack
      | x -> x
    else
      conn.State.tcp_state
  in
  (*: Updated values to store in the control block after the segment is output :*)
  let snd_nxt' = Sequence.(addi (addi snd_nxt dlen) (if fin then 1 else 0)) in
  let snd_max = Sequence.max cb.State.snd_max snd_nxt' in
  (*: Following a |tcp_output| code walkthrough by SB: :*)
  let tt_rexmt =
    if (State.mode_of cb.tt_rexmt = None ||
	(State.mode_of cb.tt_rexmt = Some State.Persist && not window_probe)) &&
       Sequence.greater snd_nxt' cb.snd_una then
      (*: If the retransmit timer is not running, or the persist timer is
         running and this segment isn't a window probe, and this segment
         contains data or a [[FIN]] that occurs past [[snd_una]] (i.e.~new
         data), then start the retransmit timer. Note: if the persist timer is
         running it will be implicitly stopped :*)
      Subr.start_tt_rexmt now 0 false cb.t_rttinf
    else if window_probe && win0 <> 0 && State.mode_of cb.tt_rexmt = Some State.Persist then
      (*: If the segment is a window probe, and in either case the send window
         is not closed, stop any running persist timer. :*)
      None (*: stop persisting :*)
    else
      (*: Otherwise, leave the timers alone :*)
      cb.tt_rexmt
  in
  (*: Time this segment if it is sensible to do so, i.e.~the following conditions hold : (a) a
       segment is not already being timed, and (b) data or a FIN are being sent, and (c) the
       segment being emitted is not a retransmit, and (d) the segment is not a window probe :*)
  let t_rttseg =
    if cb.State.t_rttseg = None &&
       (dlen > 0 || fin) &&
       Sequence.greater snd_nxt' cb.State.snd_max &&
       not window_probe
    then
      Some (now, snd_nxt')
    else
      cb.State.t_rttseg
  in
  (*: Update the socket :*)
  let control_block = {
    cb with
    tt_rexmt ;
    snd_cwnd ;
    tf_rxwin0sent = (rcv_wnd' = 0) ;
    tf_shouldacknow = false ;
    t_rttseg ;
    snd_max ;
    snd_nxt = snd_nxt' ;
    tt_delack = None ;
    last_ack_sent = cb.State.rcv_nxt ;
    rcv_adv = Sequence.addi cb.State.rcv_nxt rcv_wnd' ;
    rcv_wnd = rcv_wnd' ;
    (* the rcv_wnd update does not occur in the model, the reasoning is
       TCP1_hostTypesScript.sml:538: "Don't check equality of [[rcv_wnd]]: we
       recalculate [[rcv_wnd]] lazily in [[tcp_output]] instead of after every
       successful [[recv()]] call, so our value is often out of date."
       we're doing this update here, since we use cb rcv_wnd in the in_window
       check in input.ml *)
  } in
  { conn with tcp_state ; control_block }, (src, dst, seg), more_data_could_be_sent

(* Same as [tcp_output_really_helper], without the indication whether more
   data could be sent: returns the updated connection and the segment with
   its addresses. *)
let tcp_output_really now (src, src_port, dst, dst_port) window_probe conn =
  let id = (src, src_port, dst, dst_port) in
  match tcp_output_really_helper now id window_probe conn with
  | conn', seg, _more_possible -> conn', seg

(* auxFns:2000 *)
(* Outputs segments for [conn] if [tcp_output_required] says so. The persist
   timer adjustment computed by [tcp_output_required] is applied in any case.
   Returns the updated connection and the segments to send, in sending order
   (empty if no output is required). Segments are produced one after another
   for as long as [tcp_output_really_helper] reports that more data could be
   sent. *)
let tcp_output_perhaps now id conn =
  let do_output, persist_fun = tcp_output_required now conn in
  let conn =
    match persist_fun with
    | Some f -> { conn with control_block = f conn.control_block }
    | None -> conn
  in
  if not do_output then
    (conn, [])
  else
    let rec send_more conn sent =
      let conn, seg, more_possible =
        tcp_output_really_helper now id false conn
      in
      let sent = seg :: sent in
      if more_possible then send_more conn sent else (conn, List.rev sent)
    in
    send_more conn []

(* auxFns:1384 *)
(* Builds the SYN+ACK segment for the control block [cb]: sequence number
   [iss], acknowledging [rcv_nxt], carrying the advertised MSS and, if
   [request_r_scale] is set, a window scale option. The window is [rcv_wnd]
   capped at [max_win]. Returns [(src, dst, segment)]. *)
let make_syn_ack cb (src, src_port, dst, dst_port) =
  let window_scale =
    match cb.State.request_r_scale with
    | None -> []
    | Some sc -> [ WindowScale sc ]
  in
  let seg =
    { src_port ; dst_port ;
      seq = cb.State.iss ;
      ack = Some cb.State.rcv_nxt ;
      flag = Some `Syn ; push = false ;
      window = Int.min cb.State.rcv_wnd max_win ;
      options = MaximumSegmentSize cb.State.t_advmss :: window_scale ;
      payload = [] ; payload_len = 0 }
  in
  src, dst, seg

(* auxFns:1333 *)
(* Builds the initial SYN segment for the control block [cb]: sequence number
   [iss], no acknowledgement, carrying the advertised MSS and, if
   [request_r_scale] is set, a window scale option. The window is [rcv_wnd]
   capped at [max_win]. Returns [(src, dst, segment)]. *)
let make_syn cb (src, src_port, dst, dst_port) =
  let window_scale =
    match cb.State.request_r_scale with
    | None -> []
    | Some sc -> [ WindowScale sc ]
  in
  let seg =
    { src_port ; dst_port ;
      seq = cb.State.iss ;
      ack = None ;
      flag = Some `Syn ; push = false ;
      window = Int.min cb.State.rcv_wnd max_win ;
      options = MaximumSegmentSize cb.State.t_advmss :: window_scale ;
      payload = [] ; payload_len = 0 }
  in
  src, dst, seg

(* auxFns:1437 *)
(* Builds an ACK segment without payload for the control block [cb],
   acknowledging [rcv_nxt]. With [~fin:true] the FIN flag is set and the
   sequence number is [snd_una]; otherwise no flag is set and the sequence
   number is [snd_nxt]. The window is [rcv_wnd] scaled down by [rcv_scale] and
   capped at [max_win]. Returns [(src, dst, segment)]. *)
let make_ack cb ~fin (src, src_port, dst, dst_port) =
  let seq, flag =
    if fin then (cb.State.snd_una, Some `Fin) else (cb.State.snd_nxt, None)
  in
  (* sack *)
  let seg =
    { src_port ; dst_port ; seq ;
      ack = Some cb.State.rcv_nxt ;
      flag ; push = false ;
      window = Int.min (cb.State.rcv_wnd lsr cb.State.rcv_scale) max_win ;
      options = [] ;
      payload = [] ; payload_len = 0 }
  in
  src, dst, seg

(* Computes the TCP checksum of the encoded segment [cs], sent from [src] to
   [dst], over the pseudo header followed by [cs].

   Side effect: the checksum field of [cs] (bytes 16 and 17) is set to zero
   before summing, and is left at zero.

   @raise Invalid_argument if [src] and [dst] are not of the same address
   family. *)
let checksum ~src ~dst cs =
  let len = Cstruct.length cs in
  let protocol = 0x06 in
  (* construct pseudoheader *)
  let pseudo_header =
    match src, dst with
    | Ipaddr.V4 src, Ipaddr.V4 dst ->
      let buf = Bytes.make 12 '\000' in
      Bytes.set_int32_be buf 0 (Ipaddr.V4.to_int32 src);
      Bytes.set_int32_be buf 4 (Ipaddr.V4.to_int32 dst);
      Bytes.set_uint8 buf 9 protocol;
      Bytes.set_uint16_be buf 10 len;
      buf
    | Ipaddr.V6 src, Ipaddr.V6 dst ->
      let buf = Bytes.make 40 '\000' in
      Bytes.blit_string (Ipaddr.V6.to_octets src) 0 buf 0 16;
      Bytes.blit_string (Ipaddr.V6.to_octets dst) 0 buf 16 16;
      Bytes.set_uint16_be buf 34 len;
      Bytes.set_uint8 buf 39 protocol;
      buf
    | Ipaddr.V4 _, Ipaddr.V6 _ | Ipaddr.V6 _, Ipaddr.V4 _ ->
      invalid_arg "mixing IPv4 and IPv6 addresses not supported"
  in
  let header_sum =
    Checksum.feed_string ~off:0 ~len:(Bytes.length pseudo_header) 0
      (Bytes.unsafe_to_string pseudo_header)
  in
  (* the checksum field itself counts as zero *)
  Cstruct.BE.set_uint16 cs 16 0;
  Checksum.finally (Checksum.feed_cstruct header_sum cs)

(* [encode_into buf t] writes the header, the options and the payload of [t]
   into [buf], starting at offset 0. [buf] must hold at least [length t]
   bytes; the Cstruct setters raise otherwise.

   All header bytes are written, so [buf] does not need to be zeroed by the
   caller: the checksum field and the urgent pointer are set to zero, and the
   padding after the options is filled with zero bytes (which decode as end of
   options). The checksum is NOT computed here, see
   [encode_and_checksum_into]. *)
let encode_into buf t =
  let opt_len = length_options t.options in
  let payload_off = header_size + opt_len in
  Cstruct.BE.set_uint16 buf 0 t.src_port;
  Cstruct.BE.set_uint16 buf 2 t.dst_port;
  Cstruct.BE.set_uint32 buf 4 (Sequence.to_int32 t.seq);
  Cstruct.BE.set_uint32 buf 8 (match t.ack with None -> 0l | Some a -> Sequence.to_int32 a);
  (* data offset in 32 bit words in the upper 4 bit, lower 4 reserved:
     (bytes / 4) lsl 4 = bytes lsl 2 *)
  Cstruct.set_uint8 buf 12 (payload_off lsl 2);
  Cstruct.set_uint8 buf 13 (Flag.encode (Option.is_some t.ack, t.flag, t.push));
  Cstruct.BE.set_uint16 buf 14 t.window;
  (* checksum and urgent pointer: not used here, but must not keep whatever
     the buffer contained before *)
  Cstruct.BE.set_uint16 buf 16 0;
  Cstruct.BE.set_uint16 buf 18 0;
  let opts_end = encode_options buf header_size t.options in
  (* pad the options up to the 4 byte boundary *)
  for i = opts_end to payload_off - 1 do
    Cstruct.set_uint8 buf i 0
  done;
  let rec go dst_off = function
    | [] -> ()
    | x :: r ->
      let len = String.length x in
      Cstruct.blit_from_string x 0 buf dst_off len;
      go (dst_off + len) r
  in
  go payload_off t.payload

(* Number of bytes of the encoded segment: header, padded options and
   payload. *)
let length t =
  let options_len = length_options t.options in
  header_size + options_len + t.payload_len

(* Encodes [t] into a freshly allocated buffer of [length t] bytes. The
   checksum is not computed. *)
let encode t =
  let size = length t in
  let buf = Cstruct.create size in
  let () = encode_into buf t in
  buf

(* Encodes [t] into [buf] (see [encode_into]), stores the checksum computed
   for [src] and [dst] in the checksum field, and emits a debug trace
   containing the connection, [now], the payload length and the whole buffer
   in base64. *)
let encode_and_checksum_into now buf ~src ~dst t =
  encode_into buf t;
  Cstruct.BE.set_uint16 buf 16 (checksum ~src ~dst buf);
  State.Tracing.debug (fun m ->
      let conn_id = (src, t.src_port, dst, t.dst_port) in
      let encoded = Base64.encode_string (Cstruct.to_string buf) in
      m "%a [%a] out %u %s"
        State.Connection.pp conn_id
        Mtime.pp now t.payload_len
        encoded)

(* Like [encode_and_checksum_into], with a freshly allocated buffer of
   [length t] bytes, which is returned. *)
let encode_and_checksum now ~src ~dst t =
  let size = length t in
  let buf = Cstruct.create size in
  let () = encode_and_checksum_into now buf ~src ~dst t in
  buf

(* NOTE(dinosaure): We would like the data to be located on the minor heap. For
   this reason, to convert from [Cstruct.t] to strings, we fragment the data so
   that each chunk can be allocated as quickly as possible (on the minor heap),
   unlike allocation on the major heap, where data exceeding [0x7ff] bytes is
   allocated. *)
(* [to_chunks cs] copies the content of [cs] into a list of strings of at most
   [0x7ff] bytes each, in order. The result is the empty list if [cs] is
   empty. *)
let to_chunks cs =
  let chunk_size = 0x7ff in
  let rec go acc cs =
    (* structural match on the length (physical equality is not meant for
       comparing values) *)
    match Cstruct.length cs with
    | 0 -> List.rev acc
    | remaining ->
      let len = Int.min chunk_size remaining in
      let buf = Bytes.create len in
      Cstruct.blit_to_bytes cs 0 buf 0 len;
      (* [buf] is not used after this point, so it is safe not to copy it *)
      go (Bytes.unsafe_to_string buf :: acc) (Cstruct.shift cs len)
  in
  go [] cs

(* [decode data] decodes a TCP segment from [data]. The result is the segment
   together with the value of the checksum field. The checksum is NOT
   verified here, see [decode_and_validate]. The urgent pointer is ignored.

   An error is returned if [data] is shorter than a header, if the data offset
   field is smaller than 5 (32 bit words, i.e. a header) or points beyond the
   end of [data], if the flags are an invalid combination, or if the options
   cannot be decoded. *)
let decode data =
  let* () = guard (Cstruct.length data >= header_size) (`Msg "too small") in
  let src_port = Cstruct.BE.get_uint16 data 0
  and dst_port = Cstruct.BE.get_uint16 data 2
  and seq = Sequence.of_int32 (Cstruct.BE.get_uint32 data 4)
  and ack = Sequence.of_int32 (Cstruct.BE.get_uint32 data 8)
  (* the data offset field counts 32 bit words; lower 4 are reserved [can't
     assume they're 0] *)
  and data_off_words = Cstruct.get_uint8 data 12 lsr 4
  in
  let* () =
    guard (data_off_words >= 5)
      (`Msg ("data offset field must be greater than or equal to 5, but it is " ^ string_of_int data_off_words))
  in
  (* from here on [data_off] is in bytes, and at least [header_size], so the
     length of the options below cannot be negative *)
  let data_off = data_off_words * 4 in
  let* ackf, flag, push = Flag.decode (Cstruct.get_uint8 data 13) in
  let window = Cstruct.BE.get_uint16 data 14
  and checksum = Cstruct.BE.get_uint16 data 16
  (* and _up = Cstruct.BE.get_uint16 data 18 *)
  in
  let* () =
    guard (Cstruct.length data >= data_off) (`Msg "data_offset too big")
  in
  let options_buf = Cstruct.sub data header_size (data_off - header_size) in
  let* options = decode_options options_buf in
  let payload = Cstruct.shift data data_off in
  let payload_len = Cstruct.length payload in
  let payload = to_chunks payload in
  (* the acknowledgement number is only meaningful if the ACK flag is set *)
  let ack = if ackf then Some ack else None in
  Ok ({ src_port; dst_port; seq; ack; flag; push; window; options; payload_len; payload },
      checksum)

(* Decodes [data] (see [decode]) and validates the result: the checksum must
   match, neither address may be the IPv4 broadcast address or a multicast
   address, and source and destination must not be the same address and port.
   Returns the segment and its connection identifier as computed by [to_id].

   Note that computing the checksum sets the checksum field of [data] to
   zero. *)
let decode_and_validate ~src ~dst data =
  let* t, pkt_csum = decode data in
  let computed = checksum ~src ~dst data in
  (* these are already checks done in deliver_in_4, etc. *)
  let pkt_csum =
    match pkt_csum with
    | 0xffff -> 0x0
    | csum -> csum
  in
  let* () = guard (computed = pkt_csum) (`Msg "invalid checksum") in
  let not_v4_broadcast ip msg =
    guard (Ipaddr.V4.compare ip Ipaddr.V4.broadcast <> 0) (`Msg msg)
  in
  let* () =
    match src, dst with
    | Ipaddr.V4 src, Ipaddr.V4 dst ->
      let* () = not_v4_broadcast src "segment from broadcast" in
      not_v4_broadcast dst "segment to broadcast"
    | _ -> Ok ()
  in
  let not_multicast ip msg = guard (not (Ipaddr.is_multicast ip)) (`Msg msg) in
  let* () = not_multicast src "segment from multicast address" in
  let* () = not_multicast dst "segment to multicast address" in
  let* () =
    guard (Ipaddr.compare src dst <> 0 || t.src_port <> t.dst_port)
      (`Msg "segment source and destination ip and port are equal")
  in
  Ok (t, to_id ~src ~dst t)