Statistics
| Branch: | Revision:

root / slirp / tcp_output.c @ b6dce92e

History | View | Annotate | Download (14.1 kB)

1
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *        The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *        @(#)tcp_output.c        8.3 (Berkeley) 12/30/93
 * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp
 */
32

    
33
/*
 * Changes and additions relating to SLiRP
 * Copyright (c) 1995 Danny Gasparovski.
 *
 * Please read the file COPYRIGHT for the
 * terms and conditions of the copyright.
 */
40

    
41
#include <slirp.h>
42

    
43
static const u_char  tcp_outflags[TCP_NSTATES] = {
44
        TH_RST|TH_ACK, 0,      TH_SYN,        TH_SYN|TH_ACK,
45
        TH_ACK,        TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK,
46
        TH_FIN|TH_ACK, TH_ACK, TH_ACK,
47
};
48

    
49

    
50
#define MAX_TCPOPTLEN        32        /* max # bytes that go in options */
51

    
52
/*
53
 * Tcp output routine: figure out what should be sent and send it.
54
 */
55
int
56
tcp_output(struct tcpcb *tp)
57
{
58
        register struct socket *so = tp->t_socket;
59
        register long len, win;
60
        int off, flags, error;
61
        register struct mbuf *m;
62
        register struct tcpiphdr *ti;
63
        u_char opt[MAX_TCPOPTLEN];
64
        unsigned optlen, hdrlen;
65
        int idle, sendalot;
66

    
67
        DEBUG_CALL("tcp_output");
68
        DEBUG_ARG("tp = %lx", (long )tp);
69

    
70
        /*
71
         * Determine length of data that should be transmitted,
72
         * and flags that will be used.
73
         * If there is some data or critical controls (SYN, RST)
74
         * to send, then transmit; otherwise, investigate further.
75
         */
76
        idle = (tp->snd_max == tp->snd_una);
77
        if (idle && tp->t_idle >= tp->t_rxtcur)
78
                /*
79
                 * We have been idle for "a while" and no acks are
80
                 * expected to clock out any data we send --
81
                 * slow start to get ack "clock" running again.
82
                 */
83
                tp->snd_cwnd = tp->t_maxseg;
84
again:
85
        sendalot = 0;
86
        off = tp->snd_nxt - tp->snd_una;
87
        win = min(tp->snd_wnd, tp->snd_cwnd);
88

    
89
        flags = tcp_outflags[tp->t_state];
90

    
91
        DEBUG_MISC((dfd, " --- tcp_output flags = 0x%x\n",flags));
92

    
93
        /*
94
         * If in persist timeout with window of 0, send 1 byte.
95
         * Otherwise, if window is small but nonzero
96
         * and timer expired, we will send what we can
97
         * and go to transmit state.
98
         */
99
        if (tp->t_force) {
100
                if (win == 0) {
101
                        /*
102
                         * If we still have some data to send, then
103
                         * clear the FIN bit.  Usually this would
104
                         * happen below when it realizes that we
105
                         * aren't sending all the data.  However,
106
                         * if we have exactly 1 byte of unset data,
107
                         * then it won't clear the FIN bit below,
108
                         * and if we are in persist state, we wind
109
                         * up sending the packet without recording
110
                         * that we sent the FIN bit.
111
                         *
112
                         * We can't just blindly clear the FIN bit,
113
                         * because if we don't have any more data
114
                         * to send then the probe will be the FIN
115
                         * itself.
116
                         */
117
                        if (off < so->so_snd.sb_cc)
118
                                flags &= ~TH_FIN;
119
                        win = 1;
120
                } else {
121
                        tp->t_timer[TCPT_PERSIST] = 0;
122
                        tp->t_rxtshift = 0;
123
                }
124
        }
125

    
126
        len = min(so->so_snd.sb_cc, win) - off;
127

    
128
        if (len < 0) {
129
                /*
130
                 * If FIN has been sent but not acked,
131
                 * but we haven't been called to retransmit,
132
                 * len will be -1.  Otherwise, window shrank
133
                 * after we sent into it.  If window shrank to 0,
134
                 * cancel pending retransmit and pull snd_nxt
135
                 * back to (closed) window.  We will enter persist
136
                 * state below.  If the window didn't close completely,
137
                 * just wait for an ACK.
138
                 */
139
                len = 0;
140
                if (win == 0) {
141
                        tp->t_timer[TCPT_REXMT] = 0;
142
                        tp->snd_nxt = tp->snd_una;
143
                }
144
        }
145

    
146
        if (len > tp->t_maxseg) {
147
                len = tp->t_maxseg;
148
                sendalot = 1;
149
        }
150
        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
151
                flags &= ~TH_FIN;
152

    
153
        win = sbspace(&so->so_rcv);
154

    
155
        /*
156
         * Sender silly window avoidance.  If connection is idle
157
         * and can send all data, a maximum segment,
158
         * at least a maximum default-size segment do it,
159
         * or are forced, do it; otherwise don't bother.
160
         * If peer's buffer is tiny, then send
161
         * when window is at least half open.
162
         * If retransmitting (possibly after persist timer forced us
163
         * to send into a small window), then must resend.
164
         */
165
        if (len) {
166
                if (len == tp->t_maxseg)
167
                        goto send;
168
                if ((1 || idle || tp->t_flags & TF_NODELAY) &&
169
                    len + off >= so->so_snd.sb_cc)
170
                        goto send;
171
                if (tp->t_force)
172
                        goto send;
173
                if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
174
                        goto send;
175
                if (SEQ_LT(tp->snd_nxt, tp->snd_max))
176
                        goto send;
177
        }
178

    
179
        /*
180
         * Compare available window to amount of window
181
         * known to peer (as advertised window less
182
         * next expected input).  If the difference is at least two
183
         * max size segments, or at least 50% of the maximum possible
184
         * window, then want to send a window update to peer.
185
         */
186
        if (win > 0) {
187
                /*
188
                 * "adv" is the amount we can increase the window,
189
                 * taking into account that we are limited by
190
                 * TCP_MAXWIN << tp->rcv_scale.
191
                 */
192
                long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
193
                        (tp->rcv_adv - tp->rcv_nxt);
194

    
195
                if (adv >= (long) (2 * tp->t_maxseg))
196
                        goto send;
197
                if (2 * adv >= (long) so->so_rcv.sb_datalen)
198
                        goto send;
199
        }
200

    
201
        /*
202
         * Send if we owe peer an ACK.
203
         */
204
        if (tp->t_flags & TF_ACKNOW)
205
                goto send;
206
        if (flags & (TH_SYN|TH_RST))
207
                goto send;
208
        if (SEQ_GT(tp->snd_up, tp->snd_una))
209
                goto send;
210
        /*
211
         * If our state indicates that FIN should be sent
212
         * and we have not yet done so, or we're retransmitting the FIN,
213
         * then we need to send.
214
         */
215
        if (flags & TH_FIN &&
216
            ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
217
                goto send;
218

    
219
        /*
220
         * TCP window updates are not reliable, rather a polling protocol
221
         * using ``persist'' packets is used to insure receipt of window
222
         * updates.  The three ``states'' for the output side are:
223
         *        idle                        not doing retransmits or persists
224
         *        persisting                to move a small or zero window
225
         *        (re)transmitting        and thereby not persisting
226
         *
227
         * tp->t_timer[TCPT_PERSIST]
228
         *        is set when we are in persist state.
229
         * tp->t_force
230
         *        is set when we are called to send a persist packet.
231
         * tp->t_timer[TCPT_REXMT]
232
         *        is set when we are retransmitting
233
         * The output side is idle when both timers are zero.
234
         *
235
         * If send window is too small, there is data to transmit, and no
236
         * retransmit or persist is pending, then go to persist state.
237
         * If nothing happens soon, send when timer expires:
238
         * if window is nonzero, transmit what we can,
239
         * otherwise force out a byte.
240
         */
241
        if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
242
            tp->t_timer[TCPT_PERSIST] == 0) {
243
                tp->t_rxtshift = 0;
244
                tcp_setpersist(tp);
245
        }
246

    
247
        /*
248
         * No reason to send a segment, just return.
249
         */
250
        return (0);
251

    
252
send:
253
        /*
254
         * Before ESTABLISHED, force sending of initial options
255
         * unless TCP set not to do any options.
256
         * NOTE: we assume that the IP/TCP header plus TCP options
257
         * always fit in a single mbuf, leaving room for a maximum
258
         * link header, i.e.
259
         *        max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
260
         */
261
        optlen = 0;
262
        hdrlen = sizeof (struct tcpiphdr);
263
        if (flags & TH_SYN) {
264
                tp->snd_nxt = tp->iss;
265
                if ((tp->t_flags & TF_NOOPT) == 0) {
266
                        uint16_t mss;
267

    
268
                        opt[0] = TCPOPT_MAXSEG;
269
                        opt[1] = 4;
270
                        mss = htons((uint16_t) tcp_mss(tp, 0));
271
                        memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss));
272
                        optlen = 4;
273
                }
274
         }
275

    
276
         hdrlen += optlen;
277

    
278
        /*
279
         * Adjust data length if insertion of options will
280
         * bump the packet length beyond the t_maxseg length.
281
         */
282
         if (len > tp->t_maxseg - optlen) {
283
                len = tp->t_maxseg - optlen;
284
                sendalot = 1;
285
         }
286

    
287
        /*
288
         * Grab a header mbuf, attaching a copy of data to
289
         * be transmitted, and initialize the header from
290
         * the template for sends on this connection.
291
         */
292
        if (len) {
293
                m = m_get(so->slirp);
294
                if (m == NULL) {
295
                        error = 1;
296
                        goto out;
297
                }
298
                m->m_data += IF_MAXLINKHDR;
299
                m->m_len = hdrlen;
300

    
301
                sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen);
302
                m->m_len += len;
303

    
304
                /*
305
                 * If we're sending everything we've got, set PUSH.
306
                 * (This will keep happy those implementations which only
307
                 * give data to the user when a buffer fills or
308
                 * a PUSH comes in.)
309
                 */
310
                if (off + len == so->so_snd.sb_cc)
311
                        flags |= TH_PUSH;
312
        } else {
313
                m = m_get(so->slirp);
314
                if (m == NULL) {
315
                        error = 1;
316
                        goto out;
317
                }
318
                m->m_data += IF_MAXLINKHDR;
319
                m->m_len = hdrlen;
320
        }
321

    
322
        ti = mtod(m, struct tcpiphdr *);
323

    
324
        memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr));
325

    
326
        /*
327
         * Fill in fields, remembering maximum advertised
328
         * window for use in delaying messages about window sizes.
329
         * If resending a FIN, be sure not to use a new sequence number.
330
         */
331
        if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
332
            tp->snd_nxt == tp->snd_max)
333
                tp->snd_nxt--;
334
        /*
335
         * If we are doing retransmissions, then snd_nxt will
336
         * not reflect the first unsent octet.  For ACK only
337
         * packets, we do not want the sequence number of the
338
         * retransmitted packet, we want the sequence number
339
         * of the next unsent octet.  So, if there is no data
340
         * (and no SYN or FIN), use snd_max instead of snd_nxt
341
         * when filling in ti_seq.  But if we are in persist
342
         * state, snd_max might reflect one byte beyond the
343
         * right edge of the window, so use snd_nxt in that
344
         * case, since we know we aren't doing a retransmission.
345
         * (retransmit and persist are mutually exclusive...)
346
         */
347
        if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
348
                ti->ti_seq = htonl(tp->snd_nxt);
349
        else
350
                ti->ti_seq = htonl(tp->snd_max);
351
        ti->ti_ack = htonl(tp->rcv_nxt);
352
        if (optlen) {
353
                memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen);
354
                ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
355
        }
356
        ti->ti_flags = flags;
357
        /*
358
         * Calculate receive window.  Don't shrink window,
359
         * but avoid silly window syndrome.
360
         */
361
        if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg)
362
                win = 0;
363
        if (win > (long)TCP_MAXWIN << tp->rcv_scale)
364
                win = (long)TCP_MAXWIN << tp->rcv_scale;
365
        if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
366
                win = (long)(tp->rcv_adv - tp->rcv_nxt);
367
        ti->ti_win = htons((uint16_t) (win>>tp->rcv_scale));
368

    
369
        if (SEQ_GT(tp->snd_up, tp->snd_una)) {
370
                ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq)));
371
                ti->ti_flags |= TH_URG;
372
        } else
373
                /*
374
                 * If no urgent pointer to send, then we pull
375
                 * the urgent pointer to the left edge of the send window
376
                 * so that it doesn't drift into the send window on sequence
377
                 * number wraparound.
378
                 */
379
                tp->snd_up = tp->snd_una;                /* drag it along */
380

    
381
        /*
382
         * Put TCP length in extended header, and then
383
         * checksum extended header and data.
384
         */
385
        if (len + optlen)
386
                ti->ti_len = htons((uint16_t)(sizeof (struct tcphdr) +
387
                    optlen + len));
388
        ti->ti_sum = cksum(m, (int)(hdrlen + len));
389

    
390
        /*
391
         * In transmit state, time the transmission and arrange for
392
         * the retransmit.  In persist state, just set snd_max.
393
         */
394
        if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
395
                tcp_seq startseq = tp->snd_nxt;
396

    
397
                /*
398
                 * Advance snd_nxt over sequence space of this segment.
399
                 */
400
                if (flags & (TH_SYN|TH_FIN)) {
401
                        if (flags & TH_SYN)
402
                                tp->snd_nxt++;
403
                        if (flags & TH_FIN) {
404
                                tp->snd_nxt++;
405
                                tp->t_flags |= TF_SENTFIN;
406
                        }
407
                }
408
                tp->snd_nxt += len;
409
                if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
410
                        tp->snd_max = tp->snd_nxt;
411
                        /*
412
                         * Time this transmission if not a retransmission and
413
                         * not currently timing anything.
414
                         */
415
                        if (tp->t_rtt == 0) {
416
                                tp->t_rtt = 1;
417
                                tp->t_rtseq = startseq;
418
                        }
419
                }
420

    
421
                /*
422
                 * Set retransmit timer if not currently set,
423
                 * and not doing an ack or a keep-alive probe.
424
                 * Initial value for retransmit timer is smoothed
425
                 * round-trip time + 2 * round-trip time variance.
426
                 * Initialize shift counter which is used for backoff
427
                 * of retransmit time.
428
                 */
429
                if (tp->t_timer[TCPT_REXMT] == 0 &&
430
                    tp->snd_nxt != tp->snd_una) {
431
                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
432
                        if (tp->t_timer[TCPT_PERSIST]) {
433
                                tp->t_timer[TCPT_PERSIST] = 0;
434
                                tp->t_rxtshift = 0;
435
                        }
436
                }
437
        } else
438
                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
439
                        tp->snd_max = tp->snd_nxt + len;
440

    
441
        /*
442
         * Fill in IP length and desired time to live and
443
         * send to IP level.  There should be a better way
444
         * to handle ttl and tos; we could keep them in
445
         * the template, but need a way to checksum without them.
446
         */
447
        m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */
448

    
449
    {
450

    
451
        ((struct ip *)ti)->ip_len = m->m_len;
452

    
453
        ((struct ip *)ti)->ip_ttl = IPDEFTTL;
454
        ((struct ip *)ti)->ip_tos = so->so_iptos;
455

    
456
        error = ip_output(so, m);
457
    }
458
        if (error) {
459
out:
460
                return (error);
461
        }
462

    
463
        /*
464
         * Data sent (as far as we can tell).
465
         * If this advertises a larger window than any other segment,
466
         * then remember the size of the advertised window.
467
         * Any pending ACK has now been sent.
468
         */
469
        if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
470
                tp->rcv_adv = tp->rcv_nxt + win;
471
        tp->last_ack_sent = tp->rcv_nxt;
472
        tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
473
        if (sendalot)
474
                goto again;
475

    
476
        return (0);
477
}
478

    
479
void
480
tcp_setpersist(struct tcpcb *tp)
481
{
482
    int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
483

    
484
        /*
485
         * Start/restart persistence timer.
486
         */
487
        TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
488
            t * tcp_backoff[tp->t_rxtshift],
489
            TCPTV_PERSMIN, TCPTV_PERSMAX);
490
        if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
491
                tp->t_rxtshift++;
492
}