1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-24 11:29:10 +00:00

Here we update the modular tcp to be able to switch to an

alternate TCP stack in other than the closed state (pre-listen/connect).
The idea is that *if* that is supported by the alternate stack, it
is asked if it's ok to switch. If it approves the "handoff" then we
allow the switch to happen. Also the fini() function now gets a flag
to tell if you are switching away *or* the tcb is destroyed. The
init() call into the alternate stack is moved to the end so the
tcb is more fully formed before the init transpires.

Sponsored by:	Netflix Inc.
Differential Revision:	D6790
This commit is contained in:
Randall Stewart 2016-08-16 15:11:46 +00:00
parent 858a3f496f
commit 587d67c008
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=304223
6 changed files with 116 additions and 37 deletions

View File

@ -633,7 +633,8 @@ when trying to use a TCP function block that is not available;
.Xr mod_cc 4 ,
.Xr siftr 4 ,
.Xr syncache 4 ,
.Xr setkey 8
.Xr setkey 8 ,
.Xr tcp_functions 9
.Rs
.%A "V. Jacobson"
.%A "R. Braden"

View File

@ -114,14 +114,17 @@ struct tcp_function_block {
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
void (*tfb_tcp_fb_init)(struct tcpcb *);
void (*tfb_tcp_fb_fini)(struct tcpcb *);
void (*tfb_tcp_fb_fini)(struct tcpcb *, int);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
void (*tfb_tcp_timer_activate)(struct tcpcb *,
uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
/* Optional functions */
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
/* System use */
volatile uint32_t tfb_refcnt;
uint32_t tfb_flags;
};
@ -157,6 +160,16 @@ in the
.Va tfb_tcp_fb_fini
field.
.Pp
If the
.Va tfb_tcp_fb_fini
field is non-NULL, the function to which it points is called when the
kernel is destroying the TCP control block or when the socket is transitioning
to use a different TCP stack.
The function is called with arguments of the TCP control block and an integer
flag.
The flag will be zero if the socket is transitioning to use another TCP stack
or one if the TCP control block is being destroyed.
.Pp
If the TCP stack implements additional timers, the TCP stack should set a
non-NULL pointer in the
.Va tfb_tcp_timer_stop_all ,
@ -193,6 +206,37 @@ However, care must be taken to ensure the retransmit timer leaves the
TCP control block in a valid state for the remainder of the retransmit
timer logic.
.Pp
A user may select a new TCP stack before calling
.Xr connect 2
or
.Xr listen 2 .
Optionally, a TCP stack may also allow a user to begin using the TCP stack for
a connection that is in a later state by setting a non-NULL function pointer in
the
.Va tfb_tcp_handoff_ok
field.
If this field is non-NULL and a user attempts to select that TCP stack after
calling
.Xr connect 2
or
.Xr listen 2
for that socket, the kernel will call the function pointed to by the
.Va tfb_tcp_handoff_ok
field.
The function should return 0 if the user is allowed to switch the socket to use
the TCP stack.
Otherwise, the function should return an error code, which will be returned to
the user.
If the
.Va tfb_tcp_handoff_ok
field is
.Dv NULL
and a user attempts to select the TCP stack after calling
.Xr connect 2
or
.Xr listen 2
for that socket, the operation will fail and the kernel will return
.Er EINVAL .
.Pp
The
.Va tfb_refcnt
and
@ -269,8 +313,10 @@ The
.Fa blk
argument references a function block that is not currently registered.
.Sh SEE ALSO
.Xr malloc 9 ,
.Xr tcp 4
.Xr connect 2 ,
.Xr listen 2 ,
.Xr tcp 4 ,
.Xr malloc 9
.Sh HISTORY
This framework first appeared in
.Fx 11.0 .

View File

@ -1187,9 +1187,6 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_fb = tcp_func_set_ptr;
refcount_acquire(&tp->t_fb->tfb_refcnt);
rw_runlock(&tcp_function_lock);
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
/*
* Use the current system default CC algorithm.
*/
@ -1201,7 +1198,7 @@ tcp_newtcpcb(struct inpcb *inp)
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
@ -1210,7 +1207,7 @@ tcp_newtcpcb(struct inpcb *inp)
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
@ -1271,6 +1268,9 @@ tcp_newtcpcb(struct inpcb *inp)
*/
tcp_pcap_tcpcb_init(tp);
#endif
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
return (tp); /* XXX */
}
@ -1484,7 +1484,7 @@ tcp_discardcb(struct tcpcb *tp)
if (tp->t_timers->tt_draincnt == 0) {
/* We own the last reference on tcpcb, let's free it. */
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
@ -1513,7 +1513,7 @@ tcp_timer_discard(void *ptp)
if (tp->t_timers->tt_draincnt == 0) {
/* We own the last reference on this tcpcb, let's free it. */
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);

View File

@ -842,7 +842,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
KASSERT(rblk != NULL,
("cannot find blk %p out of syncache?", blk));
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_fb = rblk;
if (tp->t_fb->tfb_tcp_fb_init) {

View File

@ -1420,40 +1420,59 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (error)
return (error);
INP_WLOCK_RECHECK(inp);
if (tp->t_state != TCPS_CLOSED) {
/*
* The user has advanced the state
* past the initial point, we can't
* switch since we are down the road
* and a new set of functions may
* not be compatible.
*/
INP_WUNLOCK(inp);
return(EINVAL);
}
blk = find_and_ref_tcp_functions(&fsn);
if (blk == NULL) {
INP_WUNLOCK(inp);
return (ENOENT);
}
if (tp->t_fb != blk) {
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
if (tp->t_fb == blk) {
/* You already have this */
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (0);
}
if (tp->t_state != TCPS_CLOSED) {
int error=EINVAL;
/*
* The user has advanced the state
* past the initial point, we may not
* be able to switch.
*/
if (blk->tfb_tcp_handoff_ok != NULL) {
/*
* If the stack provides a query
* mechanism, the switch may still
* be possible; ask it.
*/
error = (*blk->tfb_tcp_handoff_ok)(tp);
}
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (ENOENT);
return(error);
}
}
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
return (ENOENT);
}
/*
* Release the old refcnt, the
* lookup acquired a ref on the
* new one already.
*/
if (tp->t_fb->tfb_tcp_fb_fini) {
/*
* Release the old refcnt, the
* lookup acquires a ref on the
* new one.
* Tell the stack to cleanup with 0 i.e.
* the tcb is not going away.
*/
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_fb = blk;
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
}
refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_fb = blk;
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {

View File

@ -116,6 +116,18 @@ struct socket;
* does not know your callbacks you must provide a
* stop_all function that loops through and calls
* tcp_timer_stop() with each of your defined timers.
* Adding a tfb_tcp_handoff_ok function allows the socket
* option to change stacks to query you even if the
* connection is in a later stage. You return 0 to
* say you can take over and run your stack; you return
* non-zero (an error number) to say you can't.
* If the function is undefined you can only change
* in the early states (before connect or listen).
* tfb_tcp_fb_fini is changed to add a flag to tell
* the old stack if the tcb is being destroyed or
* not. A one in the flag means the TCB is being
* destroyed, a zero indicates it's transitioning to
* another stack (via socket option).
*/
struct tcp_function_block {
char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
@ -128,7 +140,7 @@ struct tcp_function_block {
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
void (*tfb_tcp_fb_init)(struct tcpcb *);
void (*tfb_tcp_fb_fini)(struct tcpcb *);
void (*tfb_tcp_fb_fini)(struct tcpcb *, int);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
void (*tfb_tcp_timer_activate)(struct tcpcb *,
@ -136,6 +148,7 @@ struct tcp_function_block {
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
volatile uint32_t tfb_refcnt;
uint32_t tfb_flags;
};