Commit f971e791 authored by Gleb Smirnoff

tcp_hpts: rename input queue to drop queue and trim dead code

The HPTS input queue is in reality used only for "delayed drops".
When a TCP stack decides to drop a connection on the output path,
it can't do that directly due to the locking protocol between the
main tcp_output() and the stacks.  So rack/bbr utilize HPTS to drop
the connection in a different context.

In the past the queue could also process input packets in the
context of the HPTS thread, but no stack uses this anymore, so
remove that functionality.

Reviewed by:		rrs
Differential revision:	https://reviews.freebsd.org/D33025
parent b0a7c008
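
Before the diff, a brief illustration of the delayed-drop pattern the message describes. The sketch below is plain user-space C with POSIX threads, not kernel code; the names conn, conn_set_to_drop() and drop_worker() are hypothetical stand-ins for an inpcb, tcp_set_inp_to_drop() and the HPTS thread. A caller that cannot tear the connection down in its current locking context records a reason, queues the connection and wakes the worker, which performs the drop later in a clean context.

/*
 * Minimal user-space model of "delayed drop" (assumed names, not FreeBSD
 * kernel code): producers queue connections they cannot free in their own
 * locking context; a worker thread drops them later.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct conn {
    int id;
    int drop_reason;            /* plays the role of inp_hpts_drop_reas */
    struct conn *next;
};

static pthread_mutex_t dropq_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t dropq_cv = PTHREAD_COND_INITIALIZER;
static struct conn *dropq_head;
static int done;

/* Rough analogue of tcp_set_inp_to_drop(): record the reason and enqueue. */
static void
conn_set_to_drop(struct conn *c, int reason)
{
    c->drop_reason = reason;
    pthread_mutex_lock(&dropq_mtx);
    c->next = dropq_head;
    dropq_head = c;
    pthread_cond_signal(&dropq_cv);     /* analogue of tcp_wakehpts() */
    pthread_mutex_unlock(&dropq_mtx);
}

/* Rough analogue of the HPTS thread draining its drop queue. */
static void *
drop_worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&dropq_mtx);
    while (!done || dropq_head != NULL) {
        while (dropq_head == NULL && !done)
            pthread_cond_wait(&dropq_cv, &dropq_mtx);
        /* Detach the whole queue, then work with the lock dropped. */
        struct conn *c = dropq_head;
        dropq_head = NULL;
        pthread_mutex_unlock(&dropq_mtx);
        while (c != NULL) {
            struct conn *next = c->next;
            printf("dropping conn %d, reason %d\n", c->id, c->drop_reason);
            free(c);
            c = next;
        }
        pthread_mutex_lock(&dropq_mtx);
    }
    pthread_mutex_unlock(&dropq_mtx);
    return (NULL);
}

int
main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, drop_worker, NULL);
    for (int i = 0; i < 3; i++) {
        struct conn *c = malloc(sizeof(*c));
        c->id = i;
        conn_set_to_drop(c, 100 + i);
    }
    pthread_mutex_lock(&dropq_mtx);
    done = 1;
    pthread_cond_signal(&dropq_cv);
    pthread_mutex_unlock(&dropq_mtx);
    pthread_join(tid, NULL);
    return (0);
}

Build with cc -pthread. The detach-the-whole-queue-then-unlock step mirrors what tcp_delayed_drop() below does with TAILQ_SWAP() on p_dropq.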
@@ -629,7 +629,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
      * If using hpts lets drop a random number in so
      * not all new connections fall on the same CPU.
      */
-    inp->inp_hpts_cpu = inp->inp_input_cpu = hpts_random_cpu(inp);
+    inp->inp_hpts_cpu = inp->inp_dropq_cpu = hpts_random_cpu(inp);
 #endif
     refcount_init(&inp->inp_refcount, 1);   /* Reference from socket. */
     INP_WLOCK(inp);
@@ -1760,7 +1760,7 @@ in_pcbrele_rlocked(struct inpcb *inp)
     MPASS(inp->inp_flags & INP_FREED);
     MPASS(inp->inp_socket == NULL);
     MPASS(inp->inp_in_hpts == 0);
-    MPASS(inp->inp_in_input == 0);
+    MPASS(inp->inp_in_dropq == 0);
     INP_RUNLOCK(inp);
     uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
     return (true);
@@ -1778,7 +1778,7 @@ in_pcbrele_wlocked(struct inpcb *inp)
     MPASS(inp->inp_flags & INP_FREED);
     MPASS(inp->inp_socket == NULL);
     MPASS(inp->inp_in_hpts == 0);
-    MPASS(inp->inp_in_input == 0);
+    MPASS(inp->inp_in_dropq == 0);
     INP_WUNLOCK(inp);
     uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
     return (true);
@@ -234,22 +234,21 @@ struct inpcb {
      * fields can *not* be collapsed into a signal bit field.
      */
 #if defined(__amd64__) || defined(__i386__)
-    volatile uint8_t inp_in_hpts;       /* on output hpts (lock b) */
-    volatile uint8_t inp_in_input;      /* on input hpts (lock b) */
+    uint8_t inp_in_hpts;                /* on output hpts (lock b) */
+    uint8_t inp_in_dropq;               /* on input hpts (lock b) */
 #else
-    volatile uint32_t inp_in_hpts;      /* on output hpts (lock b) */
-    volatile uint32_t inp_in_input;     /* on input hpts (lock b) */
+    uint32_t inp_in_hpts;               /* on output hpts (lock b) */
+    uint32_t inp_in_dropq;              /* on input hpts (lock b) */
 #endif
     volatile uint16_t inp_hpts_cpu;     /* Lock (i) */
     volatile uint16_t inp_irq_cpu;      /* Set by LRO in behalf of or the driver */
     u_int inp_refcount;                 /* (i) refcount */
     int inp_flags;                      /* (i) generic IP/datagram flags */
     int inp_flags2;                     /* (i) generic IP/datagram flags #2*/
-    volatile uint16_t inp_input_cpu;    /* Lock (i) */
-    volatile uint8_t inp_hpts_cpu_set :1,   /* on output hpts (i) */
-             inp_input_cpu_set : 1,     /* on input hpts (i) */
+    uint16_t inp_dropq_cpu;             /* Lock (i) */
+    uint8_t inp_hpts_cpu_set :1,        /* on output hpts (i) */
+            inp_dropq_cpu_set : 1,      /* on input hpts (i) */
             inp_hpts_calls :1,          /* (i) from output hpts */
-            inp_input_calls :1,         /* (i) from input hpts */
             inp_irq_cpu_set :1,         /* (i) from LRO/Driver */
             inp_spare_bits2 : 3;
     uint8_t inp_numa_domain;            /* numa domain */
@@ -257,7 +256,8 @@ struct inpcb {
     struct socket *inp_socket;          /* (i) back pointer to socket */
     uint32_t inp_hptsslot;              /* Hpts wheel slot this tcb is Lock(i&b) */
     uint32_t inp_hpts_drop_reas;        /* reason we are dropping the PCB (lock i&b) */
-    TAILQ_ENTRY(inpcb) inp_input;       /* pacing in queue next lock(b) */
+    uint32_t inp_dropq_gencnt;
+    TAILQ_ENTRY(inpcb) inp_dropq;       /* hpts drop queue next lock(b) */
     struct inpcbinfo *inp_pcbinfo;      /* (c) PCB list info */
     struct ucred *inp_cred;             /* (c) cache of socket cred */
     u_int32_t inp_flow;                 /* (i) IPv6 flow information */
@@ -62,15 +62,7 @@ __FBSDID("$FreeBSD$");
  * Of course this is a bare bones example and the stack will probably
  * have more consideration then just the above.
  *
- * Now the second function (actually two functions I guess :D)
- * the tcp_hpts system provides is the ability to either abort
- * a connection (later) or process input on a connection.
- * Why would you want to do this? To keep processor locality
- * and or not have to worry about untangling any recursive
- * locks. The input function now is hooked to the new LRO
- * system as well.
- *
- * In order to use the input redirection function the
+ * In order to run input queued segments from the HPTS context the
  * tcp stack must define an input function for
  * tfb_do_queued_segments(). This function understands
  * how to dequeue a array of packets that were input and
@@ -109,6 +101,10 @@ __FBSDID("$FreeBSD$");
  * you have defined the tfb_do_segment_nounlock() as
  * described above.
  *
+ * Now the second function the tcp_hpts system provides is the ability
+ * to abort a connection later. Why would you want to do this?
+ * To not have to worry about untangling any recursive locks.
+ *
  * The second feature of the input side of hpts is the
  * dropping of a connection. This is due to the way that
  * locking may have occured on the INP_WLOCK. So if
@@ -202,6 +198,8 @@ __FBSDID("$FreeBSD$");
 /* Each hpts has its own p_mtx which is used for locking */
 #define HPTS_MTX_ASSERT(hpts)   mtx_assert(&(hpts)->p_mtx, MA_OWNED)
+#define HPTS_LOCK(hpts)         mtx_lock(&(hpts)->p_mtx)
+#define HPTS_UNLOCK(hpts)       mtx_unlock(&(hpts)->p_mtx)
 TAILQ_HEAD(hptsh, inpcb);
 struct tcp_hpts_entry {
     /* Cache line 0x00 */
@@ -226,10 +224,11 @@ struct tcp_hpts_entry {
     uint8_t p_fill[3];          /* Fill to 32 bits */
     /* Cache line 0x40 */
     void *p_inp;
-    struct hptsh p_input;       /* For the tcp-input runner */
+    TAILQ_HEAD(, inpcb) p_dropq;    /* Delayed drop queue */
     /* Hptsi wheel */
     struct hptsh *p_hptss;
-    int32_t p_on_inqueue_cnt;   /* Count on input queue in this hpts */
+    uint32_t p_dropq_cnt;       /* Count on drop queue */
+    uint32_t p_dropq_gencnt;
     uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
                                  * of 255ms */
     uint32_t overidden_sleep;   /* what was overrided by min-sleep for logging */
@@ -270,7 +269,6 @@ static int hpts_does_tp_logging = 0;
 static int hpts_use_assigned_cpu = 1;
 static int32_t hpts_uses_oldest = OLDEST_THRESHOLD;
-static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
 static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout);
 static void tcp_hpts_thread(void *ctx);
 static void tcp_init_hptsi(void *st);
@@ -558,41 +556,6 @@ hpts_sane_pace_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hpt
     }
 }
-static inline void
-hpts_sane_input_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, int clear)
-{
-    HPTS_MTX_ASSERT(hpts);
-    KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
-        ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
-    KASSERT(inp->inp_in_input != 0,
-        ("%s: hpts:%p inp:%p not on the input hpts?", __FUNCTION__, hpts, inp));
-    TAILQ_REMOVE(&hpts->p_input, inp, inp_input);
-    hpts->p_on_inqueue_cnt--;
-    KASSERT(hpts->p_on_inqueue_cnt >= 0,
-        ("Hpts in goes negative inp:%p hpts:%p",
-        inp, hpts));
-    KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
-        ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
-        ("%s hpts:%p input cnt (p_on_inqueue):%d and queue state mismatch",
-        __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
-    if (clear)
-        inp->inp_in_input = 0;
-}
-static inline void
-hpts_sane_input_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, int line)
-{
-    HPTS_MTX_ASSERT(hpts);
-    KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
-        ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
-    KASSERT(inp->inp_in_input == 0,
-        ("%s: hpts:%p inp:%p already on the input hpts?", __FUNCTION__, hpts, inp));
-    TAILQ_INSERT_TAIL(&hpts->p_input, inp, inp_input);
-    inp->inp_in_input = 1;
-    hpts->p_on_inqueue_cnt++;
-    in_pcbref(inp);
-}
 static struct tcp_hpts_entry *
 tcp_hpts_lock(struct inpcb *inp)
 {
@@ -614,19 +577,19 @@ tcp_hpts_lock(struct inpcb *inp)
 }
 static struct tcp_hpts_entry *
-tcp_input_lock(struct inpcb *inp)
+tcp_dropq_lock(struct inpcb *inp)
 {
     struct tcp_hpts_entry *hpts;
     int32_t hpts_num;
 again:
-    hpts_num = inp->inp_input_cpu;
+    hpts_num = inp->inp_dropq_cpu;
     hpts = tcp_pace.rp_ent[hpts_num];
     KASSERT(mtx_owned(&hpts->p_mtx) == 0,
         ("Hpts:%p owns mtx prior-to lock line:%d",
         hpts, __LINE__));
     mtx_lock(&hpts->p_mtx);
-    if (hpts_num != inp->inp_input_cpu) {
+    if (hpts_num != inp->inp_dropq_cpu) {
         mtx_unlock(&hpts->p_mtx);
         goto again;
     }
@@ -652,13 +615,38 @@ tcp_hpts_remove_locked_output(struct tcp_hpts_entry *hpts, struct inpcb *inp, in
     }
 }
 static void
-tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line)
+tcp_dropq_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp)
 {
+    bool released __diagused;
     HPTS_MTX_ASSERT(hpts);
-    if (inp->inp_in_input) {
-        hpts_sane_input_remove(hpts, inp, 1);
-        tcp_remove_hpts_ref(inp, hpts, line);
+    INP_WLOCK_ASSERT(inp);
+    if (inp->inp_in_dropq != IHPTS_ONQUEUE)
+        return;
+    MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
+    if (__predict_true(inp->inp_dropq_gencnt == hpts->p_dropq_gencnt)) {
+        TAILQ_REMOVE(&hpts->p_dropq, inp, inp_dropq);
+        MPASS(hpts->p_dropq_cnt > 0);
+        hpts->p_dropq_cnt--;
+        inp->inp_in_dropq = IHPTS_NONE;
+        released = in_pcbrele_wlocked(inp);
+        MPASS(released == false);
+    } else {
+        /*
+         * tcp_delayed_drop() now owns the TAILQ head of this inp.
+         * Can't TAILQ_REMOVE, just mark it.
+         */
+#ifdef INVARIANTS
+        struct inpcb *tmp;
+        TAILQ_FOREACH(tmp, &hpts->p_dropq, inp_dropq)
+            MPASS(tmp != inp);
+#endif
+        inp->inp_in_dropq = IHPTS_MOVING;
     }
 }
 /*
@@ -669,7 +657,7 @@ tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int
  *
  * Valid values in the flags are
  * HPTS_REMOVE_OUTPUT - remove from the output of the hpts.
- * HPTS_REMOVE_INPUT - remove from the input of the hpts.
+ * HPTS_REMOVE_DROPQ - remove from the drop queue of the hpts.
  * Note that you can use one or both values together
  * and get two actions.
  */
@@ -684,9 +672,9 @@ __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line)
         tcp_hpts_remove_locked_output(hpts, inp, flags, line);
         mtx_unlock(&hpts->p_mtx);
     }
-    if (flags & HPTS_REMOVE_INPUT) {
-        hpts = tcp_input_lock(inp);
-        tcp_hpts_remove_locked_input(hpts, inp, flags, line);
+    if (flags & HPTS_REMOVE_DROPQ) {
+        hpts = tcp_dropq_lock(inp);
+        tcp_dropq_remove(hpts, inp);
         mtx_unlock(&hpts->p_mtx);
     }
 }
@@ -1097,31 +1085,29 @@ __tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){
 }
 void
-__tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line)
+tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason)
 {
     struct tcp_hpts_entry *hpts;
-    struct tcpcb *tp;
-    tp = intotcpcb(inp);
-    hpts = tcp_input_lock(tp->t_inpcb);
-    if (inp->inp_in_input == 0) {
-        /* Ok we need to set it on the hpts in the current slot */
-        hpts_sane_input_insert(hpts, inp, line);
-        if ((hpts->p_hpts_active == 0) &&
-            (hpts->p_on_min_sleep == 0)){
-            /*
-             * Activate the hpts if it is sleeping.
-             */
-            hpts->p_direct_wake = 1;
-            tcp_wakehpts(hpts);
-        }
-    } else if ((hpts->p_hpts_active == 0) &&
-        (hpts->p_on_min_sleep == 0)){
+    struct tcpcb *tp = intotcpcb(inp);
+    INP_WLOCK_ASSERT(inp);
+    inp->inp_hpts_drop_reas = reason;
+    if (inp->inp_in_dropq != IHPTS_NONE)
+        return;
+    hpts = tcp_dropq_lock(tp->t_inpcb);
+    MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
+    TAILQ_INSERT_TAIL(&hpts->p_dropq, inp, inp_dropq);
+    inp->inp_in_dropq = IHPTS_ONQUEUE;
+    inp->inp_dropq_gencnt = hpts->p_dropq_gencnt;
+    hpts->p_dropq_cnt++;
+    in_pcbref(inp);
+    if ((hpts->p_hpts_active == 0) && (hpts->p_on_min_sleep == 0)){
         hpts->p_direct_wake = 1;
         tcp_wakehpts(hpts);
     }
-    inp->inp_hpts_drop_reas = reason;
-    mtx_unlock(&hpts->p_mtx);
+    HPTS_UNLOCK(hpts);
 }
 static uint16_t
@@ -1136,8 +1122,8 @@ hpts_random_cpu(struct inpcb *inp){
      * If one has been set use it i.e. we want both in and out on the
      * same hpts.
      */
-    if (inp->inp_input_cpu_set) {
-        return (inp->inp_input_cpu);
+    if (inp->inp_dropq_cpu_set) {
+        return (inp->inp_dropq_cpu);
     } else if (inp->inp_hpts_cpu_set) {
         return (inp->inp_hpts_cpu);
     }
@@ -1160,8 +1146,8 @@ hpts_cpuid(struct inpcb *inp, int *failed)
      * If one has been set use it i.e. we want both in and out on the
      * same hpts.
      */
-    if (inp->inp_input_cpu_set) {
-        return (inp->inp_input_cpu);
+    if (inp->inp_dropq_cpu_set) {
+        return (inp->inp_dropq_cpu);
     } else if (inp->inp_hpts_cpu_set) {
         return (inp->inp_hpts_cpu);
     }
@@ -1249,117 +1235,50 @@ tcp_drop_in_pkts(struct tcpcb *tp)
  * list.
  */
 static void
-tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
-{
-    struct tcpcb *tp;
-    struct inpcb *inp;
-    uint16_t drop_reason;
-    int16_t set_cpu;
-    uint32_t did_prefetch = 0;
-    int dropped;
-    HPTS_MTX_ASSERT(hpts);
-    NET_EPOCH_ASSERT();
-    while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
-        HPTS_MTX_ASSERT(hpts);
-        hpts_sane_input_remove(hpts, inp, 0);
-        if (inp->inp_input_cpu_set == 0) {
-            set_cpu = 1;
-        } else {
-            set_cpu = 0;
-        }
-        hpts->p_inp = inp;
-        drop_reason = inp->inp_hpts_drop_reas;
-        inp->inp_in_input = 0;
-        mtx_unlock(&hpts->p_mtx);
-        INP_WLOCK(inp);
-#ifdef VIMAGE
-        CURVNET_SET(inp->inp_vnet);
-#endif
-        if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
-out:
-            hpts->p_inp = NULL;
-            if (in_pcbrele_wlocked(inp) == 0) {
-                INP_WUNLOCK(inp);
-            }
-#ifdef VIMAGE
-            CURVNET_RESTORE();
-#endif
-            mtx_lock(&hpts->p_mtx);
-            continue;
-        }
-        tp = intotcpcb(inp);
-        if ((tp == NULL) || (tp->t_inpcb == NULL)) {
-            goto out;
-        }
-        if (drop_reason) {
-            /* This tcb is being destroyed for drop_reason */
-            tcp_drop_in_pkts(tp);
-            tp = tcp_drop(tp, drop_reason);
-            if (tp == NULL) {
-                INP_WLOCK(inp);
-            }
-            if (in_pcbrele_wlocked(inp) == 0)
-                INP_WUNLOCK(inp);
-#ifdef VIMAGE
-            CURVNET_RESTORE();
-#endif
-            mtx_lock(&hpts->p_mtx);
-            continue;
-        }
-        if (set_cpu) {
-            /*
-             * Setup so the next time we will move to the right
-             * CPU. This should be a rare event. It will
-             * sometimes happens when we are the client side
-             * (usually not the server). Somehow tcp_output()
-             * gets called before the tcp_do_segment() sets the
-             * intial state. This means the r_cpu and r_hpts_cpu
-             * is 0. We get on the hpts, and then tcp_input()
-             * gets called setting up the r_cpu to the correct
-             * value. The hpts goes off and sees the mis-match.
-             * We simply correct it here and the CPU will switch
-             * to the new hpts nextime the tcb gets added to the
-             * the hpts (not this time) :-)
-             */
-            tcp_set_hpts(inp);
-        }
-        if (tp->t_fb_ptr != NULL) {
-            kern_prefetch(tp->t_fb_ptr, &did_prefetch);
-            did_prefetch = 1;
-        }
-        if ((tp->t_fb->tfb_do_queued_segments != NULL) && tp->t_in_pkt) {
-            if (inp->inp_in_input)
-                tcp_hpts_remove(inp, HPTS_REMOVE_INPUT);
-            dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0);
-            if (dropped) {
-                /* Re-acquire the wlock so we can release the reference */
-                INP_WLOCK(inp);
-            }
-        } else if (tp->t_in_pkt) {
-            /*
-             * We reach here only if we had a
-             * stack that supported INP_SUPPORTS_MBUFQ
-             * and then somehow switched to a stack that
-             * does not. The packets are basically stranded
-             * and would hang with the connection until
-             * cleanup without this code. Its not the
-             * best way but I know of no other way to
-             * handle it since the stack needs functions
-             * it does not have to handle queued packets.
-             */
-            tcp_drop_in_pkts(tp);
-        }
-        if (in_pcbrele_wlocked(inp) == 0)
-            INP_WUNLOCK(inp);
-#ifdef VIMAGE
-        CURVNET_RESTORE();
-#endif
-        mtx_lock(&hpts->p_mtx);
-        hpts->p_inp = NULL;
-    }
-}
+tcp_delayed_drop(struct tcp_hpts_entry *hpts)
+{
+    TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head);
+    struct inpcb *inp, *tmp;
+    struct tcpcb *tp;
+    HPTS_MTX_ASSERT(hpts);
+    NET_EPOCH_ASSERT();
+    TAILQ_SWAP(&head, &hpts->p_dropq, inpcb, inp_dropq);
+    hpts->p_dropq_cnt = 0;
+    hpts->p_dropq_gencnt++;
+    HPTS_UNLOCK(hpts);
+    TAILQ_FOREACH_SAFE(inp, &head, inp_dropq, tmp) {
+        INP_WLOCK(inp);
+        MPASS(inp->inp_hpts_drop_reas != 0);
+        if (__predict_false(inp->inp_in_dropq == IHPTS_MOVING)) {
+            inp->inp_in_dropq = IHPTS_NONE;
+            if (in_pcbrele_wlocked(inp) == false)
+                INP_WUNLOCK(inp);
+            continue;
+        }
+        MPASS(inp->inp_in_dropq == IHPTS_ONQUEUE);
+        inp->inp_in_dropq = IHPTS_NONE;
+        if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
+            if (in_pcbrele_wlocked(inp) == false)
+                INP_WUNLOCK(inp);
+            continue;
+        }
+        CURVNET_SET(inp->inp_vnet);
+        if (__predict_true((tp = intotcpcb(inp)) != NULL)) {
+            MPASS(tp->t_inpcb == inp);
+            tcp_drop_in_pkts(tp);
+            tp = tcp_drop(tp, inp->inp_hpts_drop_reas);
+            if (tp == NULL)
+                INP_WLOCK(inp);
+        }
+        if (in_pcbrele_wlocked(inp) == false)
+            INP_WUNLOCK(inp);
+        INP_UNLOCK_ASSERT(inp);
+        CURVNET_RESTORE();
+    }
+    mtx_lock(&hpts->p_mtx);     /* XXXGL */
+}
 static void
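
One subtlety in the new code is worth spelling out: tcp_delayed_drop() bumps p_dropq_gencnt when it swaps p_dropq onto a private list, so a concurrent tcp_dropq_remove() whose inp still carries the old inp_dropq_gencnt knows the drainer now owns the list; it must not unlink the inp and instead marks it IHPTS_MOVING for the drainer to finish. The sequential sketch below models only that decision rule; elem, enqueue(), drain_start(), remove_elem() and drain_elem() are hypothetical names, not kernel APIs.

/*
 * Sequential model (assumed names) of the generation-count handshake used by
 * tcp_dropq_remove()/tcp_delayed_drop(): a stale generation tells the remover
 * that the drainer already owns the list, so it may only mark the element.
 */
#include <stdio.h>

enum dropq_state { Q_NONE, Q_ONQUEUE, Q_MOVING };

struct elem {
    enum dropq_state state;
    unsigned gencnt;            /* generation recorded when enqueued */
};

static unsigned queue_gencnt;   /* generation of the shared queue */

static void
enqueue(struct elem *e)
{
    e->state = Q_ONQUEUE;
    e->gencnt = queue_gencnt;
}

/* Drainer: "swap" the queue away by bumping the generation. */
static void
drain_start(void)
{
    queue_gencnt++;
}

/* Remover: unlink only if the element is still on the shared queue. */
static void
remove_elem(struct elem *e)
{
    if (e->state != Q_ONQUEUE)
        return;
    if (e->gencnt == queue_gencnt) {
        e->state = Q_NONE;      /* safe to unlink here */
        printf("unlinked by remover\n");
    } else {
        e->state = Q_MOVING;    /* drainer owns the list; just mark it */
        printf("marked MOVING; drainer will finish\n");
    }
}

/* Drainer visiting an element it swapped off the shared queue. */
static void
drain_elem(struct elem *e)
{
    if (e->state == Q_MOVING)
        printf("drainer: remover raced us, only release the reference\n");
    else
        printf("drainer: drop the connection\n");
    e->state = Q_NONE;
}

int
main(void)
{
    struct elem a = { Q_NONE, 0 }, b = { Q_NONE, 0 };

    /* Case 1: removal happens before the drainer swaps the queue. */
    enqueue(&a);
    remove_elem(&a);

    /* Case 2: the drainer swaps first, then a removal races in. */
    enqueue(&b);
    drain_start();
    remove_elem(&b);
    drain_elem(&b);
    return (0);
}

Case 1 prints the plain unlink; case 2 prints the MOVING hand-off, matching the IHPTS_MOVING branch in tcp_dropq_remove() and the __predict_false() check in tcp_delayed_drop().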
@@ -1489,10 +1408,10 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
         hpts->p_nxt_slot = hpts->p_prev_slot;
         hpts->p_runningslot = hpts_slot(hpts->p_prev_slot, 1);
     }
-    KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
-        ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
+    KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
+        ((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
         ("%s hpts:%p in_hpts cnt:%d and queue state mismatch",
-        __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
+        __FUNCTION__, hpts, hpts->p_dropq_cnt));
     HPTS_MTX_ASSERT(hpts);
     if (hpts->p_on_queue_cnt == 0) {
         goto no_one;
@@ -1716,10 +1635,10 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
      * Check to see if we took an excess amount of time and need to run
      * more ticks (if we did not hit eno-bufs).
      */
-    KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
-        ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
+    KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
+        ((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
         ("%s hpts:%p in_hpts cnt:%d queue state mismatch",
-        __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
+        __FUNCTION__, hpts, hpts->p_dropq_cnt));