diff options
author | David S. Miller <davem@davemloft.net> | 2018-10-10 22:27:38 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-10-10 22:27:38 -0700 |
commit | 49b538e79b59378e7ac09742bb4bd57b3b91fd94 (patch) | |
tree | 7ad8f1e4507d6cecd9856cff0902528bc373d91f /net/rxrpc/input.c | |
parent | 9a4890bd6d6325a1c88564a20ab310b2d56f6094 (diff) | |
parent | c1e15b4944c9fa7fbbb74f7a5920a1e31b4b965a (diff) | |
download | linux-49b538e79b59378e7ac09742bb4bd57b3b91fd94.tar.bz2 |
Merge tag 'rxrpc-fixes-20181008' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
David Howells says:
====================
rxrpc: Fix packet reception code
Here are a set of patches that prepares for and fix problems in rxrpc's
package reception code. There serious problems are:
(A) There's a window between binding the socket and setting the data_ready
hook in which packets can find their way into the UDP socket's receive
queues.
(B) The skb_recv_udp() will return an error (and clear the error state) if
there was an error on the Tx side. rxrpc doesn't handle this.
(C) The rxrpc data_ready handler doesn't fully drain the UDP receive
queue.
(D) The rxrpc data_ready handler assumes it is called in a non-reentrant
state.
The second patch fixes (A) - (C); the third patch renders (B) and (C)
non-issues by using the recap_rcv hook instead of data_ready - and the
final patch fixes (D). That last is the most complex.
The preparatory patches are:
(1) Fix some places that are doing things in the wrong net namespace.
(2) Stop taking the rcu read lock as it's held by the IP input routine in
the call chain.
(3) Only end the Tx phase if *we* rotated the final packet out of the Tx
buffer.
(4) Don't assume that the call state won't change after dropping the
call_state lock.
(5) Only take receive window and MTU suze parameters from an ACK packet if
it's the latest ACK packet.
(6) Record connection-level abort information correctly.
(7) Fix a trace line.
And then there are three main patches - note that these are mixed in with
the preparatory patches somewhat:
(1) Fix the setup window (A), skb_recv_udp() error check (B) and packet
drainage (C).
(2) Switch to using the encap_rcv instead of data_ready to cut out the
effects of the UDP read queues and get the packets delivered directly.
(3) Add more locking into the various packet input paths to defend against
re-entrance (D).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rxrpc/input.c')
-rw-r--r-- | net/rxrpc/input.c | 253 |
1 files changed, 132 insertions, 121 deletions
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 800f5b8a1baa..570b49d2da42 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -273,23 +278,26 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, const char *abort_why) { + unsigned int state; ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); - switch (call->state) { + state = call->state; + switch (state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (reply_begun) - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); + state = call->state; break; default: @@ -297,11 +305,10 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, } write_unlock(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_end); - } _leave(" = ok"); return true; @@ -332,11 +339,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -452,13 +459,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, } } + spin_lock(&call->input_lock); + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - return; + goto unlock; call->ackr_prev_seq = seq; @@ -488,12 +497,16 @@ next_subpacket: if (flags & RXRPC_LAST_PACKET) { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) - return rxrpc_proto_abort("LSN", call, seq); + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) - return rxrpc_proto_abort("LSA", call, seq); + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); @@ -560,8 +573,10 @@ next_subpacket: skip: offset += len; if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - return rxrpc_proto_abort("XJF", call, seq); + if (skb_copy_bits(skb, offset, &flags, 1) < 0) { + rxrpc_proto_abort("XJF", call, seq); + goto unlock; + } offset += sizeof(struct rxrpc_jumbo_header); seq++; serial++; @@ -601,6 +616,9 @@ ack: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } + +unlock: + spin_unlock(&call->input_lock); _leave(" [queued]"); } @@ -687,15 +705,14 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ping_serial; + ping_serial = READ_ONCE(call->ping_serial); if (orig_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || - before(orig_serial, ping_serial)) + if (before(orig_serial, ping_serial) || + !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) return; - clear_bit(RXRPC_CALL_PINGING, &call->flags); if (after(orig_serial, ping_serial)) return; @@ -861,15 +878,32 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + return; + + buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info)) { - if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + goto out; + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); - } - if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -879,39 +913,35 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; - } - - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; + goto out; } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); - if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -920,7 +950,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, skb, &summary, acked_serial); +out: + spin_unlock(&call->input_lock); } /* @@ -933,9 +965,12 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); } /* @@ -1018,18 +1053,19 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* - * Handle a new call on a channel implicitly completing the preceding call on - * that channel. + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. * * TODO: If callNumber > call_id + 1, renegotiate security. */ -static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, +static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct rxrpc_call *call) { switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - break; + /* Fall through */ case RXRPC_CALL_COMPLETE: break; default: @@ -1037,11 +1073,13 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } + trace_rxrpc_improper_term(call); break; } - trace_rxrpc_improper_term(call); + spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); rxrpc_notify_socket(call); } @@ -1120,8 +1158,10 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * The socket is locked by the caller and this prevents the socket from being * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. + * + * Called with the RCU read lock held from the IP layer via UDP. */ -void rxrpc_data_ready(struct sock *udp_sk) +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@ -1130,38 +1170,17 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; - struct sk_buff *skb; unsigned int channel; - int ret, skew = 0; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } - if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); rxrpc_new_skb(skb, rxrpc_skb_rx_received); - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); + skb_pull(skb, sizeof(struct udphdr)); /* The UDP protocol already released all skb resources; * we are free to add our own data there. @@ -1176,11 +1195,13 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); - return; + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + return 0; } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { @@ -1234,8 +1255,6 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.serviceId == 0) goto bad_message; - rcu_read_lock(); - if (rxrpc_to_server(sp)) { /* Weed out packets to services we're not offering. Packets * that would begin a call are explicitly rejected and the rest @@ -1247,7 +1266,7 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; - goto discard_unlock; + goto discard; } } @@ -1257,17 +1276,23 @@ void rxrpc_data_ready(struct sock *udp_sk) goto wrong_security; if (sp->hdr.serviceId != conn->service_id) { - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || - conn->service_id != conn->params.service_id) + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) goto reupgrade; - conn->service_id = sp->hdr.serviceId; } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } /* Note the serial number skew here */ @@ -1286,19 +1311,19 @@ void rxrpc_data_ready(struct sock *udp_sk) /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - goto discard_unlock; + goto discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard_unlock; + goto discard; /* For the previous service call, if completed * successfully, we discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard_unlock; + goto discard; /* But otherwise we need to retransmit the final packet * from data cached in the connection record. @@ -1309,18 +1334,16 @@ void rxrpc_data_ready(struct sock *udp_sk) sp->hdr.serial, sp->hdr.flags, 0); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) { - rcu_read_unlock(); + if (rxrpc_to_client(sp)) goto reject_packet; - } if (call) - rxrpc_input_implicit_end_call(conn, call); + rxrpc_input_implicit_end_call(rx, conn, call); call = NULL; } @@ -1337,55 +1360,42 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message_unlock; + goto bad_message; if (sp->hdr.seq != 1) - goto discard_unlock; - call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); - if (!call) { - rcu_read_unlock(); + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) goto reject_packet; - } rxrpc_send_ping(call, skb, skew); mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); - goto discard_unlock; + goto discard; -discard_unlock: - rcu_read_unlock(); discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; - -out_unlock: - rcu_read_unlock(); - goto out; + return 0; wrong_security: - rcu_read_unlock(); trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; unsupported_service: - rcu_read_unlock(); trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->priority = RX_INVALID_OPERATION; goto post_abort; reupgrade: - rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; -bad_message_unlock: - rcu_read_unlock(); bad_message: trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); @@ -1397,4 +1407,5 @@ reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); + return 0; } |