From 60c7a3c9c77239d5a7430c42a2c796881716cca1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 Nov 2013 02:36:31 -0700 Subject: Bluetooth: Fix issue with RFCOMM getsockopt operation The commit 94a86df01082557e2de45865e538d7fb6c46231c seem to have uncovered a long standing bug that did not trigger so far. BUG: unable to handle kernel paging request at 00000009dd503502 IP: [] rfcomm_sock_getsockopt+0x128/0x200 PGD 0 Oops: 0000 [#1] SMP Modules linked in: ath5k ath mac80211 cfg80211 CPU: 2 PID: 1459 Comm: bluetoothd Not tainted 3.11.0-133163-gcebd830 #2 Hardware name: System manufacturer System Product Name/P6T DELUXE V2, BIOS 1202 12/22/2010 task: ffff8803304106a0 ti: ffff88033046a000 task.ti: ffff88033046a000 RIP: 0010:[] [] rfcomm_sock_getsockopt+0x128/0x200 RSP: 0018:ffff88033046bed8 EFLAGS: 00010246 RAX: 00000009dd503502 RBX: 0000000000000003 RCX: 00007fffa2ed5548 RDX: 0000000000000003 RSI: 0000000000000012 RDI: ffff88032fd37480 RBP: ffff88033046bf28 R08: 00007fffa2ed554c R09: ffff88032f5707d8 R10: 00007fffa2ed5548 R11: 0000000000000202 R12: ffff880330bbd000 R13: 00007fffa2ed5548 R14: 0000000000000003 R15: 00007fffa2ed554c FS: 00007fc44cfac700(0000) GS:ffff88033fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000009dd503502 CR3: 00000003304c2000 CR4: 00000000000007e0 Stack: ffff88033046bf28 ffffffff815b0f2f ffff88033046bf18 0002ffff81105ef6 0000000600000000 ffff88032fd37480 0000000000000012 00007fffa2ed5548 0000000000000003 00007fffa2ed554c ffff88033046bf78 ffffffff814c0380 Call Trace: [] ? rfcomm_sock_setsockopt+0x5f/0x190 [] SyS_getsockopt+0x60/0xb0 [] system_call_fastpath+0x16/0x1b Code: 02 00 00 00 0f 47 d0 4c 89 ef e8 74 13 cd ff 83 f8 01 19 c9 f7 d1 83 e1 f2 e9 4b ff ff ff 0f 1f 44 00 00 49 8b 84 24 70 02 00 00 <4c> 8b 30 4c 89 c0 e8 2d 19 cd ff 85 c0 49 89 d7 b9 f2 ff ff ff RIP [] rfcomm_sock_getsockopt+0x128/0x200 RSP CR2: 00000009dd503502 It triggers in the following segment of the code: 0x1313 is in rfcomm_sock_getsockopt (net/bluetooth/rfcomm/sock.c:743). 738 739 static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) 740 { 741 struct sock *sk = sock->sk; 742 struct rfcomm_conninfo cinfo; 743 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; 744 int len, err = 0; 745 u32 opt; 746 747 BT_DBG("sk %p", sk); The l2cap_pi(sk) is wrong here since it should have been rfcomm_pi(sk), but that socket of course does not contain the low-level connection details requested here. Tracking down the actual offending commit, it seems that this has been introduced when doing some L2CAP refactoring: commit 8c1d787be4b62d2d1b6f04953eca4bcf7c839d44 Author: Gustavo F. Padovan Date: Wed Apr 13 20:23:55 2011 -0300 @@ -743,6 +743,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u struct sock *sk = sock->sk; struct sock *l2cap_sk; struct rfcomm_conninfo cinfo; + struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -787,8 +788,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; - cinfo.hci_handle = l2cap_pi(l2cap_sk)->conn->hcon->handle; - memcpy(cinfo.dev_class, l2cap_pi(l2cap_sk)->conn->hcon->dev_class, 3); + cinfo.hci_handle = conn->hcon->handle; + memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); The l2cap_sk got accidentally mixed into the sk (which is RFCOMM) and now causing a problem within getsocketopt() system call. To fix this, just re-introduce l2cap_sk and make sure the right socket is used for the low-level connection details. Reported-by: Fabio Rossi Reported-by: Janusz Dziedzic Tested-by: Janusz Dziedzic Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/rfcomm/sock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30b3721dc6d7..7096cfe3bd5a 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -732,8 +732,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct sock *l2cap_sk; + struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; - struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -776,6 +777,9 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u break; } + l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + conn = l2cap_pi(l2cap_sk)->chan->conn; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); -- cgit v1.2.3 From c507f138fc7c2025d75b65018810dc0561d0bdb9 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 16:02:24 +0900 Subject: Bluetooth: Fix RFCOMM bind fail for L2CAP sock L2CAP socket bind checks its bdaddr type but RFCOMM kernel thread does not assign proper bdaddr type for L2CAP sock. This can cause that RFCOMM failure. Signed-off-by: Seung-Woo Kim Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ca957d34b0c8..292fa64a19dd 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -694,6 +694,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = 0; addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (*err < 0) goto failed; @@ -1983,6 +1984,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); -- cgit v1.2.3 From 8992da099332db896144465a01ad636f58bdebd9 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 17:15:42 +0900 Subject: Bluetooth: Fix to set proper bdaddr_type for RFCOMM connect L2CAP socket validates proper bdaddr_type for connect, so this patch fixes to set explictly bdaddr_type for RFCOMM connect. Signed-off-by: Seung-Woo Kim Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 292fa64a19dd..ce7521120d69 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -720,6 +720,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; -- cgit v1.2.3 From 31e8ce80baa7eaed9eec611deba982b24beed9e5 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 18:46:33 +0900 Subject: Bluetooth: Fix crash in l2cap_chan_send after l2cap_chan_del Removing a bond and disconnecting from a specific remote device can cause l2cap_chan_send() is called after l2cap_chan_del() is called. This causes following crash. [ 1384.972086] Unable to handle kernel NULL pointer dereference at virtual address 00000008 [ 1384.972090] pgd = c0004000 [ 1384.972125] [00000008] *pgd=00000000 [ 1384.972137] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 1384.972144] Modules linked in: [ 1384.972156] CPU: 0 PID: 841 Comm: krfcommd Not tainted 3.10.14-gdf22a71-dirty #435 [ 1384.972162] task: df29a100 ti: df178000 task.ti: df178000 [ 1384.972182] PC is at l2cap_create_basic_pdu+0x30/0x1ac [ 1384.972191] LR is at l2cap_chan_send+0x100/0x1d4 [ 1384.972198] pc : [] lr : [] psr: 40000113 [ 1384.972198] sp : df179d40 ip : c083a010 fp : 00000008 [ 1384.972202] r10: 00000004 r9 : 0000065a r8 : 000003f5 [ 1384.972206] r7 : 00000000 r6 : 00000000 r5 : df179e84 r4 : da557000 [ 1384.972210] r3 : 00000000 r2 : 00000004 r1 : df179e84 r0 : 00000000 [ 1384.972215] Flags: nZcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 1384.972220] Control: 10c53c7d Table: 5c8b004a DAC: 00000015 [ 1384.972224] Process krfcommd (pid: 841, stack limit = 0xdf178238) [ 1384.972229] Stack: (0xdf179d40 to 0xdf17a000) [ 1384.972238] 9d40: 00000000 da557000 00000004 df179e84 00000004 000003f5 0000065a 00000000 [ 1384.972245] 9d60: 00000008 c0521c78 df179e84 da557000 00000004 da557204 de0c6800 df179e84 [ 1384.972253] 9d80: da557000 00000004 da557204 c0526b7c 00000004 df724000 df179e84 00000004 [ 1384.972260] 9da0: df179db0 df29a100 c083bc48 c045481c 00000001 00000000 00000000 00000000 [ 1384.972267] 9dc0: 00000000 df29a100 00000000 00000000 00000000 00000000 df179e10 00000000 [ 1384.972274] 9de0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1384.972281] 9e00: 00000000 00000000 00000000 00000000 df179e4c c000ec80 c0b538c0 00000004 [ 1384.972288] 9e20: df724000 df178000 00000000 df179e84 c0b538c0 00000000 df178000 c07f4570 [ 1384.972295] 9e40: dcad9c00 df179e74 c07f4394 df179e60 df178000 00000000 df179e84 de247010 [ 1384.972303] 9e60: 00000043 c0454dec 00000001 00000004 df315c00 c0530598 00000004 df315c0c [ 1384.972310] 9e80: ffffc32c 00000000 00000000 df179ea0 00000001 00000000 00000000 00000000 [ 1384.972317] 9ea0: df179ebc 00000004 df315c00 c05df838 00000000 c0530810 c07d08c0 d7017303 [ 1384.972325] 9ec0: 6ec245b9 00000000 df315c00 c0531b04 c07f3fe0 c07f4018 da67a300 df315c00 [ 1384.972332] 9ee0: 00000000 c05334e0 df315c00 df315b80 df315c00 de0c6800 da67a300 00000000 [ 1384.972339] 9f00: de0c684c c0533674 df204100 df315c00 df315c00 df204100 df315c00 c082b138 [ 1384.972347] 9f20: c053385c c0533754 a0000113 df178000 00000001 c083bc48 00000000 c053385c [ 1384.972354] 9f40: 00000000 00000000 00000000 c05338c4 00000000 df9f0000 df9f5ee4 df179f6c [ 1384.972360] 9f60: df178000 c0049db4 00000000 00000000 c07f3ff8 00000000 00000000 00000000 [ 1384.972368] 9f80: df179f80 df179f80 00000000 00000000 df179f90 df179f90 df9f5ee4 c0049cfc [ 1384.972374] 9fa0: 00000000 00000000 00000000 c000f168 00000000 00000000 00000000 00000000 [ 1384.972381] 9fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1384.972388] 9fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00010000 00000600 [ 1384.972411] [] (l2cap_create_basic_pdu+0x30/0x1ac) from [] (l2cap_chan_send+0x100/0x1d4) [ 1384.972425] [] (l2cap_chan_send+0x100/0x1d4) from [] (l2cap_sock_sendmsg+0xa8/0x104) [ 1384.972440] [] (l2cap_sock_sendmsg+0xa8/0x104) from [] (sock_sendmsg+0xac/0xcc) [ 1384.972453] [] (sock_sendmsg+0xac/0xcc) from [] (kernel_sendmsg+0x2c/0x34) [ 1384.972469] [] (kernel_sendmsg+0x2c/0x34) from [] (rfcomm_send_frame+0x58/0x7c) [ 1384.972481] [] (rfcomm_send_frame+0x58/0x7c) from [] (rfcomm_send_ua+0x98/0xbc) [ 1384.972494] [] (rfcomm_send_ua+0x98/0xbc) from [] (rfcomm_recv_disc+0xac/0x100) [ 1384.972506] [] (rfcomm_recv_disc+0xac/0x100) from [] (rfcomm_recv_frame+0x144/0x264) [ 1384.972519] [] (rfcomm_recv_frame+0x144/0x264) from [] (rfcomm_process_rx+0x74/0xfc) [ 1384.972531] [] (rfcomm_process_rx+0x74/0xfc) from [] (rfcomm_process_sessions+0x58/0x160) [ 1384.972543] [] (rfcomm_process_sessions+0x58/0x160) from [] (rfcomm_run+0x68/0x110) [ 1384.972558] [] (rfcomm_run+0x68/0x110) from [] (kthread+0xb8/0xbc) [ 1384.972576] [] (kthread+0xb8/0xbc) from [] (ret_from_fork+0x14/0x2c) [ 1384.972586] Code: e3100004 e1a07003 e5946000 1a000057 (e5969008) [ 1384.972614] ---[ end trace 6170b7ce00144e8c ]--- Signed-off-by: Seung-Woo Kim Signed-off-by: Johan Hedberg --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 63fa11109a1c..11b5d097f602 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2452,6 +2452,9 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, int err; struct sk_buff_head seg_queue; + if (!chan->conn) + return -ENOTCONN; + /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len, priority); -- cgit v1.2.3 From 86ca9eac31d0d8c4fe61b5726e6d63197bc435a6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Nov 2013 11:30:39 +0200 Subject: Bluetooth: Fix rejecting SMP security request in slave role The SMP security request is for a slave role device to request the master role device to initiate a pairing request. If we receive this command while we're in the slave role we should reject it appropriately. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b5562abdd6e0..ccad6c1e3ec3 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -739,6 +739,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); + if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + return SMP_CMD_NOTSUPP; + hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) -- cgit v1.2.3 From 4819224853dff325f0aabdb3dc527d768fa482e3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Nov 2013 21:48:22 -0800 Subject: netfilter: fix connlimit Kconfig prompt string Under Core Netfilter Configuration, connlimit match support has an extra double quote at the end of it. Fixes a portion of kernel bugzilla #52671: https://bugzilla.kernel.org/show_bug.cgi?id=52671 Signed-off-by: Randy Dunlap Reported-by: lailavrazda1979@gmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 48acec17e27a..c3398cd99b94 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -909,7 +909,7 @@ config NETFILTER_XT_MATCH_CONNLABEL connection simultaneously. config NETFILTER_XT_MATCH_CONNLIMIT - tristate '"connlimit" match support"' + tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- -- cgit v1.2.3 From a6441b7a39f18acb68c83cd738f1310881aa8a0b Mon Sep 17 00:00:00 2001 From: Martin Topholm Date: Thu, 14 Nov 2013 15:35:30 +0100 Subject: netfilter: synproxy: send mss option to backend When the synproxy_parse_options is called on the client ack the mss option will not be present. Consequently mss wont be included in the backend syn packet, which falls back to 536 bytes mss. Therefore XT_SYNPROXY_OPT_MSS is explicitly flagged when recovering mss value from cookie. Signed-off-by: Martin Topholm Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 1 + net/ipv6/netfilter/ip6t_SYNPROXY.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 01cffeaa0085..f13bd91d9a56 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -244,6 +244,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index bf9f612c1bc2..f78f41aca8e9 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); -- cgit v1.2.3 From c1898c4c295b735c05af4c09664993fd8f257c2b Mon Sep 17 00:00:00 2001 From: Martin Topholm Date: Thu, 14 Nov 2013 15:35:31 +0100 Subject: netfilter: synproxy: correct wscale option passing Timestamp are used to store additional syncookie parameters such as sack, ecn, and wscale. The wscale value we need to encode is the client's wscale, since we can't recover that later in the session. Next overwrite the wscale option so the later synproxy_send_client_synack will send the backend's wscale to the client. Signed-off-by: Martin Topholm Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_synproxy_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index cdf4567ba9b3..9858e3e51a3a 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -151,9 +151,10 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, opts->tsecr = opts->tsval; opts->tsval = tcp_time_stamp & ~0x3f; - if (opts->options & XT_SYNPROXY_OPT_WSCALE) - opts->tsval |= info->wscale; - else + if (opts->options & XT_SYNPROXY_OPT_WSCALE) { + opts->tsval |= opts->wscale; + opts->wscale = info->wscale; + } else opts->tsval |= 0xf; if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) -- cgit v1.2.3 From 23dfe136e2bf8d9ea1095704c535368a9bc721da Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Sat, 16 Nov 2013 20:37:46 -0800 Subject: netfilter: fix wrong byte order in nf_ct_seqadj_set internal information In commit 41d73ec053d2, sequence number adjustments were moved to a separate file. Unfortunately, the sequence numbers that are stored in the nf_ct_seqadj structure are expressed in host byte order. The necessary ntohl call was removed when the call to adjust_tcp_sequence was collapsed into nf_ct_seqadj_set. This broke the FTP NAT helper. Fix it by adding back the byte order conversions. Reported-by: Dawid Stawiarski Signed-off-by: Phil Oester Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 5f9bfd060dea..17c1bcb182c6 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -41,8 +41,8 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, spin_lock_bh(&ct->lock); this_way = &seqadj->seq[dir]; if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; + before(this_way->correction_pos, ntohl(seq))) { + this_way->correction_pos = ntohl(seq); this_way->offset_before = this_way->offset_after; this_way->offset_after += off; } -- cgit v1.2.3 From 8691a9a3382f17fbf1ed808c956672c70369a2e0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Nov 2013 22:16:47 +0100 Subject: netfilter: nft_compat: fix error path in nft_parse_compat() The patch 0ca743a55991: "netfilter: nf_tables: add compatibility layer for x_tables", leads to the following Smatch warning: "net/netfilter/nft_compat.c:140 nft_parse_compat() warn: signedness bug returning '(-34)'" This nft_parse_compat function returns error codes but the return type is u8 so the error codes are transformed into small positive values. The callers don't check the return. Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index a82667c64729..da0c1f4ada12 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -128,7 +128,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -148,7 +148,8 @@ static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + return 0; } static int @@ -166,8 +167,11 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); @@ -356,8 +360,11 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); -- cgit v1.2.3 From 0c3c6c00c69649f4749642b3e5d82125fde1600c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 18 Nov 2013 12:53:59 +0100 Subject: netfilter: nf_conntrack: decrement global counter after object release nf_conntrack_free() decrements our counter (net->ct.count) before releasing the conntrack object. That counter is used in the nf_conntrack_cleanup_net_list path to check if it's time to kmem_cache_destroy our cache of conntrack objects. I think we have a race there that should be easier to trigger (although still hard) with CONFIG_DEBUG_OBJECTS_FREE as object releases become slowier according to the following splat: [ 1136.321305] WARNING: CPU: 2 PID: 2483 at lib/debugobjects.c:260 debug_print_object+0x83/0xa0() [ 1136.321311] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x20 ... [ 1136.321390] Call Trace: [ 1136.321398] [] dump_stack+0x45/0x56 [ 1136.321405] [] warn_slowpath_common+0x78/0xa0 [ 1136.321410] [] warn_slowpath_fmt+0x47/0x50 [ 1136.321414] [] debug_print_object+0x83/0xa0 [ 1136.321420] [] ? execute_in_process_context+0x90/0x90 [ 1136.321424] [] debug_check_no_obj_freed+0x20b/0x250 [ 1136.321429] [] ? kmem_cache_destroy+0x92/0x100 [ 1136.321433] [] kmem_cache_free+0x125/0x210 [ 1136.321436] [] kmem_cache_destroy+0x92/0x100 [ 1136.321443] [] nf_conntrack_cleanup_net_list+0x126/0x160 [nf_conntrack] [ 1136.321449] [] nf_conntrack_pernet_exit+0x6d/0x80 [nf_conntrack] [ 1136.321453] [] ops_exit_list.isra.3+0x53/0x60 [ 1136.321457] [] cleanup_net+0x100/0x1b0 [ 1136.321460] [] process_one_work+0x18e/0x430 [ 1136.321463] [] worker_thread+0x119/0x390 [ 1136.321467] [] ? manage_workers.isra.23+0x2a0/0x2a0 [ 1136.321470] [] kthread+0xbb/0xc0 [ 1136.321472] [] ? kthread_create_on_node+0x110/0x110 [ 1136.321477] [] ret_from_fork+0x7c/0xb0 [ 1136.321479] [] ? kthread_create_on_node+0x110/0x110 [ 1136.321481] ---[ end trace 25f53c192da70825 ]--- Reported-by: Linus Torvalds Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e22d950c60b3..43549eb7a7be 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -764,9 +764,10 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); nf_ct_ext_destroy(ct); - atomic_dec(&net->ct.count); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + smp_mb__before_atomic_dec(); + atomic_dec(&net->ct.count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); -- cgit v1.2.3 From acab78b99633f12aa2b697474562e19c5718a1ca Mon Sep 17 00:00:00 2001 From: Luís Fernando Cornachioni Estrozi Date: Wed, 6 Nov 2013 21:39:32 +0000 Subject: netfilter: ebt_ip6: fix source and destination matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug was introduced on commit 0898f99a2. This just recovers two checks that existed before as suggested by Bart De Schuymer. Signed-off-by: Luís Fernando Cornachioni Estrozi Signed-off-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip6.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 99c85668f551..17fd5f2cb4b8 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -48,10 +48,12 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE) || + if ((info->bitmask & EBT_IP6_SOURCE && + FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE)) || + (info->bitmask & EBT_IP6_DEST && FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST)) + &info->daddr), EBT_IP6_DEST))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; -- cgit v1.2.3 From dcdfdf56b4a6c9437fc37dbc9cee94a788f9b0c4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 Nov 2013 19:12:34 -0800 Subject: ipv4: fix race in concurrent ip_route_input_slow() CPUs can ask for local route via ip_route_input_noref() concurrently. if nh_rth_input is not cached yet, CPUs will proceed to allocate equivalent DSTs on 'lo' and then will try to cache them in nh_rth_input via rt_cache_route() Most of the time they succeed, but on occasion the following two lines: orig = *p; prev = cmpxchg(p, orig, rt); in rt_cache_route() do race and one of the cpus fails to complete cmpxchg. But ip_route_input_slow() doesn't check the return code of rt_cache_route(), so dst is leaking. dst_destroy() is never called and 'lo' device refcnt doesn't go to zero, which can be seen in the logs as: unregister_netdevice: waiting for lo to become free. Usage count = 1 Adding mdelay() between above two lines makes it easily reproducible. Fix it similar to nh_pcpu_rth_output case. Fixes: d2d68ba9fe8b ("ipv4: Cache input routes in fib_info nexthops.") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f428935c50db..f8da28278014 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1776,8 +1776,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; -- cgit v1.2.3 From d2615bf450694c1302d86b9cc8a8958edfe4c3a4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 19 Nov 2013 20:47:15 -0500 Subject: net: core: Always propagate flag changes to interfaces The following commit: b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 net: only invoke dev->change_rx_flags when device is UP tried to fix a problem with VLAN devices and promiscuouse flag setting. The issue was that VLAN device was setting a flag on an interface that was down, thus resulting in bad promiscuity count. This commit blocked flag propagation to any device that is currently down. A later commit: deede2fabe24e00bd7e246eb81cd5767dc6fcfc7 vlan: Don't propagate flag changes on down interfaces fixed VLAN code to only propagate flags when the VLAN interface is up, thus fixing the same issue as above, only localized to VLAN. The problem we have now is that if we have create a complex stack involving multiple software devices like bridges, bonds, and vlans, then it is possible that the flags would not propagate properly to the physical devices. A simple examle of the scenario is the following: eth0----> bond0 ----> bridge0 ---> vlan50 If bond0 or eth0 happen to be down at the time bond0 is added to the bridge, then eth0 will never have promisc mode set which is currently required for operation as part of the bridge. As a result, packets with vlan50 will be dropped by the interface. The only 2 devices that implement the special flag handling are VLAN and DSA and they both have required code to prevent incorrect flag propagation. As a result we can remove the generic solution introduced in b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 and leave it to the individual devices to decide whether they will block flag propagation or not. Reported-by: Stefan Priebe Suggested-by: Veaceslav Falico Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7e00a7342ee6..ba3b7ea5ebb3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4996,7 +4996,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } -- cgit v1.2.3 From f873042093c0b418d2351fe142222b625c740149 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Sat, 7 Dec 2013 22:12:05 +0800 Subject: bridge: flush br's address entry in fdb when remove the bridge dev When the following commands are executed: brctl addbr br0 ifconfig br0 hw ether rmmod bridge The calltrace will occur: [ 563.312114] device eth1 left promiscuous mode [ 563.312188] br0: port 1(eth1) entered disabled state [ 563.468190] kmem_cache_destroy bridge_fdb_cache: Slab cache still has objects [ 563.468197] CPU: 6 PID: 6982 Comm: rmmod Tainted: G O 3.12.0-0.7-default+ #9 [ 563.468199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 563.468200] 0000000000000880 ffff88010f111e98 ffffffff814d1c92 ffff88010f111eb8 [ 563.468204] ffffffff81148efd ffff88010f111eb8 0000000000000000 ffff88010f111ec8 [ 563.468206] ffffffffa062a270 ffff88010f111ed8 ffffffffa063ac76 ffff88010f111f78 [ 563.468209] Call Trace: [ 563.468218] [] dump_stack+0x6a/0x78 [ 563.468234] [] kmem_cache_destroy+0xfd/0x100 [ 563.468242] [] br_fdb_fini+0x10/0x20 [bridge] [ 563.468247] [] br_deinit+0x4e/0x50 [bridge] [ 563.468254] [] SyS_delete_module+0x199/0x2b0 [ 563.468259] [] system_call_fastpath+0x16/0x1b [ 570.377958] Bridge firewalling registered --------------------------- cut here ------------------------------- The reason is that when the bridge dev's address is changed, the br_fdb_change_mac_address() will add new address in fdb, but when the bridge was removed, the address entry in the fdb did not free, the bridge_fdb_cache still has objects when destroy the cache, Fix this by flushing the bridge address entry when removing the bridge. v2: according to the Toshiaki Makita and Vlad's suggestion, I only delete the vlan0 entry, it still have a leak here if the vlan id is other number, so I need to call fdb_delete_by_port(br, NULL, 1) to flush all entries whose dst is NULL for the bridge. Suggested-by: Toshiaki Makita Suggested-by: Vlad Yasevich Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e6194fcd88e..4bf02adb5dc2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + br_vlan_flush(br); del_timer_sync(&br->gc_timer); -- cgit v1.2.3 From f3d3342602f8bcbf37d7c46641cb9bca7618eb1c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:22 +0100 Subject: net: rework recvmsg handler msg_name and msg_namelen logic This patch now always passes msg->msg_namelen as 0. recvmsg handlers must set msg_namelen to the proper size <= sizeof(struct sockaddr_storage) to return msg_name to the user. This prevents numerous uninitialized memory leaks we had in the recvmsg handlers and makes it harder for new code to accidentally leak uninitialized memory. Optimize for the case recvfrom is called with NULL as address. We don't need to copy the address at all, so set it to NULL before invoking the recvmsg handler. We can do so, because all the recvmsg handlers must cope with the case a plain read() is called on them. read() also sets msg_name to NULL. Also document these changes in include/linux/net.h as suggested by David Miller. Changes since RFC: Set msg->msg_name = NULL if user specified a NULL in msg_name but had a non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't affect sendto as it would bail out earlier while trying to copy-in the address. It also more naturally reflects the logic by the callers of verify_iovec. With this change in place I could remove " if (!uaddr || msg_sys->msg_namelen == 0) msg->msg_name = NULL ". This change does not alter the user visible error logic as we ignore msg_namelen as long as msg_name is NULL. Also remove two unnecessary curly brackets in ___sys_recvmsg and change comments to netdev style. Cc: David Miller Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- crypto/algif_hash.c | 2 -- crypto/algif_skcipher.c | 1 - drivers/isdn/mISDN/socket.c | 13 ++++--------- drivers/net/ppp/pppoe.c | 2 -- include/linux/net.h | 8 ++++++++ net/appletalk/ddp.c | 16 +++++++--------- net/atm/common.c | 2 -- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/af_bluetooth.c | 9 ++------- net/bluetooth/hci_sock.c | 2 -- net/bluetooth/rfcomm/sock.c | 1 - net/bluetooth/sco.c | 1 - net/caif/caif_socket.c | 4 ---- net/compat.c | 3 ++- net/core/iovec.c | 3 ++- net/ipx/af_ipx.c | 3 +-- net/irda/af_irda.c | 4 ---- net/iucv/af_iucv.c | 2 -- net/key/af_key.c | 1 - net/l2tp/l2tp_ppp.c | 2 -- net/llc/af_llc.c | 2 -- net/netlink/af_netlink.c | 2 -- net/netrom/af_netrom.c | 3 +-- net/nfc/llcp_sock.c | 2 -- net/nfc/rawsock.c | 2 -- net/packet/af_packet.c | 32 +++++++++++++++----------------- net/rds/recv.c | 2 -- net/rose/af_rose.c | 8 +++++--- net/rxrpc/ar-recvmsg.c | 9 ++++++--- net/socket.c | 19 +++++++++++-------- net/tipc/socket.c | 6 ------ net/unix/af_unix.c | 5 ----- net/vmw_vsock/af_vsock.c | 2 -- net/vmw_vsock/vmci_transport.c | 2 -- net/x25/af_x25.c | 3 +-- 35 files changed, 67 insertions(+), 115 deletions(-) (limited to 'net') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0262210cad38..ef5356cd280a 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock, else if (len < ds) msg->msg_flags |= MSG_TRUNC; - msg->msg_namelen = 0; - lock_sock(sk); if (ctx->more) { ctx->more = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a1c4f0a55583..6a6dfc062d2a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, long copied = 0; lock_sock(sk); - msg->msg_namelen = 0; for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; iovlen--, iov++) { unsigned long seglen = iov->iov_len; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index e47dcb9d1e91..5cefb479c707 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; - struct sockaddr_mISDN *maddr; int copied, err; @@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { - msg->msg_namelen = sizeof(struct sockaddr_mISDN); - maddr = (struct sockaddr_mISDN *)msg->msg_name; + if (msg->msg_name) { + struct sockaddr_mISDN *maddr = msg->msg_name; + maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; if ((sk->sk_protocol == ISDN_P_LAPD_TE) || @@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, maddr->sapi = _pms(sk)->ch.addr & 0xFF; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; } - } else { - if (msg->msg_namelen) - printk(KERN_WARNING "%s: too small namelen %d\n", - __func__, msg->msg_namelen); - msg->msg_namelen = 0; + msg->msg_namelen = sizeof(*maddr); } copied = skb->len + MISDN_HEADER_LEN; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5f66e30d9823..82ee6ed954cb 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, if (error < 0) goto end; - m->msg_namelen = 0; - if (skb) { total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); diff --git a/include/linux/net.h b/include/linux/net.h index b292a0435571..4bcee94cef93 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -164,6 +164,14 @@ struct proto_ops { #endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); + /* Notes for implementing recvmsg: + * =============================== + * msg->msg_namelen should get updated by the recvmsg handlers + * iff msg_name != NULL. It is by default 0 to prevent + * returning uninitialized memory to user space. The recvfrom + * handlers can assume that msg.msg_name is either NULL or has + * a minimum size of sizeof(struct sockaddr_storage). + */ int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7fee50d637f9..7d424ac6e760 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index 737bef59ce89..7b491006eaf4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a00123ebb0ae..7bb1605bdfd9 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6a1671ea2ff..56ca494621c6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -224,10 +224,9 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { - if (sk->sk_shutdown & RCV_SHUTDOWN) { - msg->msg_namelen = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; - } + return err; } @@ -245,8 +244,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); - else - msg->msg_namelen = 0; } skb_free_datagram(sk, skb); @@ -295,8 +292,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 71f0be173080..6a6c8bb4fd72 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -856,8 +856,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c4d3d423f89b..c80766f892c3 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -615,7 +615,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 12a0e51e21e1..24fa3964b3c8 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -711,7 +711,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, pi->setting); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 05a41c7ec304..d6be3edb7a43 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/compat.c b/net/compat.c index 89032580bd1d..618c6a8a911b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; diff --git a/net/core/iovec.c b/net/core/iovec.c index 4cdb7c48dad6..b61869429f4c 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 7a1e0fc1bd4d..e096025b477f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0f676908d15b..de7db23049f1 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 168aff5e60de..c4b7218058b6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; u32 offset; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 911ef03bf8fb..545f047868ad 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ffda81ef1a70..be5fadf34739 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6cba486353e8..7b01b9f5846c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f0176e1a5a81..bca50b95c182 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 698814bfa7ad..53c19a35fc6d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d308402b67d8..824c6056bf82 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index cd958b381f96..66bcd2eb5773 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -244,8 +244,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2e8286b47c28..61bd50adead1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2660,7 +2660,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2744,22 +2743,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - copied = skb->len; if (copied > len) { copied = len; @@ -2772,9 +2759,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17cf6bf9..de339b24ca14 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e98fcfbe6007..33af77246bfe 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687c3890..898492a8d61b 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/socket.c b/net/socket.c index c226aceee65b..fc285564e49e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1840,8 +1840,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -2221,16 +2223,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2239,6 +2239,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3906527259d1..3b61851bb927 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -980,9 +980,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1091,9 +1088,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c1f403bed683..01625ccc3ae6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 545c08b8a1d4..5adfd94c5b85 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 9d6986634e0b..687360da62d9 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; - msg->msg_namelen = 0; - /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 45a3ab5612c1..7622789d3750 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: -- cgit v1.2.3 From 68c6beb373955da0886d8f4f5995b3922ceda4be Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:34 +0100 Subject: net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) In that case it is probable that kernel code overwrote part of the stack. So we should bail out loudly here. The BUG_ON may be removed in future if we are sure all protocols are conformant. Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index fc285564e49e..0b18693f2be6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -221,12 +221,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) -- cgit v1.2.3 From aec6f90d412fe30b88fdea3d1880734c837e15d4 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Thu, 21 Nov 2013 04:43:11 +0100 Subject: wimax: remove dead code This removes a code line that is between a "return 0;" and an error label. This code line can never be reached. Found by Coverity (CID: 1130529) Signed-off-by: Michael Opdenacker Acked-by: Johannes Berg Signed-off-by: David S. Miller --- net/wimax/stack.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/wimax/stack.c b/net/wimax/stack.c index ef2191b969a7..ec8b577db135 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -610,7 +610,6 @@ int __init wimax_subsys_init(void) d_fnend(4, NULL, "() = 0\n"); return 0; - genl_unregister_family(&wimax_gnl_family); error_register_family: d_fnend(4, NULL, "() = %d\n", result); return result; -- cgit v1.2.3 From e40526cb20b5ee53419452e1f03d97092f144418 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Nov 2013 16:50:58 +0100 Subject: packet: fix use after free race in send path when dev is released Salam reported a use after free bug in PF_PACKET that occurs when we're sending out frames on a socket bound device and suddenly the net device is being unregistered. It appears that commit 827d9780 introduced a possible race condition between {t,}packet_snd() and packet_notifier(). In the case of a bound socket, packet_notifier() can drop the last reference to the net_device and {t,}packet_snd() might end up suddenly sending a packet over a freed net_device. To avoid reverting 827d9780 and thus introducing a performance regression compared to the current state of things, we decided to hold a cached RCU protected pointer to the net device and maintain it on write side via bind spin_lock protected register_prot_hook() and __unregister_prot_hook() calls. In {t,}packet_snd() path, we access this pointer under rcu_read_lock through packet_cached_dev_get() that holds reference to the device to prevent it from being freed through packet_notifier() while we're in send path. This is okay to do as dev_put()/dev_hold() are per-cpu counters, so this should not be a performance issue. Also, the code simplifies a bit as we don't need need_rls_dev anymore. Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") Reported-by: Salam Noureddine Signed-off-by: Daniel Borkmann Signed-off-by: Salam Noureddine Cc: Ben Greear Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 59 ++++++++++++++++++++++++++++++-------------------- net/packet/internal.h | 1 + 2 files changed, 37 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 61bd50adead1..ac27c86ef6d1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2173,8 +2190,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b4561207..1035fa2d909c 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From 220815a9665f7deca98a09ecca655044f94cfa44 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Nov 2013 18:17:04 +0100 Subject: genetlink: fix genlmsg_multicast() bug Unfortunately, I introduced a tremendously stupid bug into genlmsg_multicast() when doing all those multicast group changes: it adjusts the group number, but then passes it to genlmsg_multicast_netns() which does that again. Somehow, my tests failed to catch this, so add a warning into genlmsg_multicast_netns() and remove the offending group ID adjustment. Also add a warning to the similar code in other functions so people who misuse them are more loudly warned. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 5 +---- net/netlink/genetlink.c | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ace4abf118d7..771af09e90eb 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -265,7 +265,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); @@ -283,9 +283,6 @@ static inline int genlmsg_multicast(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) - return -EINVAL; - group = family->mcgrp_offset + group; return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7dbc4f732c75..4518a57aa5fe 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1045,7 +1045,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); @@ -1062,7 +1062,7 @@ void genl_notify(struct genl_family *family, if (nlh) report = nlmsg_report(nlh); - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, portid, group, report, flags); -- cgit v1.2.3 From 9d8506cc2d7ea1f911c72c100193a3677f6668c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Nov 2013 11:10:04 -0800 Subject: gso: handle new frag_list of frags GRO packets Recently GRO started generating packets with frag_lists of frags. This was not handled by GSO, thus leading to a crash. Thankfully these packets are of a regular form and are easy to handle. This patch handles them in two ways. For completely non-linear frag_list entries, we simply continue to iterate over the frag_list frags once we exhaust the normal frags. For frag_list entries with linear parts, we call pskb_trim on the first part of the frag_list skb, and then process the rest of the frags in the usual way. This patch also kills a chunk of dead frag_list code that has obviously never ever been run since it ends up generating a bogus GSO-segmented packet with a frag_list entry. Future work is planned to split super big packets into TSO ones. Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Reported-by: Christoph Paasch Reported-by: Jerry Chu Reported-by: Sander Eikelenboom Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Tested-by: Sander Eikelenboom Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 75 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8cec1e6b844d..2718fed53d8c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2796,6 +2796,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; + skb_frag_t *skb_frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2835,16 +2836,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags) { - BUG_ON(fskb->len != len); + if (!hsize && i >= nfrags && skb_headlen(fskb) && + (skb_headlen(fskb) == len || sg)) { + BUG_ON(skb_headlen(fskb) > len); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + pos += skb_headlen(fskb); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + size = skb_frag_size(skb_frag); + if (pos + size > offset + len) + break; + + i++; + pos += size; + skb_frag++; + } - pos += len; nskb = skb_clone(fskb, GFP_ATOMIC); fskb = fskb->next; if (unlikely(!nskb)) goto err; + if (unlikely(pskb_trim(nskb, len))) { + kfree_skb(nskb); + goto err; + } + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); @@ -2881,7 +2904,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb->data - tnl_hlen, doffset + tnl_hlen); - if (fskb != skb_shinfo(skb)->frag_list) + if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { @@ -2899,8 +2922,28 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; - while (pos < offset + len && i < nfrags) { - *frag = skb_shinfo(skb)->frags[i]; + while (pos < offset + len) { + if (i >= nfrags) { + BUG_ON(skb_headlen(fskb)); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + + BUG_ON(!nfrags); + + fskb = fskb->next; + } + + if (unlikely(skb_shinfo(nskb)->nr_frags >= + MAX_SKB_FRAGS)) { + net_warn_ratelimited( + "skb_segment: too many frags: %u %u\n", + pos, mss); + goto err; + } + + *frag = *skb_frag; __skb_frag_ref(frag); size = skb_frag_size(frag); @@ -2913,6 +2956,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; + skb_frag++; pos += size; } else { skb_frag_size_sub(frag, pos + size - (offset + len)); @@ -2922,25 +2966,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) frag++; } - if (pos < offset + len) { - struct sk_buff *fskb2 = fskb; - - BUG_ON(pos + fskb->len != offset + len); - - pos += fskb->len; - fskb = fskb->next; - - if (fskb2->next) { - fskb2 = skb_clone(fskb2, GFP_ATOMIC); - if (!fskb2) - goto err; - } else - skb_get(fskb2); - - SKB_FRAG_ASSERT(nskb); - skb_shinfo(nskb)->frag_list = fskb2; - } - skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; -- cgit v1.2.3