From 37fdffb217a45609edccbb8b407d031143f551c0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 21 Aug 2018 14:41:41 +0300 Subject: net/mlx5: WQ, fixes for fragmented WQ buffers API mlx5e netdevice used to calculate fragment edges by a call to mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct indication for queues smaller than a PAGE_SIZE, (broken by default on PowerPC, where PAGE_SIZE == 64KB). Here it is replaced by the correct new calls/API. Since (TX/RX) Work Queues buffers are fragmented, here we introduce changes to the API in core driver, so that it gets a stride index and returns the index of last stride on same fragment, and an additional wrapping function that returns the number of physically contiguous strides that can be written contiguously to the work queue. This obsoletes the following API functions, and their buggy usage in EN driver: * mlx5_wq_cyc_get_frag_size() * mlx5_wq_cyc_ctr2fragix() The new API improves modularity and hides the details of such calculation for mlx5e netdevice and mlx5_ib rdma drivers. New calculation is also more efficient, and improves performance as follows: Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size. Before: 16,477,619 pps After: 17,085,793 pps 3.7% improvement Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 66d94b4557cf..88a041b73abf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ((fbc->frag_sz_m1 & ix) << fbc->log_stride); } +static inline u32 +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) +{ + u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1; + + return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); +} + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); -- cgit v1.2.3 From ca2b497253ad01c80061a1f3ee9eb91b5d54a849 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Oct 2018 13:24:36 +0100 Subject: arm64: perf: Reject stand-alone CHAIN events for PMUv3 It doesn't make sense for a perf event to be configured as a CHAIN event in isolation, so extend the arm_pmu structure with a ->filter_match() function to allow the backend PMU implementation to reject CHAIN events early. Cc: Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 7 +++++++ drivers/perf/arm_pmu.c | 8 +++++++- include/linux/perf/arm_pmu.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7f01f6f60b87..d0b7dd8fb184 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 10f92e1d8e7b..bf309ff6f244 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -99,6 +99,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 -- cgit v1.2.3 From f547fac624be53ad8b07e9ebca7654a7827ba61b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Oct 2018 16:22:47 +0200 Subject: ipv6: rate-limit probes for neighbourless routes When commit 270972554c91 ("[IPV6]: ROUTE: Add Router Reachability Probing (RFC4191).") introduced router probing, the rt6_probe() function required that a neighbour entry existed. This neighbour entry is used to record the timestamp of the last probe via the ->updated field. Later, commit 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().") removed the requirement for a neighbour entry. Neighbourless routes skip the interval check and are not rate-limited. This patch adds rate-limiting for neighbourless routes, by recording the timestamp of the last probe in the fib6_info itself. Fixes: 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++++ net/ipv6/route.c | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3d4930528db0..2d31e22babd8 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,10 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif + u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a366c05a239d..abcb5ae77319 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -520,10 +520,11 @@ static void rt6_probe_deferred(struct work_struct *w) static void rt6_probe(struct fib6_info *rt) { - struct __rt6_probe_work *work; + struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; struct neighbour *neigh; struct net_device *dev; + struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate @@ -539,15 +540,12 @@ static void rt6_probe(struct fib6_info *rt) nh_gw = &rt->fib6_nh.nh_gw; dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); + idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - struct inet6_dev *idev; - if (neigh->nud_state & NUD_VALID) goto out; - idev = __in6_dev_get(dev); - work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, @@ -557,11 +555,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else { + } else if (time_after(jiffies, rt->last_probe + + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { + rt->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); -- cgit v1.2.3 From d805397c3822d57ca3884d4bea37b2291fc40992 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Oct 2018 19:58:29 +0800 Subject: sctp: use the pmtu from the icmp packet to update transport pathmtu Other than asoc pmtu sync from all transports, sctp_assoc_sync_pmtu is also processing transport pmtu_pending by icmp packets. But it's meaningless to use sctp_dst_mtu(t->dst) as new pmtu for a transport. The right pmtu value should come from the icmp packet, and it would be saved into transport->mtu_info in this patch and used later when the pmtu sync happens in sctp_sendmsg_to_asoc or sctp_packet_config. Besides, without this patch, as pmtu can only be updated correctly when receiving a icmp packet and no place is holding sock lock, it will take long time if the sock is busy with sending packets. Note that it doesn't process transport->mtu_info in .release_cb(), as there is no enough information for pmtu update, like for which asoc or transport. It is not worth traversing all asocs to check pmtu_pending. So unlike tcp, sctp does this in tx path, for which mtu_info needs to be atomic_t. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 ++ net/sctp/associola.c | 3 ++- net/sctp/input.c | 1 + net/sctp/output.c | 6 ++++++ 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 28a7c8e44636..a11f93790476 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -876,6 +876,8 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; + atomic_t mtu_info; + /* When was the last time that we heard from this transport? We use * this to pick new active and retran paths. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 297d9cf960b9..a827a1f562bf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. */ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst)); + sctp_transport_update_pmtu(t, + atomic_read(&t->mtu_info)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 9bbc5f92c941..5c36a99882ed 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; if (sock_owned_by_user(sk)) { + atomic_set(&t->mtu_info, pmtu); asoc->pmtu_pending = 1; t->pmtu_pending = 1; return; diff --git a/net/sctp/output.c b/net/sctp/output.c index 7f849b01ec8e..67939ad99c01 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, sctp_assoc_sync_pmtu(asoc); } + if (asoc->pmtu_pending) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; + } + /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ -- cgit v1.2.3 From 9068e02f58740778d8270840657f1e250a2cc60f Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Fri, 5 Oct 2018 14:52:15 -0700 Subject: drm/edid: VSDB yCBCr420 Deep Color mode bit definitions HDMI Forum VSDB YCBCR420 deep color capability bits are 2:0. Correct definitions in the header for the mask to work correctly. Fixes: e6a9a2c3dc43 ("drm/edid: parse ycbcr 420 deep color information") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107893 Cc: # v4.14+ Signed-off-by: Clint Taylor Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1538776335-12569-1-git-send-email-clinton.a.taylor@intel.com --- drivers/gpu/drm/drm_edid.c | 2 +- include/drm/drm_edid.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1e2b9407c8d0..ff0bfc65a8c1 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4282,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b25d12ef120a..e3c404833115 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) -- cgit v1.2.3 From 0ac1077e3a549bf8d35971613e2be05bdbb41a00 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 16 Oct 2018 15:52:02 +0800 Subject: sctp: get pr_assoc and pr_stream all status with SCTP_PR_SCTP_ALL instead According to rfc7496 section 4.3 or 4.4: sprstat_policy: This parameter indicates for which PR-SCTP policy the user wants the information. It is an error to use SCTP_PR_SCTP_NONE in sprstat_policy. If SCTP_PR_SCTP_ALL is used, the counters provided are aggregated over all supported policies. We change to dump pr_assoc and pr_stream all status by SCTP_PR_SCTP_ALL instead, and return error for SCTP_PR_SCTP_NONE, as it also said "It is an error to use SCTP_PR_SCTP_NONE in sprstat_policy. " Fixes: 826d253d57b1 ("sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt") Fixes: d229d48d183f ("sctp: add SCTP_PR_STREAM_STATUS sockopt for prsctp") Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index b479db5c71d9..34dd3d497f2c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -301,6 +301,7 @@ enum sctp_sinfo_flags { SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ SCTP_SENDALL = (1 << 6), + SCTP_PR_SCTP_ALL = (1 << 7), SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f73e9d38d5ba..e25a20fc629a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7100,14 +7100,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; - if (policy == SCTP_PR_SCTP_NONE) { + if (policy & SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -7159,7 +7159,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); @@ -7175,7 +7175,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; } - if (policy == SCTP_PR_SCTP_NONE) { + if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { -- cgit v1.2.3 From 9c0be3f6b5d776dfe3ed249862c244a4486414dc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 13 Oct 2018 15:10:50 -0400 Subject: tracepoint: Fix tracepoint array element size mismatch commit 46e0c9be206f ("kernel: tracepoints: add support for relative references") changes the layout of the __tracepoint_ptrs section on architectures supporting relative references. However, it does so without turning struct tracepoint * const into const int elsewhere in the tracepoint code, which has the following side-effect: Setting mod->num_tracepoints is done in by module.c: mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), &mod->num_tracepoints); Basically, since sizeof(*mod->tracepoints_ptrs) is a pointer size (rather than sizeof(int)), num_tracepoints is erroneously set to half the size it should be on 64-bit arch. So a module with an odd number of tracepoints misses the last tracepoint due to effect of integer division. So in the module going notifier: for_each_tracepoint_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints, tp_module_going_check_quiescent, NULL); the expression (mod->tracepoints_ptrs + mod->num_tracepoints) actually evaluates to something within the bounds of the array, but miss the last tracepoint if the number of tracepoints is odd on 64-bit arch. Fix this by introducing a new typedef: tracepoint_ptr_t, which is either "const int" on architectures that have PREL32 relocations, or "struct tracepoint * const" on architectures that does not have this feature. Also provide a new tracepoint_ptr_defer() static inline to encapsulate deferencing this type rather than duplicate code and ugly idefs within the for_each_tracepoint_range() implementation. This issue appears in 4.19-rc kernels, and should ideally be fixed before the end of the rc cycle. Acked-by: Ard Biesheuvel Acked-by: Jessica Yu Link: http://lkml.kernel.org/r/20181013191050.22389-1-mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org Cc: Michael Ellerman Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: James Morris Cc: James Morris Cc: Josh Poimboeuf Cc: Kees Cook Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Petr Mladek Cc: Russell King Cc: "Serge E. Hallyn" Cc: Sergey Senozhatsky Cc: Thomas Garnier Cc: Thomas Gleixner Cc: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Greg Kroah-Hartman Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 3 ++- include/linux/tracepoint-defs.h | 6 ++++++ include/linux/tracepoint.h | 36 +++++++++++++++++++++++------------- kernel/tracepoint.c | 24 ++++++++---------------- 4 files changed, 39 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index f807f15bebbe..e19ae08c7fb8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -430,7 +431,7 @@ struct module { #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 22c5a46e9693..49ba9cde7e4b 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,6 +35,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef const int tracepoint_ptr_t; +#else +typedef struct tracepoint * const tracepoint_ptr_t; +#endif + struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 041f7e56a289..538ba1a58f5b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -99,6 +99,29 @@ extern void syscall_unregfunc(void); #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return offset_to_ptr(p); +} + +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return *p; +} + +#define __TRACEPOINT_ENTRY(name) \ + static tracepoint_ptr_t __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + #endif /* _LINUX_TRACEPOINT_H */ /* @@ -253,19 +276,6 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } -#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define __TRACEPOINT_ENTRY(name) \ - asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ - " .balign 4 \n" \ - " .long __tracepoint_" #name " - . \n" \ - " .previous \n") -#else -#define __TRACEPOINT_ENTRY(name) \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name -#endif - /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index bf2c06ef9afc..a3be42304485 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,8 +28,8 @@ #include #include -extern struct tracepoint * const __start___tracepoints_ptrs[]; -extern struct tracepoint * const __stop___tracepoints_ptrs[]; +extern tracepoint_ptr_t __start___tracepoints_ptrs[]; +extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); @@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, +static void for_each_tracepoint_range( + tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { + tracepoint_ptr_t *iter; + if (!begin) return; - - if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { - const int *iter; - - for (iter = (const int *)begin; iter < (const int *)end; iter++) - fct(offset_to_ptr(iter), priv); - } else { - struct tracepoint * const *iter; - - for (iter = begin; iter < end; iter++) - fct(*iter, priv); - } + for (iter = begin; iter < end; iter++) + fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES -- cgit v1.2.3 From 6b4f92af3d59e882d3ba04c44a815266890d188f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 12 Oct 2018 23:53:59 +0200 Subject: geneve, vxlan: Don't set exceptions if skb->len < mtu We shouldn't abuse exceptions: if the destination MTU is already higher than what we're transmitting, no exception should be created. Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path") Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/geneve.c | 7 +++---- drivers/net/vxlan.c | 4 ++-- include/net/dst.h | 10 ++++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 61c4bfbeb41c..493cd382b8aa 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -830,8 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); - skb_dst_update_pmtu(skb, dst_mtu(&rt->dst) - - GENEVE_IPV4_HLEN - info->options_len); + skb_tunnel_check_pmtu(skb, &rt->dst, + GENEVE_IPV4_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { @@ -872,8 +872,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); - skb_dst_update_pmtu(skb, dst_mtu(dst) - - GENEVE_IPV6_HLEN - info->options_len); + skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 22e0ce592e07..27bd586b94b0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2194,7 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; - skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN_HEADROOM); + skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); @@ -2231,7 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN6_HEADROOM); + skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); diff --git a/include/net/dst.h b/include/net/dst.h index 7f735e76ca73..6cf0870414c7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu); } +static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, + struct dst_entry *encap_dst, + int headroom) +{ + u32 encap_mtu = dst_mtu(encap_dst); + + if (skb->len > encap_mtu - headroom) + skb_dst_update_pmtu(skb, encap_mtu - headroom); +} + #endif /* _NET_DST_H */ -- cgit v1.2.3 From 5660b9d9d6a29c2c3cc12f62ae44bfb56b0a15a9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 17 Oct 2018 21:11:27 +0800 Subject: sctp: fix the data size calculation in sctp_data_size sctp data size should be calculated by subtracting data chunk header's length from chunk_hdr->length, not just data header. Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5ef1bad81ef5..9e3d32746430 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sctp_datahdr_len(&chunk->asoc->stream); + size -= sctp_datachk_len(&chunk->asoc->stream); return size; } -- cgit v1.2.3 From eb66ae030829605d61fbef1909ce310e29f78821 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 12 Oct 2018 15:22:59 -0700 Subject: mremap: properly flush TLB before releasing the page Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another. That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry. As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page). This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry. Reported-and-tested-by: Jann Horn Acked-by: Will Deacon Tested-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 10 ++++------ mm/mremap.c | 30 +++++++++++++----------------- 3 files changed, 18 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 99c19b06d9a4..fdcb45999b26 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 58269f8ba7c4..deed97fba979 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd) bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) + if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, } pmd = move_soft_dirty_pmd(pmd); set_pmd_at(mm, new_addr, new_pmd, pmd); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } diff --git a/mm/mremap.c b/mm/mremap.c index 5c2e18505f75..a9617e72e6b7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks, bool *need_flush) + unsigned long new_addr, bool need_rmap_locks) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; @@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte = ptep_get_and_clear(mm, old_addr, old_pte); /* - * If we are remapping a dirty PTE, make sure + * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the - * old PTE or we may race with page_mkclean(). + * PTE. * - * This check has to be done after we removed the - * old PTE from page tables or another thread may - * dirty it after the check and before the removal. + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. */ - if (pte_present(pte) && pte_dirty(pte)) + if (pte_present(pte)) force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); - if (force_flush) - flush_tlb_range(vma, old_end - len, old_end); - else - *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd, - &need_flush); + old_end, old_pmd, new_pmd); if (need_rmap_locks) drop_rmap_locks(vma); if (moved) @@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > next - new_addr) extent = next - new_addr; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks, &need_flush); + new_pmd, new_addr, need_rmap_locks); } - if (need_flush) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); -- cgit v1.2.3