diff options
| author | David S. Miller <davem@davemloft.net> | 2016-09-30 01:50:57 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-09-30 01:50:57 -0400 |
| commit | bcdc6efabda3ba6a67f4cb8915873e7d6759b7e6 (patch) | |
| tree | d977daa27ad975a6acbc313ec792592f4cf50847 /net/ipv6 | |
| parent | fa1403548daf3a2c8c988f89db1053df70200405 (diff) | |
| parent | 6d4a741cbbfa6612a479656654ca5edf7becc72c (diff) | |
| download | linux-bcdc6efabda3ba6a67f4cb8915873e7d6759b7e6.tar.bz2 | |
Merge branch 'net_proc_perf'
Jia He says:
====================
Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit
a3a773726c9f ("net: Optimize snmp stat aggregation by walking all
the percpu data at once"), I watched several other snmp_fold_field
callsites which would cause high cache miss rate.
test source code:
================
My simple test case, which read from the procfs items endlessly:
/***********************************************************/
int main(int argc, char **argv)
{
int i;
int fd = -1 ;
int rdsize = 0;
char buf[LINELEN+1];
buf[LINELEN] = 0;
memset(buf,0,LINELEN);
if(1 >= argc) {
printf("file name empty\n");
return -1;
}
fd = open(argv[1], O_RDWR, 0644);
if(0 > fd){
printf("open error\n");
return -2;
}
for(i=0;i<0xffffffff;i++) {
while(0 < (rdsize = read(fd,buf,LINELEN))){
//nothing here
}
lseek(fd, 0, SEEK_SET);
}
close(fd);
return 0;
}
/**********************************************************/
compile and run:
================
gcc test.c -o test
perf stat -d -e cache-misses ./test /proc/net/snmp
perf stat -d -e cache-misses ./test /proc/net/snmp6
perf stat -d -e cache-misses ./test /proc/net/sctp/snmp
perf stat -d -e cache-misses ./test /proc/net/xfrm_stat
before the patch set:
====================
Performance counter stats for 'system wide':
355911097 cache-misses [40.08%]
2356829300 L1-dcache-loads [60.04%]
355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%]
346544541 LLC-loads [59.97%]
389763 LLC-load-misses # 0.11% of all LL-cache hits [40.02%]
6.245162638 seconds time elapsed
After the patch set:
===================
Performance counter stats for 'system wide':
194992476 cache-misses [40.03%]
6718051877 L1-dcache-loads [60.07%]
194871921 L1-dcache-load-misses # 2.90% of all L1-dcache hits [60.11%]
187632232 LLC-loads [60.04%]
464466 LLC-load-misses # 0.25% of all LL-cache hits [39.89%]
6.868422769 seconds time elapsed
The cache-miss rate can be reduced from 15% to 2.9%
changelog
=========
v6:
- correct v5
v5:
- order local variables from longest to shortest line
v4:
- move memset into one block of if statement in snmp6_seq_show_item
- remove the changes in netstat_seq_show considerred the stack usage is too large
v3:
- introduce generic interface (suggested by Marcelo Ricardo Leitner)
- use max_t instead of self defined macro (suggested by David Miller)
v2:
- fix bug in udplite statistics.
- snmp_seq_show is split into 2 parts
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
| -rw-r--r-- | net/ipv6/addrconf.c | 12 | ||||
| -rw-r--r-- | net/ipv6/proc.c | 30 |
2 files changed, 28 insertions, 14 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d439788..35d4baa55c9d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, &stats[0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); - memset(&stats[items], 0, pad); + memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..cc8e3ae9ca73 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include <net/transp_v6.h> #include <net/ipv6.h> +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) |