summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/hv/hv_kvp_daemon.c40
-rw-r--r--tools/power/cpupower/.gitignore7
-rw-r--r--tools/power/cpupower/Makefile3
-rw-r--r--tools/power/cpupower/debug/i386/Makefile5
-rw-r--r--tools/power/cpupower/man/cpupower-monitor.115
-rw-r--r--tools/power/cpupower/utils/helpers/cpuid.c2
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h18
-rw-r--r--tools/power/cpupower/utils/helpers/sysfs.c19
-rw-r--r--tools/power/cpupower/utils/helpers/topology.c53
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c21
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h17
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c10
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rw-r--r--tools/testing/selftests/vm/thuge-gen.c254
14 files changed, 393 insertions, 75 deletions
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 5959affd8820..d25a46925e61 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -43,6 +43,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
+#include <net/if.h>
/*
* KVP protocol: The user mode component first registers with the
@@ -88,6 +89,7 @@ static char *os_major = "";
static char *os_minor = "";
static char *processor_arch;
static char *os_build;
+static char *os_version;
static char *lic_version = "Unknown version";
static struct utsname uts_buf;
@@ -297,7 +299,7 @@ static int kvp_file_init(void)
return 0;
}
-static int kvp_key_delete(int pool, __u8 *key, int key_size)
+static int kvp_key_delete(int pool, const char *key, int key_size)
{
int i;
int j, k;
@@ -340,7 +342,7 @@ static int kvp_key_delete(int pool, __u8 *key, int key_size)
return 1;
}
-static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value,
+static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value,
int value_size)
{
int i;
@@ -394,7 +396,7 @@ static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value,
return 0;
}
-static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value,
+static int kvp_get_value(int pool, const char *key, int key_size, char *value,
int value_size)
{
int i;
@@ -426,8 +428,8 @@ static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value,
return 1;
}
-static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size,
- __u8 *value, int value_size)
+static int kvp_pool_enumerate(int pool, int index, char *key, int key_size,
+ char *value, int value_size)
{
struct kvp_record *record;
@@ -453,7 +455,9 @@ void kvp_get_os_info(void)
char *p, buf[512];
uname(&uts_buf);
- os_build = uts_buf.release;
+ os_version = uts_buf.release;
+ os_build = strdup(uts_buf.release);
+
os_name = uts_buf.sysname;
processor_arch = uts_buf.machine;
@@ -462,7 +466,7 @@ void kvp_get_os_info(void)
* string to be of the form: x.y.z
* Strip additional information we may have.
*/
- p = strchr(os_build, '-');
+ p = strchr(os_version, '-');
if (p)
*p = '\0';
@@ -879,7 +883,7 @@ static int kvp_process_ip_address(void *addrp,
addr_length = INET6_ADDRSTRLEN;
}
- if ((length - *offset) < addr_length + 1)
+ if ((length - *offset) < addr_length + 2)
return HV_E_FAIL;
if (str == NULL) {
strcpy(buffer, "inet_ntop failed\n");
@@ -887,11 +891,13 @@ static int kvp_process_ip_address(void *addrp,
}
if (*offset == 0)
strcpy(buffer, tmp);
- else
+ else {
+ strcat(buffer, ";");
strcat(buffer, tmp);
- strcat(buffer, ";");
+ }
*offset += strlen(str) + 1;
+
return 0;
}
@@ -953,7 +959,9 @@ kvp_get_ip_info(int family, char *if_name, int op,
* supported address families; if not we gather info on
* the specified address family.
*/
- if ((family != 0) && (curp->ifa_addr->sa_family != family)) {
+ if ((((family != 0) &&
+ (curp->ifa_addr->sa_family != family))) ||
+ (curp->ifa_flags & IFF_LOOPBACK)) {
curp = curp->ifa_next;
continue;
}
@@ -1478,13 +1486,19 @@ int main(void)
len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0,
addr_p, &addr_l);
- if (len < 0 || addr.nl_pid) {
+ if (len < 0) {
syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
addr.nl_pid, errno, strerror(errno));
close(fd);
return -1;
}
+ if (addr.nl_pid) {
+ syslog(LOG_WARNING, "Received packet from untrusted pid:%u",
+ addr.nl_pid);
+ continue;
+ }
+
incoming_msg = (struct nlmsghdr *)kvp_recv_buffer;
incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
@@ -1649,7 +1663,7 @@ int main(void)
strcpy(key_name, "OSMinorVersion");
break;
case OSVersion:
- strcpy(key_value, os_build);
+ strcpy(key_value, os_version);
strcpy(key_name, "OSVersion");
break;
case ProcessorArchitecture:
diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore
index 8a83dd2ffc11..d42073f12609 100644
--- a/tools/power/cpupower/.gitignore
+++ b/tools/power/cpupower/.gitignore
@@ -20,3 +20,10 @@ utils/cpufreq-set.o
utils/cpufreq-aperf.o
cpupower
bench/cpufreq-bench
+debug/kernel/Module.symvers
+debug/i386/centrino-decode
+debug/i386/dump_psb
+debug/i386/intel_gsic
+debug/i386/powernow-k8-decode
+debug/x86_64/centrino-decode
+debug/x86_64/powernow-k8-decode
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index cf397bd26d0c..d875a74a3bdf 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -253,7 +253,8 @@ clean:
| xargs rm -f
-rm -f $(OUTPUT)cpupower
-rm -f $(OUTPUT)libcpupower.so*
- -rm -rf $(OUTPUT)po/*.{gmo,pot}
+ -rm -rf $(OUTPUT)po/*.gmo
+ -rm -rf $(OUTPUT)po/*.pot
$(MAKE) -C bench O=$(OUTPUT) clean
diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile
index 3ba158f0e287..c05cc0ac80c7 100644
--- a/tools/power/cpupower/debug/i386/Makefile
+++ b/tools/power/cpupower/debug/i386/Makefile
@@ -26,7 +26,10 @@ $(OUTPUT)powernow-k8-decode: powernow-k8-decode.c
all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb $(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode
clean:
- rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode}
+ rm -rf $(OUTPUT)centrino-decode
+ rm -rf $(OUTPUT)dump_psb
+ rm -rf $(OUTPUT)intel_gsic
+ rm -rf $(OUTPUT)powernow-k8-decode
install:
$(INSTALL) -d $(DESTDIR)${bindir}
diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1
index 1141c2073719..e01c35d13b6e 100644
--- a/tools/power/cpupower/man/cpupower-monitor.1
+++ b/tools/power/cpupower/man/cpupower-monitor.1
@@ -7,11 +7,11 @@ cpupower\-monitor \- Report processor frequency and idle statistics
.RB "\-l"
.B cpupower monitor
-.RB [ "\-m <mon1>," [ "<mon2>,..." ] ]
+.RB [ \-c "] [" "\-m <mon1>," [ "<mon2>,..." ] ]
.RB [ "\-i seconds" ]
.br
.B cpupower monitor
-.RB [ "\-m <mon1>," [ "<mon2>,..." ] ]
+.RB [ \-c "] [" "\-m <mon1>," [ "<mon2>,..." ] ]
.RB command
.br
.SH DESCRIPTION
@@ -64,6 +64,17 @@ Only display specific monitors. Use the monitor string(s) provided by \-l option
Measure interval.
.RE
.PP
+\-c
+.RS 4
+Schedule the process on every core before starting and ending measuring.
+This could be needed for the Idle_Stats monitor when no other MSR based
+monitor (has to be run on the core that is measured) is run in parallel.
+This is to wake up the processors from deeper sleep states and let the
+kernel re-account its cpuidle (C-state) information
+before reading the cpuidle timings
+from sysfs.
+.RE
+.PP
command
.RS 4
Measure idle and frequency characteristics of an arbitrary command/workload.
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 906895d21cce..93b0aa74ca03 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -158,6 +158,8 @@ out:
cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
case 0x2A: /* SNB */
case 0x2D: /* SNB Xeon */
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
cpu_info->caps |= CPUPOWER_CAP_IS_SNB;
break;
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 2eb584cf2f55..aa9e95486a2d 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -92,6 +92,14 @@ extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info);
extern struct cpupower_cpu_info cpupower_cpu_info;
/* cpuid and cpuinfo helpers **************************/
+struct cpuid_core_info {
+ int pkg;
+ int core;
+ int cpu;
+
+ /* flags */
+ unsigned int is_online:1;
+};
/* CPU topology/hierarchy parsing ******************/
struct cpupower_topology {
@@ -101,18 +109,12 @@ struct cpupower_topology {
unsigned int threads; /* per core */
/* Array gets mallocated with cores entries, holding per core info */
- struct {
- int pkg;
- int core;
- int cpu;
-
- /* flags */
- unsigned int is_online:1;
- } *core_info;
+ struct cpuid_core_info *core_info;
};
extern int get_cpu_topology(struct cpupower_topology *cpu_top);
extern void cpu_topology_release(struct cpupower_topology cpu_top);
+
/* CPU topology/hierarchy parsing ******************/
/* X86 ONLY ****************************************/
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index 96e28c124b5c..38ab91629463 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -37,25 +37,6 @@ unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
return (unsigned int) numread;
}
-static unsigned int sysfs_write_file(const char *path,
- const char *value, size_t len)
-{
- int fd;
- ssize_t numwrite;
-
- fd = open(path, O_WRONLY);
- if (fd == -1)
- return 0;
-
- numwrite = write(fd, value, len);
- if (numwrite < 1) {
- close(fd);
- return 0;
- }
- close(fd);
- return (unsigned int) numwrite;
-}
-
/*
* Detect whether a CPU is online
*
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index 4eae2c47ba48..c13120af519b 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -20,9 +20,8 @@
#include <helpers/sysfs.h>
/* returns -1 on failure, 0 on success */
-int sysfs_topology_read_file(unsigned int cpu, const char *fname)
+static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
{
- unsigned long value;
char linebuf[MAX_LINE_LEN];
char *endp;
char path[SYSFS_PATH_MAX];
@@ -31,20 +30,12 @@ int sysfs_topology_read_file(unsigned int cpu, const char *fname)
cpu, fname);
if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
return -1;
- value = strtoul(linebuf, &endp, 0);
+ *result = strtol(linebuf, &endp, 0);
if (endp == linebuf || errno == ERANGE)
return -1;
- return value;
+ return 0;
}
-struct cpuid_core_info {
- unsigned int pkg;
- unsigned int thread;
- unsigned int cpu;
- /* flags */
- unsigned int is_online:1;
-};
-
static int __compare(const void *t1, const void *t2)
{
struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
@@ -53,9 +44,9 @@ static int __compare(const void *t1, const void *t2)
return -1;
else if (top1->pkg > top2->pkg)
return 1;
- else if (top1->thread < top2->thread)
+ else if (top1->core < top2->core)
return -1;
- else if (top1->thread > top2->thread)
+ else if (top1->core > top2->core)
return 1;
else if (top1->cpu < top2->cpu)
return -1;
@@ -73,28 +64,42 @@ static int __compare(const void *t1, const void *t2)
*/
int get_cpu_topology(struct cpupower_topology *cpu_top)
{
- int cpu, cpus = sysconf(_SC_NPROCESSORS_CONF);
+ int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
- cpu_top->core_info = malloc(sizeof(struct cpupower_topology) * cpus);
+ cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
if (cpu_top->core_info == NULL)
return -ENOMEM;
cpu_top->pkgs = cpu_top->cores = 0;
for (cpu = 0; cpu < cpus; cpu++) {
cpu_top->core_info[cpu].cpu = cpu;
cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
- cpu_top->core_info[cpu].pkg =
- sysfs_topology_read_file(cpu, "physical_package_id");
- if ((int)cpu_top->core_info[cpu].pkg != -1 &&
- cpu_top->core_info[cpu].pkg > cpu_top->pkgs)
- cpu_top->pkgs = cpu_top->core_info[cpu].pkg;
- cpu_top->core_info[cpu].core =
- sysfs_topology_read_file(cpu, "core_id");
+ if(sysfs_topology_read_file(
+ cpu,
+ "physical_package_id",
+ &(cpu_top->core_info[cpu].pkg)) < 0)
+ return -1;
+ if(sysfs_topology_read_file(
+ cpu,
+ "core_id",
+ &(cpu_top->core_info[cpu].core)) < 0)
+ return -1;
}
- cpu_top->pkgs++;
qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
__compare);
+ /* Count the number of distinct pkgs values. This works
+ because the primary sort of the core_info struct was just
+ done by pkg value. */
+ last_pkg = cpu_top->core_info[0].pkg;
+ for(cpu = 1; cpu < cpus; cpu++) {
+ if(cpu_top->core_info[cpu].pkg != last_pkg) {
+ last_pkg = cpu_top->core_info[cpu].pkg;
+ cpu_top->pkgs++;
+ }
+ }
+ cpu_top->pkgs++;
+
/* Intel's cores count is not consecutively numbered, there may
* be a core_id of 3, but none of 2. Assume there always is 0
* Get amount of cores by counting duplicates in a package
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 0d6571e418db..c4bae9203a69 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -39,6 +39,7 @@ static int mode;
static int interval = 1;
static char *show_monitors_param;
static struct cpupower_topology cpu_top;
+static unsigned int wake_cpus;
/* ToDo: Document this in the manpage */
static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
@@ -84,7 +85,7 @@ int fill_string_with_spaces(char *s, int n)
void print_header(int topology_depth)
{
int unsigned mon;
- int state, need_len, pr_mon_len;
+ int state, need_len;
cstate_t s;
char buf[128] = "";
int percent_width = 4;
@@ -93,7 +94,6 @@ void print_header(int topology_depth)
printf("%s|", buf);
for (mon = 0; mon < avail_monitors; mon++) {
- pr_mon_len = 0;
need_len = monitors[mon]->hw_states_num * (percent_width + 3)
- 1;
if (mon != 0) {
@@ -315,16 +315,28 @@ int fork_it(char **argv)
int do_interval_measure(int i)
{
unsigned int num;
+ int cpu;
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
for (num = 0; num < avail_monitors; num++) {
dprint("HW C-state residency monitor: %s - States: %d\n",
monitors[num]->name, monitors[num]->hw_states_num);
monitors[num]->start();
}
+
sleep(i);
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
+
for (num = 0; num < avail_monitors; num++)
monitors[num]->stop();
+
return 0;
}
@@ -333,7 +345,7 @@ static void cmdline(int argc, char *argv[])
int opt;
progname = basename(argv[0]);
- while ((opt = getopt(argc, argv, "+li:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "+lci:m:")) != -1) {
switch (opt) {
case 'l':
if (mode)
@@ -352,6 +364,9 @@ static void cmdline(int argc, char *argv[])
mode = show;
show_monitors_param = optarg;
break;
+ case 'c':
+ wake_cpus = 1;
+ break;
default:
print_wrong_arg_exit();
}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 9312ee1f2dbc..9e43f3371fbc 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -65,4 +65,21 @@ extern long long timespec_diff_us(struct timespec start, struct timespec end);
"could be inaccurate\n"), mes, ov); \
}
+
+/* Taken over from x86info project sources -> return 0 on success */
+#include <sched.h>
+#include <sys/types.h>
+#include <unistd.h>
+static inline int bind_cpu(int cpu)
+{
+ cpu_set_t set;
+
+ if (sched_getaffinity(getpid(), sizeof(set), &set) == 0) {
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ return sched_setaffinity(getpid(), sizeof(set), &set);
+ }
+ return 1;
+}
+
#endif /* __CPUIDLE_INFO_HW__ */
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index a1bc07cd53e1..a99b43b97d6d 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -150,9 +150,15 @@ static struct cpuidle_monitor *snb_register(void)
|| cpupower_cpu_info.family != 6)
return NULL;
- if (cpupower_cpu_info.model != 0x2A
- && cpupower_cpu_info.model != 0x2D)
+ switch (cpupower_cpu_info.model) {
+ case 0x2A: /* SNB */
+ case 0x2D: /* SNB Xeon */
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
+ break;
+ default:
return NULL;
+ }
is_valid = calloc(cpu_count, sizeof(int));
for (num = 0; num < SNB_CSTATE_COUNT; num++) {
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index b336b24aa6c0..7300d0702efe 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,9 +1,9 @@
# Makefile for vm selftests
CC = $(CROSS_COMPILE)gcc
-CFLAGS = -Wall -Wextra
+CFLAGS = -Wall
-all: hugepage-mmap hugepage-shm map_hugetlb
+all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen
%: %.c
$(CC) $(CFLAGS) -o $@ $^
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c
new file mode 100644
index 000000000000..c87957295f74
--- /dev/null
+++ b/tools/testing/selftests/vm/thuge-gen.c
@@ -0,0 +1,254 @@
+/* Test selecting other page sizes for mmap/shmget.
+
+ Before running this huge pages for each huge page size must have been
+ reserved.
+ For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ Also shmmax must be increased.
+ And you need to run as root to work around some weird permissions in shm.
+ And nothing using huge pages should run in parallel.
+ When the program aborts you may need to clean up the shm segments with
+ ipcrm -m by hand, like this
+ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
+ (warning this will remove all if someone else uses them) */
+
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define err(x) perror(x), exit(1)
+
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_SHIFT 26
+#define MAP_HUGE_MASK 0x3f
+#define MAP_HUGETLB 0x40000
+
+#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
+#define SHM_HUGE_SHIFT 26
+#define SHM_HUGE_MASK 0x3f
+#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
+
+#define NUM_PAGESIZES 5
+
+#define NUM_PAGES 4
+
+#define Dprintf(fmt...) // printf(fmt)
+
+unsigned long page_sizes[NUM_PAGESIZES];
+int num_page_sizes;
+
+int ilog2(unsigned long v)
+{
+ int l = 0;
+ while ((1UL << l) < v)
+ l++;
+ return l;
+}
+
+void find_pagesizes(void)
+{
+ glob_t g;
+ int i;
+ glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+ assert(g.gl_pathc <= NUM_PAGESIZES);
+ for (i = 0; i < g.gl_pathc; i++) {
+ sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+ &page_sizes[i]);
+ page_sizes[i] <<= 10;
+ printf("Found %luMB\n", page_sizes[i] >> 20);
+ }
+ num_page_sizes = g.gl_pathc;
+ globfree(&g);
+}
+
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+ free(line);
+ return hps;
+}
+
+void show(unsigned long ps)
+{
+ char buf[100];
+ if (ps == getpagesize())
+ return;
+ printf("%luMB: ", ps >> 20);
+ fflush(stdout);
+ snprintf(buf, sizeof buf,
+ "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+ system(buf);
+}
+
+unsigned long read_sysfs(int warn, char *fmt, ...)
+{
+ char *line = NULL;
+ size_t linelen = 0;
+ char buf[100];
+ FILE *f;
+ va_list ap;
+ unsigned long val = 0;
+
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof buf, fmt, ap);
+ va_end(ap);
+
+ f = fopen(buf, "r");
+ if (!f) {
+ if (warn)
+ printf("missing %s\n", buf);
+ return 0;
+ }
+ if (getline(&line, &linelen, f) > 0) {
+ sscanf(line, "%lu", &val);
+ }
+ fclose(f);
+ free(line);
+ return val;
+}
+
+unsigned long read_free(unsigned long ps)
+{
+ return read_sysfs(ps != getpagesize(),
+ "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+}
+
+void test_mmap(unsigned long size, unsigned flags)
+{
+ char *map;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, 0, 0);
+
+ if (map == (char *)-1) err("mmap");
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = munmap(map, size);
+ assert(!err);
+}
+
+void test_shmget(unsigned long size, unsigned flags)
+{
+ int id;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
+ if (id < 0) err("shmget");
+
+ struct shm_info i;
+ if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
+ Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+
+
+ Dprintf("id %d\n", id);
+ char *map = shmat(id, NULL, 0600);
+ if (map == (char*)-1) err("shmat");
+
+ shmctl(id, IPC_RMID, NULL);
+
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = shmdt(map);
+ assert(!err);
+}
+
+void sanity_checks(void)
+{
+ int i;
+ unsigned long largest = getpagesize();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ if (page_sizes[i] > largest)
+ largest = page_sizes[i];
+
+ if (read_free(page_sizes[i]) < NUM_PAGES) {
+ printf("Not enough huge pages for page size %lu MB, need %u\n",
+ page_sizes[i] >> 20,
+ NUM_PAGES);
+ exit(0);
+ }
+ }
+
+ if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
+ printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
+ exit(0);
+ }
+
+#if defined(__x86_64__)
+ if (largest != 1U<<30) {
+ printf("No GB pages available on x86-64\n"
+ "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
+ exit(0);
+ }
+#endif
+}
+
+int main(void)
+{
+ int i;
+ unsigned default_hps = default_huge_page_size();
+
+ find_pagesizes();
+
+ sanity_checks();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << MAP_HUGE_SHIFT;
+ printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+ test_mmap(ps, MAP_HUGETLB | arg);
+ }
+ printf("Testing default huge mmap\n");
+ test_mmap(default_hps, SHM_HUGETLB);
+
+ puts("Testing non-huge shmget");
+ test_shmget(getpagesize(), 0);
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << SHM_HUGE_SHIFT;
+ printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+ test_shmget(ps, SHM_HUGETLB | arg);
+ }
+ puts("default huge shmget");
+ test_shmget(default_hps, SHM_HUGETLB);
+
+ return 0;
+}