From 29687512c0b084957112cc2c0743ce34cd0d5055 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 16 Dec 2010 09:22:24 -0500 Subject: [SCSI] fix up documentation for change in ->queuecommand to lockless calling The current doc still says we call it with the host lock held, which is going to cause confusion. Signed-off-by: James Bottomley --- Documentation/scsi/scsi_mid_low_api.txt | 59 +++++++++++++++++---------------- 1 file changed, 31 insertions(+), 28 deletions(-) (limited to 'Documentation') diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 570ef2b3d79b..df322c103466 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -1044,9 +1044,9 @@ Details: /** - * queuecommand - queue scsi command, invoke 'done' on completion + * queuecommand - queue scsi command, invoke scp->scsi_done on completion + * @shost: pointer to the scsi host object * @scp: pointer to scsi command object - * @done: function pointer to be invoked on completion * * Returns 0 on success. * @@ -1074,42 +1074,45 @@ Details: * * Other types of errors that are detected immediately may be * flagged by setting scp->result to an appropriate value, - * invoking the 'done' callback, and then returning 0 from this - * function. If the command is not performed immediately (and the - * LLD is starting (or will start) the given command) then this - * function should place 0 in scp->result and return 0. + * invoking the scp->scsi_done callback, and then returning 0 + * from this function. If the command is not performed + * immediately (and the LLD is starting (or will start) the given + * command) then this function should place 0 in scp->result and + * return 0. * * Command ownership. If the driver returns zero, it owns the - * command and must take responsibility for ensuring the 'done' - * callback is executed. Note: the driver may call done before - * returning zero, but after it has called done, it may not - * return any value other than zero. If the driver makes a - * non-zero return, it must not execute the command's done - * callback at any time. - * - * Locks: struct Scsi_Host::host_lock held on entry (with "irqsave") - * and is expected to be held on return. + * command and must take responsibility for ensuring the + * scp->scsi_done callback is executed. Note: the driver may + * call scp->scsi_done before returning zero, but after it has + * called scp->scsi_done, it may not return any value other than + * zero. If the driver makes a non-zero return, it must not + * execute the command's scsi_done callback at any time. + * + * Locks: up to and including 2.6.36, struct Scsi_Host::host_lock + * held on entry (with "irqsave") and is expected to be + * held on return. From 2.6.37 onwards, queuecommand is + * called without any locks held. * * Calling context: in interrupt (soft irq) or process context * - * Notes: This function should be relatively fast. Normally it will - * not wait for IO to complete. Hence the 'done' callback is invoked - * (often directly from an interrupt service routine) some time after - * this function has returned. In some cases (e.g. pseudo adapter - * drivers that manufacture the response to a SCSI INQUIRY) - * the 'done' callback may be invoked before this function returns. - * If the 'done' callback is not invoked within a certain period - * the SCSI mid level will commence error processing. - * If a status of CHECK CONDITION is placed in "result" when the - * 'done' callback is invoked, then the LLD driver should - * perform autosense and fill in the struct scsi_cmnd::sense_buffer + * Notes: This function should be relatively fast. Normally it + * will not wait for IO to complete. Hence the scp->scsi_done + * callback is invoked (often directly from an interrupt service + * routine) some time after this function has returned. In some + * cases (e.g. pseudo adapter drivers that manufacture the + * response to a SCSI INQUIRY) the scp->scsi_done callback may be + * invoked before this function returns. If the scp->scsi_done + * callback is not invoked within a certain period the SCSI mid + * level will commence error processing. If a status of CHECK + * CONDITION is placed in "result" when the scp->scsi_done + * callback is invoked, then the LLD driver should perform + * autosense and fill in the struct scsi_cmnd::sense_buffer * array. The scsi_cmnd::sense_buffer array is zeroed prior to * the mid level queuing a command to an LLD. * * Defined in: LLD **/ - int queuecommand(struct scsi_cmnd * scp, - void (*done)(struct scsi_cmnd *)) + int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp) /** -- cgit v1.2.3 From 7a2d19bced51af31d2c9ff55219400ed0a6c012f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Dec 2010 17:24:18 -0800 Subject: mm: vmscan: tracepoint: account for scanned pages similarly for both ftrace and vmstat When correlating ftrace results with /proc/vmstat, I noticed that the reporting scripts value for "pages scanned" differed significantly. Both values were "right" depending on how you look at it. The difference is due to vmstat only counting scanning of the inactive list towards pages scanned. The analysis script for the tracepoint counts active and inactive list yielding a far higher value than vmstat. The resulting scanning/reclaim ratio looks much worse. The tracepoint is ok but this patch updates the reporting script so that the report values for scanned are similar to vmstat. Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index b3e73ddb1567..12cecc83cd91 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -373,9 +373,18 @@ EVENT_PROCESS: print " $regex_lru_isolate/o\n"; next; } + my $isolate_mode = $1; my $nr_scanned = $4; my $nr_contig_dirty = $7; - $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; + + # To closer match vmstat scanning statistics, only count isolate_both + # and isolate_inactive as scanning. isolate_active is rotation + # isolate_inactive == 0 + # isolate_active == 1 + # isolate_both == 2 + if ($isolate_mode != 1) { + $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; + } $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { $details = $5; -- cgit v1.2.3 From 4be2c95d1f7706ca0e74499f2bd118e1cee19669 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 21 Dec 2010 17:24:30 -0800 Subject: taskstats: pad taskstats netlink response for aligment issues on ia64 The taskstats structure is internally aligned on 8 byte boundaries but the layout of the aggregrate reply, with two NLA headers and the pid (each 4 bytes), actually force the entire structure to be unaligned. This causes the kernel to issue unaligned access warnings on some architectures like ia64. Unfortunately, some software out there doesn't properly unroll the NLA packet and assumes that the start of the taskstats structure will always be 20 bytes from the start of the netlink payload. Aligning the start of the taskstats structure breaks this software, which we don't want. So, for now the alignment only happens on architectures that require it and those users will have to update to fixed versions of those packages. Space is reserved in the packet only when needed. This ifdef should be removed in several years e.g. 2012 once we can be confident that fixed versions are installed on most systems. We add the padding before the aggregate since the aggregate is already a defined type. Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems") previously addressed the alignment issues by padding out the pid field. This was supposed to be a compatible change but the circumstances described above mean that it wasn't. This patch backs out that change, since it was a hack, and introduces a new NULL attribute type to provide the padding. Padding the response with 4 bytes avoids allocating an aligned taskstats structure and copying it back. Since the structure weighs in at 328 bytes, it's too big to do it on the stack. Signed-off-by: Jeff Mahoney Reported-by: Brian Rogers Cc: Jeff Mahoney Cc: Guillaume Chazarain Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/getdelays.c | 1 + include/linux/taskstats.h | 3 +- kernel/taskstats.c | 57 ++++++++++++++++++++++++++++-------- 3 files changed, 47 insertions(+), 14 deletions(-) (limited to 'Documentation') diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index a2976a6de033..e9c77788a39d 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -516,6 +516,7 @@ int main(int argc, char *argv[]) default: fprintf(stderr, "Unknown nla_type %d\n", na->nla_type); + case TASKSTATS_TYPE_NULL: break; } na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 341dddb55090..2466e550a41d 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -33,7 +33,7 @@ */ -#define TASKSTATS_VERSION 7 +#define TASKSTATS_VERSION 8 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -188,6 +188,7 @@ enum { TASKSTATS_TYPE_STATS, /* taskstats structure */ TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ + TASKSTATS_TYPE_NULL, /* contains nothing */ __TASKSTATS_TYPE_MAX, }; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c8231fb15708..3308fd7f1b52 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask) return ret; } +#ifdef CONFIG_IA64 +#define TASKSTATS_NEEDS_PADDING 1 +#endif + static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; - /* If we don't pad, we end up with alignment on a 4 byte boundary. - * This causes lots of runtime warnings on systems requiring 8 byte - * alignment */ - u32 pids[2] = { pid, 0 }; - int pid_size = ALIGN(sizeof(pid), sizeof(long)); - aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; + /* + * The taskstats structure is internally aligned on 8 byte + * boundaries but the layout of the aggregrate reply, with + * two NLA headers and the pid (each 4 bytes), actually + * force the entire structure to be unaligned. This causes + * the kernel to issue unaligned access warnings on some + * architectures like ia64. Unfortunately, some software out there + * doesn't properly unroll the NLA packet and assumes that the start + * of the taskstats structure will always be 20 bytes from the start + * of the netlink payload. Aligning the start of the taskstats + * structure breaks this software, which we don't want. So, for now + * the alignment only happens on architectures that require it + * and those users will have to update to fixed versions of those + * packages. Space is reserved in the packet only when needed. + * This ifdef should be removed in several years e.g. 2012 once + * we can be confident that fixed versions are installed on most + * systems. We add the padding before the aggregate since the + * aggregate is already a defined type. + */ +#ifdef TASKSTATS_NEEDS_PADDING + if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) + goto err; +#endif na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, pid_size, pids) < 0) + + if (nla_put(skb, type, sizeof(pid), &pid) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) @@ -456,6 +478,18 @@ out: return rc; } +static size_t taskstats_packet_size(void) +{ + size_t size; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +#ifdef TASKSTATS_NEEDS_PADDING + size += nla_total_size(0); /* Padding for alignment */ +#endif + return size; +} + static int cmd_attr_pid(struct genl_info *info) { struct taskstats *stats; @@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info) u32 pid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info) u32 tgid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) /* * Size includes space for nested attributes */ - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { -- cgit v1.2.3