diff options
-rw-r--r-- | include/linux/perf_event.h | 16 | ||||
-rw-r--r-- | kernel/events/core.c | 307 |
2 files changed, 267 insertions, 56 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7546822a1d74..6e3f854a34d8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -558,7 +558,11 @@ struct perf_event { */ struct list_head group_entry; struct list_head sibling_list; - + /* + * Node on the pinned or flexible tree located at the event context; + */ + struct rb_node group_node; + u64 group_index; /* * We need storage to track the entries in perf_pmu_migrate_context; we * cannot use the event_entry because of RCU and we want to keep the @@ -690,6 +694,12 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; + +struct perf_event_groups { + struct rb_root tree; + u64 index; +}; + /** * struct perf_event_context - event context structure * @@ -710,8 +720,8 @@ struct perf_event_context { struct mutex mutex; struct list_head active_ctx_list; - struct list_head pinned_groups; - struct list_head flexible_groups; + struct perf_event_groups pinned_groups; + struct perf_event_groups flexible_groups; struct list_head event_list; int nr_events; int nr_active; diff --git a/kernel/events/core.c b/kernel/events/core.c index 8b6a2774e084..c9fee3640f40 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1460,8 +1460,21 @@ static enum event_type_t get_event_type(struct perf_event *event) return event_type; } -static struct list_head * -ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) +/* + * Helper function to initialize group leader event; + */ +void init_event_group(struct perf_event *event) +{ + RB_CLEAR_NODE(&event->group_node); + event->group_index = 0; +} + +/* + * Extract pinned or flexible groups from the context + * based on event attrs bits; + */ +static struct perf_event_groups * +get_event_groups(struct perf_event *event, struct perf_event_context *ctx) { if (event->attr.pinned) return &ctx->pinned_groups; @@ -1470,6 +1483,169 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) } /* + * Helper function to initializes perf event groups object; + */ +void perf_event_groups_init(struct perf_event_groups *groups) +{ + groups->tree = RB_ROOT; + groups->index = 0; +} + +/* + * Compare function for event groups; + * Implements complex key that first sorts by CPU and then by + * virtual index which provides ordering when rotating + * groups for the same CPU; + */ +int perf_event_groups_less(struct perf_event *left, struct perf_event *right) +{ + if (left->cpu < right->cpu) { + return 1; + } else if (left->cpu > right->cpu) { + return 0; + } else { + if (left->group_index < right->group_index) { + return 1; + } else if(left->group_index > right->group_index) { + return 0; + } else { + return 0; + } + } +} + +/* + * Insert a group into a tree using event->cpu as a key. If event->cpu node + * is already attached to the tree then the event is added to the attached + * group's group_list list. + */ +static void +perf_event_groups_insert(struct perf_event_groups *groups, + struct perf_event *event) +{ + struct perf_event *node_event; + struct rb_node *parent; + struct rb_node **node; + + event->group_index = ++groups->index; + + node = &groups->tree.rb_node; + parent = *node; + + while (*node) { + parent = *node; + node_event = container_of(*node, + struct perf_event, group_node); + + if (perf_event_groups_less(event, node_event)) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&event->group_node, parent, node); + rb_insert_color(&event->group_node, &groups->tree); +} + +/* + * Helper function to insert event into the pinned or + * flexible groups; + */ +static void +add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx) +{ + struct perf_event_groups *groups; + + groups = get_event_groups(event, ctx); + perf_event_groups_insert(groups, event); +} + +/* + * Delete a group from a tree. If the group is directly attached to the tree + * it also detaches all groups on the group's group_list list. + */ +static void +perf_event_groups_delete(struct perf_event_groups *groups, + struct perf_event *event) +{ + if (!RB_EMPTY_NODE(&event->group_node) && + !RB_EMPTY_ROOT(&groups->tree)) + rb_erase(&event->group_node, &groups->tree); + + init_event_group(event); +} + +/* + * Helper function to delete event from its groups; + */ +static void +del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx) +{ + struct perf_event_groups *groups; + + groups = get_event_groups(event, ctx); + perf_event_groups_delete(groups, event); +} + +/* + * Get a group by a cpu key from groups tree with the least group_index; + */ +static struct perf_event * +perf_event_groups_first(struct perf_event_groups *groups, int cpu) +{ + struct perf_event *node_event = NULL, *match = NULL; + struct rb_node *node = groups->tree.rb_node; + + while (node) { + node_event = container_of(node, + struct perf_event, group_node); + + if (cpu < node_event->cpu) { + node = node->rb_left; + } else if (cpu > node_event->cpu) { + node = node->rb_right; + } else { + match = node_event; + node = node->rb_left; + } + } + + return match; +} + +/* + * Find group list by a cpu key and rotate it. + */ +static void +perf_event_groups_rotate(struct perf_event_groups *groups, int cpu) +{ + struct perf_event *event = + perf_event_groups_first(groups, cpu); + + if (event) { + perf_event_groups_delete(groups, event); + perf_event_groups_insert(groups, event); + } +} + +/* + * Iterate event groups thru the whole tree. + */ +#define perf_event_groups_for_each(event, groups, node) \ + for (event = rb_entry_safe(rb_first(&((groups)->tree)), \ + typeof(*event), node); event; \ + event = rb_entry_safe(rb_next(&event->node), \ + typeof(*event), node)) +/* + * Iterate event groups with cpu == key. + */ +#define perf_event_groups_for_each_cpu(event, key, groups, node) \ + for (event = perf_event_groups_first(groups, key); \ + event && event->cpu == key; \ + event = rb_entry_safe(rb_next(&event->node), \ + typeof(*event), node)) + +/* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. */ @@ -1489,12 +1665,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) * perf_group_detach can, at all times, locate all siblings. */ if (event->group_leader == event) { - struct list_head *list; - event->group_caps = event->event_caps; - - list = ctx_group_list(event, ctx); - list_add_tail(&event->group_entry, list); + add_event_to_groups(event, ctx); } list_update_cgroup_event(event, ctx, true); @@ -1688,7 +1860,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) list_del_rcu(&event->event_entry); if (event->group_leader == event) - list_del_init(&event->group_entry); + del_event_from_groups(event, ctx); /* * If event was in error state, then keep it @@ -1706,7 +1878,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) static void perf_group_detach(struct perf_event *event) { struct perf_event *sibling, *tmp; - struct list_head *list = NULL; lockdep_assert_held(&event->ctx->lock); @@ -1727,22 +1898,23 @@ static void perf_group_detach(struct perf_event *event) goto out; } - if (!list_empty(&event->group_entry)) - list = &event->group_entry; - /* * If this was a group event with sibling events then * upgrade the siblings to singleton events by adding them * to whatever list we are on. */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { - if (list) - list_move_tail(&sibling->group_entry, list); + sibling->group_leader = sibling; /* Inherit group flags from the previous leader */ sibling->group_caps = event->group_caps; + if (!RB_EMPTY_NODE(&event->group_node)) { + list_del_init(&sibling->group_entry); + add_event_to_groups(sibling, event->ctx); + } + WARN_ON_ONCE(sibling->ctx != event->ctx); } @@ -2186,6 +2358,22 @@ static int group_can_go_on(struct perf_event *event, return can_add_hw; } +static int +flexible_group_sched_in(struct perf_event *event, + struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + int *can_add_hw) +{ + if (event->state <= PERF_EVENT_STATE_OFF || !event_filter_match(event)) + return 0; + + if (group_can_go_on(event, cpuctx, *can_add_hw)) + if (group_sched_in(event, cpuctx, ctx)) + *can_add_hw = 0; + + return 1; +} + static void add_event_to_ctx(struct perf_event *event, struct perf_event_context *ctx) { @@ -2652,6 +2840,7 @@ static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type) { + int sw = -1, cpu = smp_processor_id(); int is_active = ctx->is_active; struct perf_event *event; @@ -2700,12 +2889,20 @@ static void ctx_sched_out(struct perf_event_context *ctx, perf_pmu_disable(ctx->pmu); if (is_active & EVENT_PINNED) { - list_for_each_entry(event, &ctx->pinned_groups, group_entry) + perf_event_groups_for_each_cpu(event, cpu, + &ctx->pinned_groups, group_node) + group_sched_out(event, cpuctx, ctx); + perf_event_groups_for_each_cpu(event, sw, + &ctx->pinned_groups, group_node) group_sched_out(event, cpuctx, ctx); } if (is_active & EVENT_FLEXIBLE) { - list_for_each_entry(event, &ctx->flexible_groups, group_entry) + perf_event_groups_for_each_cpu(event, cpu, + &ctx->flexible_groups, group_node) + group_sched_out(event, cpuctx, ctx); + perf_event_groups_for_each_cpu(event, sw, + &ctx->flexible_groups, group_node) group_sched_out(event, cpuctx, ctx); } perf_pmu_enable(ctx->pmu); @@ -2996,23 +3193,28 @@ static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx) { + int sw = -1, cpu = smp_processor_id(); struct perf_event *event; + int can_add_hw; + + perf_event_groups_for_each_cpu(event, sw, + &ctx->pinned_groups, group_node) { + can_add_hw = 1; + if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) { + if (event->state == PERF_EVENT_STATE_INACTIVE) + perf_event_set_state(event, + PERF_EVENT_STATE_ERROR); + } + } - list_for_each_entry(event, &ctx->pinned_groups, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) - continue; - if (!event_filter_match(event)) - continue; - - if (group_can_go_on(event, cpuctx, 1)) - group_sched_in(event, cpuctx, ctx); - - /* - * If this pinned group hasn't been scheduled, - * put it in error state. - */ - if (event->state == PERF_EVENT_STATE_INACTIVE) - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + perf_event_groups_for_each_cpu(event, cpu, + &ctx->pinned_groups, group_node) { + can_add_hw = 1; + if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) { + if (event->state == PERF_EVENT_STATE_INACTIVE) + perf_event_set_state(event, + PERF_EVENT_STATE_ERROR); + } } } @@ -3020,25 +3222,19 @@ static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx) { + int sw = -1, cpu = smp_processor_id(); struct perf_event *event; int can_add_hw = 1; - list_for_each_entry(event, &ctx->flexible_groups, group_entry) { - /* Ignore events in OFF or ERROR state */ - if (event->state <= PERF_EVENT_STATE_OFF) - continue; - /* - * Listen to the 'cpu' scheduling filter constraint - * of events: - */ - if (!event_filter_match(event)) - continue; + perf_event_groups_for_each_cpu(event, sw, + &ctx->flexible_groups, group_node) + flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw); + + can_add_hw = 1; + perf_event_groups_for_each_cpu(event, cpu, + &ctx->flexible_groups, group_node) + flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw); - if (group_can_go_on(event, cpuctx, can_add_hw)) { - if (group_sched_in(event, cpuctx, ctx)) - can_add_hw = 0; - } - } } static void @@ -3119,7 +3315,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, * However, if task's ctx is not carrying any pinned * events, no need to flip the cpuctx's events around. */ - if (!list_empty(&ctx->pinned_groups)) + if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); perf_event_sched_in(cpuctx, ctx, task); perf_pmu_enable(ctx->pmu); @@ -3356,8 +3552,12 @@ static void rotate_ctx(struct perf_event_context *ctx) * Rotate the first entry last of non-pinned groups. Rotation might be * disabled by the inheritance code. */ - if (!ctx->rotate_disable) - list_rotate_left(&ctx->flexible_groups); + if (!ctx->rotate_disable) { + int sw = -1, cpu = smp_processor_id(); + + perf_event_groups_rotate(&ctx->flexible_groups, sw); + perf_event_groups_rotate(&ctx->flexible_groups, cpu); + } } static int perf_rotate_context(struct perf_cpu_context *cpuctx) @@ -3715,8 +3915,8 @@ static void __perf_event_init_context(struct perf_event_context *ctx) raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->active_ctx_list); - INIT_LIST_HEAD(&ctx->pinned_groups); - INIT_LIST_HEAD(&ctx->flexible_groups); + perf_event_groups_init(&ctx->pinned_groups); + perf_event_groups_init(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); } @@ -9561,6 +9761,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->group_entry); INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); + init_event_group(event); INIT_LIST_HEAD(&event->rb_entry); INIT_LIST_HEAD(&event->active_entry); INIT_LIST_HEAD(&event->addr_filters.list); @@ -11085,7 +11286,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ - list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { + perf_event_groups_for_each(event, &parent_ctx->pinned_groups, group_node) { ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) @@ -11101,7 +11302,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) parent_ctx->rotate_disable = 1; raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); - list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { + perf_event_groups_for_each(event, &parent_ctx->flexible_groups, group_node) { ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) |