diff options
50 files changed, 1894 insertions, 301 deletions
diff --git a/Documentation/bpf/README.rst b/Documentation/bpf/index.rst index b9a80c9e9392..00a8450a602f 100644 --- a/Documentation/bpf/README.rst +++ b/Documentation/bpf/index.rst @@ -1,5 +1,5 @@ ================= -BPF documentation +BPF Documentation ================= This directory contains documentation for the BPF (Berkeley Packet @@ -22,14 +22,14 @@ Frequently asked questions (FAQ) Two sets of Questions and Answers (Q&A) are maintained. -* QA for common questions about BPF see: bpf_design_QA_ +.. toctree:: + :maxdepth: 1 -* QA for developers interacting with BPF subsystem: bpf_devel_QA_ + bpf_design_QA + bpf_devel_QA .. Links: -.. _bpf_design_QA: bpf_design_QA.rst -.. _bpf_devel_QA: bpf_devel_QA.rst .. _Documentation/networking/filter.txt: ../networking/filter.txt .. _man-pages: https://www.kernel.org/doc/man-pages/ .. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html diff --git a/Documentation/index.rst b/Documentation/index.rst index fdc585703498..086710b054db 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -90,6 +90,7 @@ needed). crypto/index filesystems/index vm/index + bpf/index Architecture-specific documentation ----------------------------------- diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 81b150e5dfdb..8b97fd1f0cea 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -196,14 +196,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample) */ void lirc_bpf_free(struct rc_dev *rcdev) { - struct bpf_prog **progs; + struct bpf_prog_array_item *item; if (!rcdev->raw->progs) return; - progs = rcu_dereference(rcdev->raw->progs)->progs; - while (*progs) - bpf_prog_put(*progs++); + item = rcu_dereference(rcdev->raw->progs)->items; + while (item->prog) { + bpf_prog_put(item->prog); + item++; + } bpf_prog_array_free(rcdev->raw->progs); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index cb87fccb9f6a..2572a4b91c7c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -43,8 +43,6 @@ #include "fw.h" #include "main.h" -#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) - #define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4) static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf) @@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) } if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { - nfp_bpf_event_output(bpf, skb); + if (!nfp_bpf_event_output(bpf, skb->data, skb->len)) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); return; } @@ -465,3 +466,21 @@ err_unlock: err_free: dev_kfree_skb_any(skb); } + +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + struct nfp_app_bpf *bpf = app->priv; + const struct cmsg_hdr *hdr = data; + + if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", len); + return; + } + + if (hdr->type == CMSG_TYPE_BPF_EVENT) + nfp_bpf_event_output(bpf, data, len); + else + cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n", + hdr->type); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 4c7972e3db63..e4f9b7ec8528 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -51,6 +51,7 @@ enum bpf_cap_tlv_type { NFP_BPF_CAP_TYPE_MAPS = 3, NFP_BPF_CAP_TYPE_RANDOM = 4, NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, + NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, }; struct nfp_bpf_cap_tlv_func { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 1d9e36835404..eff57f7d056a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1642,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_einval, end; + swreg plen, delta; + + BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); + + plen = imm_a(nfp_prog); + delta = reg_a(2 * 2); + + ret_einval = nfp_prog_current_offset(nfp_prog) + 9; + end = nfp_prog_current_offset(nfp_prog) + 11; + + /* Calculate resulting length */ + emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); + /* delta == 0 is not allowed by the kernel, add must overflow to make + * length smaller. + */ + emit_br(nfp_prog, BR_BCC, ret_einval, 0); + + /* if (new_len < 14) then -EINVAL */ + emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_ADD, delta); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_ADD, delta); + + emit_br(nfp_prog, BR_UNC, end, 2); + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + static int map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -3041,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) switch (meta->insn.imm) { case BPF_FUNC_xdp_adjust_head: return adjust_head(nfp_prog, meta); + case BPF_FUNC_xdp_adjust_tail: + return adjust_tail(nfp_prog, meta); case BPF_FUNC_map_lookup_elem: case BPF_FUNC_map_update_elem: case BPF_FUNC_map_delete_elem: @@ -3883,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) struct nfp_insn_meta *meta1, *meta2; struct nfp_bpf_map *nfp_map; struct bpf_map *map; + u32 id; nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { if (meta1->skip || meta2->skip) @@ -3894,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) map = (void *)(unsigned long)((u32)meta1->insn.imm | (u64)meta2->insn.imm << 32); - if (bpf_map_offload_neutral(map)) - continue; - nfp_map = map_to_offmap(map)->dev_priv; + if (bpf_map_offload_neutral(map)) { + id = map->id; + } else { + nfp_map = map_to_offmap(map)->dev_priv; + id = nfp_map->tid; + } - meta1->insn.imm = nfp_map->tid; + meta1->insn.imm = id; meta2->insn.imm = 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 994d2b756fe1..970af07f4656 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -45,8 +45,8 @@ const struct rhashtable_params nfp_bpf_maps_neutral_params = { .nelem_hint = 4, - .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), - .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), + .key_len = FIELD_SIZEOF(struct bpf_map, id), + .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id), .head_offset = offsetof(struct nfp_bpf_neutral_map, l), .automatic_shrinking = true, }; @@ -334,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) return 0; } +static int +nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->adjust_tail = true; + return 0; +} + static int nfp_bpf_parse_capabilities(struct nfp_app *app) { struct nfp_cpp *cpp = app->pf->cpp; @@ -380,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) if (nfp_bpf_parse_cap_qsel(app->priv, value, length)) goto err_release_free; break; + case NFP_BPF_CAP_TYPE_ADJUST_TAIL: + if (nfp_bpf_parse_cap_adjust_tail(app->priv, value, + length)) + goto err_release_free; + break; default: nfp_dbg(cpp, "unknown BPF capability: %d\n", type); break; @@ -490,6 +503,7 @@ const struct nfp_app_type app_bpf = { .vnic_free = nfp_bpf_vnic_free, .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx, + .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw, .setup_tc = nfp_bpf_setup_tc, .bpf = nfp_ndo_bpf, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index bec935468f90..dbd00982fd2b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -47,6 +47,8 @@ #include "../nfp_asm.h" #include "fw.h" +#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) + /* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! */ @@ -148,6 +150,7 @@ enum pkt_vec { * * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) * @queue_select: BPF can set the RX queue ID in packet vector + * @adjust_tail: BPF can simply trunc packet size for adjust tail */ struct nfp_app_bpf { struct nfp_app *app; @@ -193,6 +196,7 @@ struct nfp_app_bpf { bool pseudo_random; bool queue_select; + bool adjust_tail; }; enum nfp_bpf_map_use { @@ -221,6 +225,7 @@ struct nfp_bpf_map { struct nfp_bpf_neutral_map { struct rhash_head l; struct bpf_map *ptr; + u32 map_id; u32 count; }; @@ -501,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, void *key, void *next_key); -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb); +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len); void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, + unsigned int len); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 49b03f7dbf46..1ccd6371a15b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, ASSERT_RTNL(); /* Reuse path - other offloaded program is already tracking this map. */ - record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, nfp_bpf_maps_neutral_params); if (record) { nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; @@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, } record->ptr = map; + record->map_id = map->id; record->count = 1; err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, @@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) bpf->maps.max_elems - bpf->map_elems_in_use); return -ENOMEM; } - if (offmap->map.key_size > bpf->maps.max_key_sz || - offmap->map.value_size > bpf->maps.max_val_sz || - round_up(offmap->map.key_size, 8) + + + if (round_up(offmap->map.key_size, 8) + round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) { - pr_info("elements don't fit in device constraints\n"); + pr_info("map elements too large: %u, FW max element size (key+value): %u\n", + round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8), + bpf->maps.max_elem_sz); + return -ENOMEM; + } + if (offmap->map.key_size > bpf->maps.max_key_sz) { + pr_info("map key size %u, FW max is %u\n", + offmap->map.key_size, bpf->maps.max_key_sz); + return -ENOMEM; + } + if (offmap->map.value_size > bpf->maps.max_val_sz) { + pr_info("map value size %u, FW max is %u\n", + offmap->map.value_size, bpf->maps.max_val_sz); return -ENOMEM; } @@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src, return 0; } -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb) +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len) { - struct cmsg_bpf_event *cbe = (void *)skb->data; - u32 pkt_size, data_size; - struct bpf_map *map; + struct cmsg_bpf_event *cbe = (void *)data; + struct nfp_bpf_neutral_map *record; + u32 pkt_size, data_size, map_id; + u64 map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event)) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event)) + return -EINVAL; pkt_size = be32_to_cpu(cbe->pkt_size); data_size = be32_to_cpu(cbe->data_size); - map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr); + map_id_full = be64_to_cpu(cbe->map_ptr); + map_id = map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + return -EINVAL; if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) - goto err_drop; + return -EINVAL; rcu_read_lock(); - if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map, - nfp_bpf_maps_neutral_params)) { + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); + if (!record || map_id_full > U32_MAX) { rcu_read_unlock(); - pr_warn("perf event: dest map pointer %px not recognized, dropping event\n", - map); - goto err_drop; + cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", + map_id_full, map_id_full); + return -EINVAL; } - bpf_event_output(map, be32_to_cpu(cbe->cpu_id), + bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id), &cbe->data[round_up(pkt_size, 4)], data_size, cbe->data, pkt_size, nfp_bpf_perf_event_copy); rcu_read_unlock(); - dev_consume_skb_any(skb); return 0; -err_drop: - dev_kfree_skb_any(skb); - return -EINVAL; } static int diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 49ba0d645d36..a6e9248669e1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; + case BPF_FUNC_xdp_adjust_tail: + if (!bpf->adjust_tail) { + pr_vlog(env, "adjust_tail not supported by FW\n"); + return -EOPNOTSUPP; + } + break; + case BPF_FUNC_map_lookup_elem: if (!nfp_bpf_map_call_ok("map_lookup", env, meta, bpf->helpers.map_lookup, reg1) || diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 69d4ae7a61f3..8607d09ab732 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -172,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) return ERR_PTR(-EINVAL); + if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw)) + return ERR_PTR(-EINVAL); app = kzalloc(sizeof(*app), GFP_KERNEL); if (!app) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index afbc19aa66a8..4e1eb3395648 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -98,6 +98,7 @@ extern const struct nfp_app_type app_abm; * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler + * @ctrl_msg_rx_raw: handler for control messages from data queues * @setup_tc: setup TC ndo * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program @@ -150,6 +151,8 @@ struct nfp_app_type { void (*stop)(struct nfp_app *app); void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb); + void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data, + unsigned int len); int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); @@ -318,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app) return app->type->ctrl_has_meta; } +static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app) +{ + return app && app->type->ctrl_msg_rx_raw; +} + static inline const char *nfp_app_extra_cap(struct nfp_app *app, struct nfp_net *nn) { @@ -381,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) app->type->ctrl_msg_rx(app, skb); } +static inline void +nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + if (!app || !app->type->ctrl_msg_rx_raw) + return; + + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len); + app->type->ctrl_msg_rx_raw(app, data, len); +} + static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode) { if (!app->type->eswitch_mode_get) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index cdc4e065f6f5..fad0e62a910c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -93,6 +93,7 @@ enum br_mask { BR_BNE = 0x01, BR_BMI = 0x02, BR_BHS = 0x04, + BR_BCC = 0x05, BR_BLO = 0x05, BR_BGE = 0x08, BR_BLT = 0x09, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7c1a921d178d..0817c69fc568 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1759,6 +1759,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) } } + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_repr_get(nn->app, meta.portid); + if (unlikely(!netdev)) { + nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + skb = build_skb(rxbuf->frag, true_bufsz); if (unlikely(!skb)) { nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); @@ -1774,20 +1797,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); - if (likely(!meta.portid)) { - netdev = dp->netdev; - } else { - struct nfp_net *nn; - - nn = netdev_priv(dp->netdev); - netdev = nfp_app_repr_get(nn->app, meta.portid); - if (unlikely(!netdev)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb); - continue; - } - nfp_repr_inc_rx_stats(netdev, pkt_len); - } - skb_reserve(skb, pkt_off); skb_put(skb, pkt_len); @@ -3857,6 +3866,9 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.mtu = NFP_NET_DEFAULT_MTU; nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp); + if (nfp_app_ctrl_uses_data_vnics(nn->app)) + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA; + if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) { nfp_net_rss_init(nn); nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?: diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index bb63c115537d..44d3ea75d043 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -127,6 +127,7 @@ #define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ #define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ +#define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */ #define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ #define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d50c2f0a655a..f91b0f8ff3a9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -4,22 +4,46 @@ #include <linux/errno.h> #include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/rbtree.h> #include <uapi/linux/bpf.h> struct sock; struct sockaddr; struct cgroup; struct sk_buff; +struct bpf_map; +struct bpf_prog; struct bpf_sock_ops_kern; +struct bpf_cgroup_storage; #ifdef CONFIG_CGROUP_BPF extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +DECLARE_PER_CPU(void*, bpf_cgroup_storage); + +struct bpf_cgroup_storage_map; + +struct bpf_storage_buffer { + struct rcu_head rcu; + char data[0]; +}; + +struct bpf_cgroup_storage { + struct bpf_storage_buffer *buf; + struct bpf_cgroup_storage_map *map; + struct bpf_cgroup_storage_key key; + struct list_head list; + struct rb_node node; + struct rcu_head rcu; +}; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_storage *storage; }; struct bpf_prog_array; @@ -77,6 +101,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +{ + struct bpf_storage_buffer *buf; + + if (!storage) + return; + + buf = READ_ONCE(storage->buf); + this_cpu_write(bpf_cgroup_storage, &buf->data[0]); +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type); +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -221,6 +265,16 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} +static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, + struct bpf_map *map) { return 0; } +static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, + struct bpf_map *map) {} +static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( + struct bpf_prog *prog) { return 0; } +static inline void bpf_cgroup_storage_free( + struct bpf_cgroup_storage *storage) {} + #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b5ad95cf339..cd8790d2c6ed 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_arg_type { enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ + RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ }; @@ -282,6 +283,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + struct bpf_map *cgroup_storage; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY void *security; @@ -348,9 +350,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, * The 'struct bpf_prog_array *' should only be replaced with xchg() * since other cpus are walking the array of pointers in parallel. */ +struct bpf_prog_array_item { + struct bpf_prog *prog; + struct bpf_cgroup_storage *cgroup_storage; +}; + struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog *progs[0]; + struct bpf_prog_array_item items[0]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); @@ -371,7 +378,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array_item *_item; \ + struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ preempt_disable(); \ @@ -379,10 +387,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ goto _out; \ - _prog = _array->progs; \ - while ((__prog = READ_ONCE(*_prog))) { \ - _ret &= func(__prog, ctx); \ - _prog++; \ + _item = &_array->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ + _ret &= func(_prog, ctx); \ + _item++; \ } \ _out: \ rcu_read_unlock(); \ @@ -435,6 +444,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); +int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); +void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); @@ -777,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_get_local_storage_proto; + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c5700c2d5549..add08be53b6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h index 661fd5b4d3e0..08359e2d8b35 100644 --- a/include/net/seg6_local.h +++ b/include/net/seg6_local.h @@ -21,10 +21,12 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); +extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb); struct seg6_bpf_srh_state { - bool valid; + struct ipv6_sr_hdr *srh; u16 hdrlen; + bool valid; }; DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 870113916cac..dd5758dc35d3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -75,6 +75,11 @@ struct bpf_lpm_trie_key { __u8 data[0]; /* Arbitrary size */ }; +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -120,6 +125,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, }; enum bpf_prog_type { @@ -1371,6 +1377,20 @@ union bpf_attr { * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * Return + * A 8-byte long non-decreasing number. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * Return + * A 8-byte long non-decreasing number. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket @@ -2075,6 +2095,24 @@ union bpf_attr { * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. + * + * void* get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage is defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the bpf program type, a local storage area + * can be shared between multiple instances of the bpf program, + * running simultaneously. + * + * A user should care about the synchronization by himself. + * For example, by using the BPF_STX_XADD instruction to alter + * the shared data. + * Return + * Pointer to the local storage area. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2157,7 +2195,8 @@ union bpf_attr { FN(rc_repeat), \ FN(rc_keydown), \ FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), + FN(get_current_cgroup_id), \ + FN(get_local_storage), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f27f5496d6fe..e8906cbad81f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -3,6 +3,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_SYSCALL) += btf.o ifeq ($(CONFIG_NET),y) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index badabb0b435c..6a7d931bbc55 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp) list_for_each_entry_safe(pl, tmp, progs, node) { list_del(&pl->node); bpf_prog_put(pl->prog); + bpf_cgroup_storage_unlink(pl->storage); + bpf_cgroup_storage_free(pl->storage); kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } @@ -115,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp, cnt = 0; p = cgrp; do { - if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) - list_for_each_entry(pl, - &p->bpf.progs[type], node) { - if (!pl->prog) - continue; - progs->progs[cnt++] = pl->prog; - } - p = cgroup_parent(p); - } while (p); + if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + continue; + + list_for_each_entry(pl, &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + + progs->items[cnt].prog = pl->prog; + progs->items[cnt].cgroup_storage = pl->storage; + cnt++; + } + } while ((p = cgroup_parent(p))); rcu_assign_pointer(*array, progs); return 0; @@ -172,6 +177,45 @@ cleanup: return -ENOMEM; } +static int update_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type) +{ + struct cgroup_subsys_state *css; + int err; + + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + return err; +} + #define BPF_CGROUP_MAX_PROGS 64 /** @@ -188,7 +232,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, { struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; - struct cgroup_subsys_state *css; + struct bpf_cgroup_storage *storage, *old_storage = NULL; struct bpf_prog_list *pl; bool pl_was_allocated; int err; @@ -210,72 +254,71 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; + storage = bpf_cgroup_storage_alloc(prog); + if (IS_ERR(storage)) + return -ENOMEM; + if (flags & BPF_F_ALLOW_MULTI) { - list_for_each_entry(pl, progs, node) - if (pl->prog == prog) + list_for_each_entry(pl, progs, node) { + if (pl->prog == prog) { /* disallow attaching the same prog twice */ + bpf_cgroup_storage_free(storage); return -EINVAL; + } + } pl = kmalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) + if (!pl) { + bpf_cgroup_storage_free(storage); return -ENOMEM; + } + pl_was_allocated = true; pl->prog = prog; + pl->storage = storage; list_add_tail(&pl->node, progs); } else { if (list_empty(progs)) { pl = kmalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) + if (!pl) { + bpf_cgroup_storage_free(storage); return -ENOMEM; + } pl_was_allocated = true; list_add_tail(&pl->node, progs); } else { pl = list_first_entry(progs, typeof(*pl), node); old_prog = pl->prog; + old_storage = pl->storage; + bpf_cgroup_storage_unlink(old_storage); pl_was_allocated = false; } pl->prog = prog; + pl->storage = storage; } cgrp->bpf.flags[type] = flags; - /* allocate and recompute effective prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - err = compute_effective_progs(desc, type, &desc->bpf.inactive); - if (err) - goto cleanup; - } - - /* all allocations were successful. Activate all prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - activate_effective_progs(desc, type, desc->bpf.inactive); - desc->bpf.inactive = NULL; - } + err = update_effective_progs(cgrp, type); + if (err) + goto cleanup; static_branch_inc(&cgroup_bpf_enabled_key); + if (old_storage) + bpf_cgroup_storage_free(old_storage); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_cgroup_storage_link(storage, cgrp, type); return 0; cleanup: - /* oom while computing effective. Free all computed effective arrays - * since they were not activated - */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - bpf_prog_array_free(desc->bpf.inactive); - desc->bpf.inactive = NULL; - } - /* and cleanup the prog list */ pl->prog = old_prog; + bpf_cgroup_storage_free(pl->storage); + pl->storage = old_storage; + bpf_cgroup_storage_link(old_storage, cgrp, type); if (pl_was_allocated) { list_del(&pl->node); kfree(pl); @@ -298,7 +341,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct list_head *progs = &cgrp->bpf.progs[type]; u32 flags = cgrp->bpf.flags[type]; struct bpf_prog *old_prog = NULL; - struct cgroup_subsys_state *css; struct bpf_prog_list *pl; int err; @@ -337,25 +379,14 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = NULL; } - /* allocate and recompute effective prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - err = compute_effective_progs(desc, type, &desc->bpf.inactive); - if (err) - goto cleanup; - } - - /* all allocations were successful. Activate all prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - activate_effective_progs(desc, type, desc->bpf.inactive); - desc->bpf.inactive = NULL; - } + err = update_effective_progs(cgrp, type); + if (err) + goto cleanup; /* now can actually delete it from this cgroup list */ list_del(&pl->node); + bpf_cgroup_storage_unlink(pl->storage); + bpf_cgroup_storage_free(pl->storage); kfree(pl); if (list_empty(progs)) /* last program was detached, reset flags to zero */ @@ -366,16 +397,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, return 0; cleanup: - /* oom while computing effective. Free all computed effective arrays - * since they were not activated - */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - bpf_prog_array_free(desc->bpf.inactive); - desc->bpf.inactive = NULL; - } - /* and restore back old_prog */ pl->prog = old_prog; return err; @@ -654,6 +675,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_delete_elem_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 253aa8e79c7b..4d09e610777f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { if (prog_cnt) return kzalloc(sizeof(struct bpf_prog_array) + - sizeof(struct bpf_prog *) * (prog_cnt + 1), + sizeof(struct bpf_prog_array_item) * + (prog_cnt + 1), flags); return &empty_prog_array.hdr; @@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } -int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) +int bpf_prog_array_length(struct bpf_prog_array __rcu *array) { - struct bpf_prog **prog; + struct bpf_prog_array_item *item; u32 cnt = 0; rcu_read_lock(); - prog = rcu_dereference(progs)->progs; - for (; *prog; prog++) - if (*prog != &dummy_bpf_prog.prog) + item = rcu_dereference(array)->items; + for (; item->prog; item++) + if (item->prog != &dummy_bpf_prog.prog) cnt++; rcu_read_unlock(); return cnt; } -static bool bpf_prog_array_copy_core(struct bpf_prog **prog, + +static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, u32 *prog_ids, u32 request_cnt) { + struct bpf_prog_array_item *item; int i = 0; - for (; *prog; prog++) { - if (*prog == &dummy_bpf_prog.prog) + item = rcu_dereference(array)->items; + for (; item->prog; item++) { + if (item->prog == &dummy_bpf_prog.prog) continue; - prog_ids[i] = (*prog)->aux->id; + prog_ids[i] = item->prog->aux->id; if (++i == request_cnt) { - prog++; + item++; break; } } - return !!(*prog); + return !!(item->prog); } -int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, __u32 __user *prog_ids, u32 cnt) { - struct bpf_prog **prog; unsigned long err = 0; bool nospc; u32 *ids; @@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, if (!ids) return -ENOMEM; rcu_read_lock(); - prog = rcu_dereference(progs)->progs; - nospc = bpf_prog_array_copy_core(prog, ids, cnt); + nospc = bpf_prog_array_copy_core(array, ids, cnt); rcu_read_unlock(); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); kfree(ids); @@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return 0; } -void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array, struct bpf_prog *old_prog) { - struct bpf_prog **prog = progs->progs; + struct bpf_prog_array_item *item = array->items; - for (; *prog; prog++) - if (*prog == old_prog) { - WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + for (; item->prog; item++) + if (item->prog == old_prog) { + WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); break; } } @@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog_array **new_array) { int new_prog_cnt, carry_prog_cnt = 0; - struct bpf_prog **existing_prog; + struct bpf_prog_array_item *existing; struct bpf_prog_array *array; bool found_exclude = false; int new_prog_idx = 0; @@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, * the new array. */ if (old_array) { - existing_prog = old_array->progs; - for (; *existing_prog; existing_prog++) { - if (*existing_prog == exclude_prog) { + existing = old_array->items; + for (; existing->prog; existing++) { + if (existing->prog == exclude_prog) { found_exclude = true; continue; } - if (*existing_prog != &dummy_bpf_prog.prog) + if (existing->prog != &dummy_bpf_prog.prog) carry_prog_cnt++; - if (*existing_prog == include_prog) + if (existing->prog == include_prog) return -EEXIST; } } @@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, /* Fill in the new prog array */ if (carry_prog_cnt) { - existing_prog = old_array->progs; - for (; *existing_prog; existing_prog++) - if (*existing_prog != exclude_prog && - *existing_prog != &dummy_bpf_prog.prog) - array->progs[new_prog_idx++] = *existing_prog; + existing = old_array->items; + for (; existing->prog; existing++) + if (existing->prog != exclude_prog && + existing->prog != &dummy_bpf_prog.prog) { + array->items[new_prog_idx++].prog = + existing->prog; + } } if (include_prog) - array->progs[new_prog_idx++] = include_prog; - array->progs[new_prog_idx] = NULL; + array->items[new_prog_idx++].prog = include_prog; + array->items[new_prog_idx].prog = NULL; *new_array = array; return 0; } @@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt) { - struct bpf_prog **prog; u32 cnt = 0; if (array) @@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, return 0; /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ - prog = rcu_dereference_check(array, 1)->progs; - return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC + return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC : 0; } @@ -1793,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_sock_map_update_proto __weak; const struct bpf_func_proto bpf_sock_hash_update_proto __weak; const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; +const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 73065e2d23c2..1991466b8327 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +DECLARE_PER_CPU(void*, bpf_cgroup_storage); + +BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) +{ + /* map and flags arguments are not used now, + * but provide an ability to extend the API + * for other types of local storages. + * verifier checks that their values are correct. + */ + return (unsigned long) this_cpu_read(bpf_cgroup_storage); +} + +const struct bpf_func_proto bpf_get_local_storage_proto = { + .func = bpf_get_local_storage, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; #endif diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c new file mode 100644 index 000000000000..fc4e37f68f2a --- /dev/null +++ b/kernel/bpf/local_storage.c @@ -0,0 +1,378 @@ +//SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf-cgroup.h> +#include <linux/bpf.h> +#include <linux/bug.h> +#include <linux/filter.h> +#include <linux/mm.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +DEFINE_PER_CPU(void*, bpf_cgroup_storage); + +#ifdef CONFIG_CGROUP_BPF + +#define LOCAL_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + +struct bpf_cgroup_storage_map { + struct bpf_map map; + + spinlock_t lock; + struct bpf_prog *prog; + struct rb_root root; + struct list_head list; +}; + +static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) +{ + return container_of(map, struct bpf_cgroup_storage_map, map); +} + +static int bpf_cgroup_storage_key_cmp( + const struct bpf_cgroup_storage_key *key1, + const struct bpf_cgroup_storage_key *key2) +{ + if (key1->cgroup_inode_id < key2->cgroup_inode_id) + return -1; + else if (key1->cgroup_inode_id > key2->cgroup_inode_id) + return 1; + else if (key1->attach_type < key2->attach_type) + return -1; + else if (key1->attach_type > key2->attach_type) + return 1; + return 0; +} + +static struct bpf_cgroup_storage *cgroup_storage_lookup( + struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, + bool locked) +{ + struct rb_root *root = &map->root; + struct rb_node *node; + + if (!locked) + spin_lock_bh(&map->lock); + + node = root->rb_node; + while (node) { + struct bpf_cgroup_storage *storage; + + storage = container_of(node, struct bpf_cgroup_storage, node); + + switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { + case -1: + node = node->rb_left; + break; + case 1: + node = node->rb_right; + break; + default: + if (!locked) + spin_unlock_bh(&map->lock); + return storage; + } + } + + if (!locked) + spin_unlock_bh(&map->lock); + + return NULL; +} + +static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, + struct bpf_cgroup_storage *storage) +{ + struct rb_root *root = &map->root; + struct rb_node **new = &(root->rb_node), *parent = NULL; + + while (*new) { + struct bpf_cgroup_storage *this; + + this = container_of(*new, struct bpf_cgroup_storage, node); + + parent = *new; + switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { + case -1: + new = &((*new)->rb_left); + break; + case 1: + new = &((*new)->rb_right); + break; + default: + return -EEXIST; + } + } + + rb_link_node(&storage->node, parent, new); + rb_insert_color(&storage->node, root); + + return 0; +} + +static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + + storage = cgroup_storage_lookup(map, key, false); + if (!storage) + return NULL; + + return &READ_ONCE(storage->buf)->data[0]; +} + +static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, + void *value, u64 flags) +{ + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + struct bpf_storage_buffer *new; + + if (flags & BPF_NOEXIST) + return -EINVAL; + + storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, + key, false); + if (!storage) + return -ENOENT; + + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!new) + return -ENOMEM; + + memcpy(&new->data[0], value, map->value_size); + + new = xchg(&storage->buf, new); + kfree_rcu(new, rcu); + + return 0; +} + +static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, + void *_next_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage_key *next = _next_key; + struct bpf_cgroup_storage *storage; + + spin_lock_bh(&map->lock); + + if (list_empty(&map->list)) + goto enoent; + + if (key) { + storage = cgroup_storage_lookup(map, key, true); + if (!storage) + goto enoent; + + storage = list_next_entry(storage, list); + if (!storage) + goto enoent; + } else { + storage = list_first_entry(&map->list, + struct bpf_cgroup_storage, list); + } + + spin_unlock_bh(&map->lock); + next->attach_type = storage->key.attach_type; + next->cgroup_inode_id = storage->key.cgroup_inode_id; + return 0; + +enoent: + spin_unlock_bh(&map->lock); + return -ENOENT; +} + +static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +{ + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_cgroup_storage_map *map; + + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) + return ERR_PTR(-EINVAL); + + if (attr->value_size > PAGE_SIZE) + return ERR_PTR(-E2BIG); + + if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK) + /* reserved bits should not be used */ + return ERR_PTR(-EINVAL); + + if (attr->max_entries) + /* max_entries is not used and enforced to be 0 */ + return ERR_PTR(-EINVAL); + + map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), + __GFP_ZERO | GFP_USER, numa_node); + if (!map) + return ERR_PTR(-ENOMEM); + + map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map), + PAGE_SIZE) >> PAGE_SHIFT; + + /* copy mandatory map attributes */ + bpf_map_init_from_attr(&map->map, attr); + + spin_lock_init(&map->lock); + map->root = RB_ROOT; + INIT_LIST_HEAD(&map->list); + + return &map->map; +} + +static void cgroup_storage_map_free(struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + WARN_ON(!RB_EMPTY_ROOT(&map->root)); + WARN_ON(!list_empty(&map->list)); + + kfree(map); +} + +static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +const struct bpf_map_ops cgroup_storage_map_ops = { + .map_alloc = cgroup_storage_map_alloc, + .map_free = cgroup_storage_map_free, + .map_get_next_key = cgroup_storage_get_next_key, + .map_lookup_elem = cgroup_storage_lookup_elem, + .map_update_elem = cgroup_storage_update_elem, + .map_delete_elem = cgroup_storage_delete_elem, +}; + +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + int ret = -EBUSY; + + spin_lock_bh(&map->lock); + + if (map->prog && map->prog != prog) + goto unlock; + if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map) + goto unlock; + + map->prog = prog; + prog->aux->cgroup_storage = _map; + ret = 0; +unlock: + spin_unlock_bh(&map->lock); + + return ret; +} + +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + spin_lock_bh(&map->lock); + if (map->prog == prog) { + WARN_ON(prog->aux->cgroup_storage != _map); + map->prog = NULL; + prog->aux->cgroup_storage = NULL; + } + spin_unlock_bh(&map->lock); +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog) +{ + struct bpf_cgroup_storage *storage; + struct bpf_map *map; + u32 pages; + + map = prog->aux->cgroup_storage; + if (!map) + return NULL; + + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + if (bpf_map_charge_memlock(map, pages)) + return ERR_PTR(-EPERM); + + storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), + __GFP_ZERO | GFP_USER, map->numa_node); + if (!storage) { + bpf_map_uncharge_memlock(map, pages); + return ERR_PTR(-ENOMEM); + } + + storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!storage->buf) { + bpf_map_uncharge_memlock(map, pages); + kfree(storage); + return ERR_PTR(-ENOMEM); + } + + storage->map = (struct bpf_cgroup_storage_map *)map; + + return storage; +} + +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) +{ + u32 pages; + struct bpf_map *map; + + if (!storage) + return; + + map = &storage->map->map; + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + bpf_map_uncharge_memlock(map, pages); + + kfree_rcu(storage->buf, rcu); + kfree_rcu(storage, rcu); +} + +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type) +{ + struct bpf_cgroup_storage_map *map; + + if (!storage) + return; + + storage->key.attach_type = type; + storage->key.cgroup_inode_id = cgroup->kn->id.id; + + map = storage->map; + + spin_lock_bh(&map->lock); + WARN_ON(cgroup_storage_insert(map, storage)); + list_add(&storage->list, &map->list); + spin_unlock_bh(&map->lock); +} + +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) +{ + struct bpf_cgroup_storage_map *map; + struct rb_root *root; + + if (!storage) + return; + + map = storage->map; + + spin_lock_bh(&map->lock); + root = &map->root; + rb_erase(&storage->node, root); + + list_del(&storage->list); + spin_unlock_bh(&map->lock); +} + +#endif diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 1da574612bea..3bfbf4464416 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -23,7 +23,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) * is a runtime binding. Doing static check alone * in the verifier is not enough. */ - if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) { fdput(f); return ERR_PTR(-ENOTSUPP); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a31a1ba0f8ea..5af4e9e2722d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages) return 0; } -static int bpf_map_charge_memlock(struct bpf_map *map) +static int bpf_charge_memlock(struct user_struct *user, u32 pages) { - struct user_struct *user = get_current_user(); - unsigned long memlock_limit; + unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { + atomic_long_sub(pages, &user->locked_vm); + return -EPERM; + } + return 0; +} - atomic_long_add(map->pages, &user->locked_vm); +static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) +{ + atomic_long_sub(pages, &user->locked_vm); +} + +static int bpf_map_init_memlock(struct bpf_map *map) +{ + struct user_struct *user = get_current_user(); + int ret; - if (atomic_long_read(&user->locked_vm) > memlock_limit) { - atomic_long_sub(map->pages, &user->locked_vm); + ret = bpf_charge_memlock(user, map->pages); + if (ret) { free_uid(user); - return -EPERM; + return ret; } map->user = user; - return 0; + return ret; } -static void bpf_map_uncharge_memlock(struct bpf_map *map) +static void bpf_map_release_memlock(struct bpf_map *map) { struct user_struct *user = map->user; - - atomic_long_sub(map->pages, &user->locked_vm); + bpf_uncharge_memlock(user, map->pages); free_uid(user); } +int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) +{ + int ret; + + ret = bpf_charge_memlock(map->user, pages); + if (ret) + return ret; + map->pages += pages; + return ret; +} + +void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) +{ + bpf_uncharge_memlock(map->user, pages); + map->pages -= pages; +} + static int bpf_map_alloc_id(struct bpf_map *map) { int id; @@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - bpf_map_uncharge_memlock(map); + bpf_map_release_memlock(map); security_bpf_map_free(map); /* implementation dependent freeing */ map->ops->map_free(map); @@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map_nouncharge; - err = bpf_map_charge_memlock(map); + err = bpf_map_init_memlock(map); if (err) goto free_map_sec; @@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr) return err; free_map: - bpf_map_uncharge_memlock(map); + bpf_map_release_memlock(map); free_map_sec: security_bpf_map_free(map); free_map_nouncharge: @@ -929,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux) { int i; + if (aux->cgroup_storage) + bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage); + for (i = 0; i < aux->used_map_cnt; i++) bpf_map_put(aux->used_maps[i]); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 25e47c195874..587468a9c37d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + if (func_id != BPF_FUNC_get_local_storage) + goto error; + break; /* devmap returns a pointer to a live net_device ifindex that we cannot * allow to be modified from bpf side. So do not allow lookup elements * for now. @@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; + case BPF_FUNC_get_local_storage: + if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + goto error; + break; default: break; } @@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs = cur_regs(env); + + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (func_id == BPF_FUNC_get_local_storage && + !register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -2545,8 +2563,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn mark_reg_unknown(env, regs, BPF_REG_0); } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; - } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || + fn->ret_type == RET_PTR_TO_MAP_VALUE) { + if (fn->ret_type == RET_PTR_TO_MAP_VALUE) + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + else + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].off = 0; @@ -3238,8 +3260,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } } - /* check dest operand */ - err = check_reg_arg(env, insn->dst_reg, DST_OP); + /* check dest operand, mark as required later */ + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); if (err) return err; @@ -3265,6 +3287,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: R = imm * remember the value we stored into this reg */ + /* clear any state __mark_reg_known doesn't set */ + mark_reg_unknown(env, regs, insn->dst_reg); regs[insn->dst_reg].type = SCALAR_VALUE; if (BPF_CLASS(insn->code) == BPF_ALU64) { __mark_reg_known(regs + insn->dst_reg, @@ -5152,6 +5176,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) } env->used_maps[env->used_map_cnt++] = map; + if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE && + bpf_cgroup_storage_assign(env->prog, map)) { + verbose(env, + "only one cgroup storage is allowed\n"); + fdput(f); + return -EBUSY; + } + fdput(f); next_insn: insn++; @@ -5178,6 +5210,10 @@ static void release_maps(struct bpf_verifier_env *env) { int i; + if (env->prog->aux->cgroup_storage) + bpf_cgroup_storage_release(env->prog, + env->prog->aux->cgroup_storage); + for (i = 0; i < env->used_map_cnt; i++) bpf_map_put(env->used_maps[i]); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 22a78eedf4b1..f4078830ea50 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -11,12 +11,14 @@ #include <linux/filter.h> #include <linux/sched/signal.h> -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, + struct bpf_cgroup_storage *storage) { u32 ret; preempt_disable(); rcu_read_lock(); + bpf_cgroup_storage_set(storage); ret = BPF_PROG_RUN(prog, ctx); rcu_read_unlock(); preempt_enable(); @@ -26,14 +28,19 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) { + struct bpf_cgroup_storage *storage = NULL; u64 time_start, time_spent = 0; u32 ret = 0, i; + storage = bpf_cgroup_storage_alloc(prog); + if (IS_ERR(storage)) + return PTR_ERR(storage); + if (!repeat) repeat = 1; time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - ret = bpf_test_run_one(prog, ctx); + ret = bpf_test_run_one(prog, ctx, storage); if (need_resched()) { if (signal_pending(current)) break; @@ -46,6 +53,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) do_div(time_spent, repeat); *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + bpf_cgroup_storage_free(storage); + return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index 7509bb7f0694..587bbfbd7db3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3814,6 +3814,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { + .func = bpf_get_socket_cookie_sock_addr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { + .func = bpf_get_socket_cookie_sock_ops, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) { struct sock *sk = sk_to_full_sk(skb->sk); @@ -4544,26 +4568,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_tlvs, *srh_end, *ptr; - struct ipv6_sr_hdr *srh; int srhoff = 0; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (srh == NULL) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); ptr = skb->data + offset; if (ptr >= srh_tlvs && ptr + len <= srh_end) - srh_state->valid = 0; + srh_state->valid = false; else if (ptr < (void *)&srh->flags || ptr + len > (void *)&srh->segments) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); memcpy(skb->data + offset, from, len); return 0; @@ -4579,52 +4605,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .arg4_type = ARG_CONST_SIZE }; -BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, - u32, action, void *, param, u32, param_len) +static void bpf_update_srh_state(struct sk_buff *skb) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct ipv6_sr_hdr *srh; int srhoff = 0; - int err; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - if (!srh_state->valid) { - if (unlikely((srh_state->hdrlen & 7) != 0)) - return -EBADMSG; - - srh->hdrlen = (u8)(srh_state->hdrlen >> 3); - if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) - return -EBADMSG; - - srh_state->valid = 1; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { + srh_state->srh = NULL; + } else { + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + srh_state->hdrlen = srh_state->srh->hdrlen << 3; + srh_state->valid = true; } +} + +BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, + u32, action, void *, param, u32, param_len) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + int hdroff = 0; + int err; switch (action) { case SEG6_LOCAL_ACTION_END_X: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(struct in6_addr)) return -EINVAL; return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); case SEG6_LOCAL_ACTION_END_T: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(int)) return -EINVAL; return seg6_lookup_nexthop(skb, NULL, *(int *)param); + case SEG6_LOCAL_ACTION_END_DT6: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; + if (param_len != sizeof(int)) + return -EINVAL; + + if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) + return -EBADMSG; + if (!pskb_pull(skb, hdroff)) + return -EBADMSG; + + skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + bpf_compute_data_pointers(skb); + bpf_update_srh_state(skb); + return seg6_lookup_nexthop(skb, NULL, *(int *)param); case SEG6_LOCAL_ACTION_END_B6: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; case SEG6_LOCAL_ACTION_END_B6_ENCAP: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; default: return -EINVAL; @@ -4646,15 +4698,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_end, *srh_tlvs, *ptr; - struct ipv6_sr_hdr *srh; struct ipv6hdr *hdr; int srhoff = 0; int ret; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (unlikely(srh == NULL)) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + ((srh->first_segment + 1) << 4)); @@ -4684,8 +4735,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, hdr = (struct ipv6hdr *)skb->data; hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_state->hdrlen += len; - srh_state->valid = 0; + srh_state->valid = false; return 0; } @@ -4768,6 +4822,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) */ case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4790,6 +4846,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) default: return NULL; } + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_addr_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4813,6 +4873,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } static const struct bpf_func_proto * +cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; + default: + return sk_filter_func_proto(func_id, prog); + } +} + +static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { @@ -4932,6 +5003,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_map_update_proto; case BPF_FUNC_sock_hash_update: return &bpf_sock_hash_update_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_ops_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4951,6 +5026,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_cork_bytes_proto; case BPF_FUNC_msg_pull_data: return &bpf_msg_pull_data_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4978,6 +5055,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -6782,7 +6861,7 @@ const struct bpf_prog_ops xdp_prog_ops = { }; const struct bpf_verifier_ops cg_skb_verifier_ops = { - .get_func_proto = sk_filter_func_proto, + .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e45098593dc0..3e85437f7106 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, * mixing with BH RCU lock doesn't work. */ preempt_disable(); - rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); - rcu_read_unlock(); switch (ret) { case BPF_OK: diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index e1025b493a18..60325dbfe88b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -459,36 +459,57 @@ drop: DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); +bool seg6_bpf_has_valid_srh(struct sk_buff *skb) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; + + if (unlikely(srh == NULL)) + return false; + + if (unlikely(!srh_state->valid)) { + if ((srh_state->hdrlen & 7) != 0) + return false; + + srh->hdrlen = (u8)(srh_state->hdrlen >> 3); + if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)) + return false; + + srh_state->valid = true; + } + + return true; +} + static int input_action_end_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct seg6_bpf_srh_state local_srh_state; struct ipv6_sr_hdr *srh; - int srhoff = 0; int ret; srh = get_and_validate_srh(skb); - if (!srh) - goto drop; + if (!srh) { + kfree_skb(skb); + return -EINVAL; + } advance_nextseg(srh, &ipv6_hdr(skb)->daddr); /* preempt_disable is needed to protect the per-CPU buffer srh_state, * which is also accessed by the bpf_lwt_seg6_* helpers */ preempt_disable(); + srh_state->srh = srh; srh_state->hdrlen = srh->hdrlen << 3; - srh_state->valid = 1; + srh_state->valid = true; rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb); rcu_read_unlock(); - local_srh_state = *srh_state; - preempt_enable(); - switch (ret) { case BPF_OK: case BPF_REDIRECT: @@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb, goto drop; } - if (unlikely((local_srh_state.hdrlen & 7) != 0)) - goto drop; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - goto drop; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3); - - if (!local_srh_state.valid && - unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) goto drop; + preempt_enable(); if (ret != BPF_REDIRECT) seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); drop: + preempt_enable(); kfree_skb(skb); return -EINVAL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 9ea2f7b64869..f88d5683d6ee 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -105,8 +105,8 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o syscall_tp_user.o cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o -xdpsock-objs := bpf_load.o xdpsock_user.o -xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +xdpsock-objs := xdpsock_user.o +xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) @@ -195,6 +195,8 @@ HOSTLOADLIBES_xdpsock += -pthread # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang LLC ?= llc CLANG ?= clang +LLVM_OBJCOPY ?= llvm-objcopy +BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler ifdef CROSS_COMPILE @@ -202,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc CLANG_ARCH_ARGS = -target $(ARCH) endif +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') + +ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) + EXTRA_CFLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif + # Trick to allow make to be run from this directory all: $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR) @@ -260,4 +272,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index b453e6a161be..180f9d813bca 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -8,7 +8,8 @@ * information. The number of invocations of the program, which maps * to the number of packets received, is stored to key 0. Key 1 is * incremented on each iteration by the number of bytes stored in - * the skb. + * the skb. The program also stores the number of received bytes + * in the cgroup storage. * * - Attaches the new program to a cgroup using BPF_PROG_ATTACH * @@ -21,12 +22,15 @@ #include <stdio.h> #include <stdlib.h> #include <assert.h> +#include <sys/resource.h> +#include <sys/time.h> #include <unistd.h> #include <linux/bpf.h> #include <bpf/bpf.h> #include "bpf_insn.h" +#include "bpf_rlimit.h" #include "cgroup_helpers.h" #define FOO "/foo" @@ -205,6 +209,8 @@ static int map_fd = -1; static int prog_load_cnt(int verdict, int val) { + int cgroup_storage_fd; + if (map_fd < 0) map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); if (map_fd < 0) { @@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val) return -1; } + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + struct bpf_insn prog[] = { BPF_MOV32_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ @@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val) BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, val), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; @@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val) printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); return 0; } + close(cgroup_storage_fd); return ret; } diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index a87a2048ed32..f88e1d7093d6 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -24,8 +24,7 @@ #include <fcntl.h> #include <libgen.h> -#include "bpf_load.h" -#include "bpf_util.h" +#include "bpf/libbpf.h" #include <bpf/bpf.h> @@ -63,9 +62,15 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + const char *prog_name = "xdp_fwd"; + struct bpf_program *prog; char filename[PATH_MAX]; + struct bpf_object *obj; int opt, i, idx, err; - int prog_id = 0; + int prog_fd, map_fd; int attach = 1; int ret = 0; @@ -75,7 +80,7 @@ int main(int argc, char **argv) attach = 0; break; case 'D': - prog_id = 1; + prog_name = "xdp_fwd_direct"; break; default: usage(basename(argv[0])); @@ -90,6 +95,7 @@ int main(int argc, char **argv) if (attach) { snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (access(filename, O_RDONLY) < 0) { printf("error accessing file %s: %s\n", @@ -97,19 +103,25 @@ int main(int argc, char **argv) return 1; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } - if (!prog_fd[prog_id]) { - printf("load_bpf_file: %s\n", strerror(errno)); + prog = bpf_object__find_program_by_title(obj, prog_name); + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + printf("program not found: %s\n", strerror(prog_fd)); + return 1; + } + map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, + "tx_port")); + if (map_fd < 0) { + printf("map not found: %s\n", strerror(map_fd)); return 1; } } if (attach) { for (i = 1; i < 64; ++i) - bpf_map_update_elem(map_fd[0], &i, &i, 0); + bpf_map_update_elem(map_fd, &i, &i, 0); } for (i = optind; i < argc; ++i) { @@ -126,7 +138,7 @@ int main(int argc, char **argv) if (err) ret = err; } else { - err = do_attach(idx, prog_fd[prog_id], argv[i]); + err = do_attach(idx, prog_fd, argv[i]); if (err) ret = err; } diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 5904b1543831..4914788b6727 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -26,7 +26,7 @@ #include <sys/types.h> #include <poll.h> -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "bpf_util.h" #include <bpf/bpf.h> @@ -145,8 +145,13 @@ static void dump_stats(void); } while (0) #define barrier() __asm__ __volatile__("": : :"memory") +#ifdef __aarch64__ +#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory") +#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory") +#else #define u_smp_rmb() barrier() #define u_smp_wmb() barrier() +#endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + int prog_fd, qidconf_map, xsks_map; + struct bpf_object *obj; char xdp_filename[256]; + struct bpf_map *map; int i, ret, key = 0; pthread_t pt; @@ -899,24 +910,38 @@ int main(int argc, char **argv) } snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); + prog_load_attr.file = xdp_filename; - if (load_bpf_file(xdp_filename)) { - fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + exit(EXIT_FAILURE); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: no program found: %s\n", + strerror(prog_fd)); exit(EXIT_FAILURE); } - if (!prog_fd[0]) { - fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n", - strerror(errno)); + map = bpf_object__find_map_by_name(obj, "qidconf_map"); + qidconf_map = bpf_map__fd(map); + if (qidconf_map < 0) { + fprintf(stderr, "ERROR: no qidconf map found: %s\n", + strerror(qidconf_map)); + exit(EXIT_FAILURE); + } + + map = bpf_object__find_map_by_name(obj, "xsks_map"); + xsks_map = bpf_map__fd(map); + if (xsks_map < 0) { + fprintf(stderr, "ERROR: no xsks map found: %s\n", + strerror(xsks_map)); exit(EXIT_FAILURE); } - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { fprintf(stderr, "ERROR: link set xdp fd failed\n"); exit(EXIT_FAILURE); } - ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0); + ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); exit(EXIT_FAILURE); @@ -933,7 +958,7 @@ int main(int argc, char **argv) /* ...and insert them into the map. */ for (i = 0; i < num_socks; i++) { key = i; - ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0); + ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); exit(EXIT_FAILURE); diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore new file mode 100644 index 000000000000..dfe2bd5a4b95 --- /dev/null +++ b/tools/bpf/.gitignore @@ -0,0 +1,5 @@ +FEATURE-DUMP.bpf +bpf_asm +bpf_dbg +bpf_exp.yacc.* +bpf_jit_disasm diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index d7e678c2d396..67167e44b726 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,3 +1,5 @@ *.d bpftool +bpftool*.8 +bpf-helpers.* FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e860ca859b28..b2ec20e562bd 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -69,6 +69,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", }; static bool map_is_per_cpu(__u32 type) @@ -232,7 +233,7 @@ static int get_btf(struct bpf_map_info *map_info, struct btf **btf) *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); if (IS_ERR(*btf)) { - err = PTR_ERR(btf); + err = PTR_ERR(*btf); *btf = NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 870113916cac..dd5758dc35d3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -75,6 +75,11 @@ struct bpf_lpm_trie_key { __u8 data[0]; /* Arbitrary size */ }; +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -120,6 +125,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, }; enum bpf_prog_type { @@ -1371,6 +1377,20 @@ union bpf_attr { * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * Return + * A 8-byte long non-decreasing number. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * Return + * A 8-byte long non-decreasing number. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket @@ -2075,6 +2095,24 @@ union bpf_attr { * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. + * + * void* get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage is defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the bpf program type, a local storage area + * can be shared between multiple instances of the bpf program, + * running simultaneously. + * + * A user should care about the synchronization by himself. + * For example, by using the BPF_STX_XADD instruction to alter + * the shared data. + * Return + * Pointer to the local storage area. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2157,7 +2195,8 @@ union bpf_attr { FN(rc_repeat), \ FN(rc_keydown), \ FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), + FN(get_current_cgroup_id), \ + FN(get_local_storage), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 26e9527ee464..40211b51427a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -468,8 +468,10 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { - pr_warning("failed to open %s: %s\n", obj->path, - strerror(errno)); + char errmsg[STRERR_BUFSIZE]; + char *cp = strerror_r(errno, errmsg, sizeof(errmsg)); + + pr_warning("failed to open %s: %s\n", obj->path, cp); return -errno; } @@ -808,10 +810,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size, name, idx); if (err) { char errmsg[STRERR_BUFSIZE]; + char *cp = strerror_r(-err, errmsg, + sizeof(errmsg)); - strerror_r(-err, errmsg, sizeof(errmsg)); pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, errmsg); + name, obj->path, cp); } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -874,6 +877,18 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) return NULL; } +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (pos->section_name && !strcmp(pos->section_name, title)) + return pos; + } + return NULL; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) @@ -1097,6 +1112,7 @@ bpf_object__create_maps(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { struct bpf_map *map = &obj->maps[i]; struct bpf_map_def *def = &map->def; + char *cp, errmsg[STRERR_BUFSIZE]; int *pfd = &map->fd; if (map->fd >= 0) { @@ -1124,8 +1140,9 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map_xattr(&create_attr); if (*pfd < 0 && create_attr.btf_key_type_id) { + cp = strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, strerror(errno), errno); + map->name, cp, errno); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -1138,9 +1155,9 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; + cp = strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", - map->name, - strerror(errno)); + map->name, cp); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -1292,6 +1309,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, char *license, u32 kern_version, int *pfd, int prog_ifindex) { struct bpf_load_program_attr load_attr; + char *cp, errmsg[STRERR_BUFSIZE]; char *log_buf; int ret; @@ -1321,7 +1339,8 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, } ret = -LIBBPF_ERRNO__LOAD; - pr_warning("load bpf program failed: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -1620,6 +1639,7 @@ out: static int check_path(const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -1633,7 +1653,8 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - pr_warning("failed to statfs %s: %s\n", dir, strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to statfs %s: %s\n", dir, cp); err = -errno; } free(dname); @@ -1649,6 +1670,7 @@ static int check_path(const char *path) int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance) { + char *cp, errmsg[STRERR_BUFSIZE]; int err; err = check_path(path); @@ -1667,7 +1689,8 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, } if (bpf_obj_pin(prog->instances.fds[instance], path)) { - pr_warning("failed to pin program: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin program: %s\n", cp); return -errno; } pr_debug("pinned program '%s'\n", path); @@ -1677,13 +1700,16 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, static int make_dir(const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; int err = 0; if (mkdir(path, 0700) && errno != EEXIST) err = -errno; - if (err) - pr_warning("failed to mkdir %s: %s\n", path, strerror(-err)); + if (err) { + cp = strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warning("failed to mkdir %s: %s\n", path, cp); + } return err; } @@ -1730,6 +1756,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) int bpf_map__pin(struct bpf_map *map, const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; int err; err = check_path(path); @@ -1742,7 +1769,8 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } if (bpf_obj_pin(map->fd, path)) { - pr_warning("failed to pin map: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin map: %s\n", cp); return -errno; } @@ -1996,6 +2024,9 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n) { int fd; + if (!prog) + return -EINVAL; + if (n >= prog->instances.nr || n < 0) { pr_warning("Can't get the %dth fd from program %s: only %d instances\n", n, prog->section_name, prog->instances.nr); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 413778a93499..96c55fac54c3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -86,6 +86,9 @@ const char *bpf_object__name(struct bpf_object *obj); unsigned int bpf_object__kversion(struct bpf_object *obj); int bpf_object__btf_fd(const struct bpf_object *obj); +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); + struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5169a97eb68b..17a7a5818ee1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -22,7 +22,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user + test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ + test_socket_cookie test_cgroup_storage TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ @@ -33,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ - get_cgroup_id_kern.o + get_cgroup_id_kern.o socket_cookie_prog.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -60,10 +61,12 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c +$(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c +$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 810de20e8e26..9ba1c72d7cf5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,6 +65,8 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_meta; +static int (*bpf_get_socket_cookie)(void *ctx) = + (void *) BPF_FUNC_get_socket_cookie; static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; @@ -133,6 +135,8 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, (void *) BPF_FUNC_rc_keydown; static unsigned long long (*bpf_get_current_cgroup_id)(void) = (void *) BPF_FUNC_get_current_cgroup_id; +static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) = + (void *) BPF_FUNC_get_local_storage; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c new file mode 100644 index 000000000000..9ff8ac4b0bf6 --- /dev/null +++ b/tools/testing/selftests/bpf/socket_cookie_prog.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct bpf_map_def SEC("maps") socket_cookies = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u64), + .value_size = sizeof(__u32), + .max_entries = 1 << 8, +}; + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + __u32 cookie_value = 0xFF; + __u64 cookie_key; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0)) + return 0; + + return 1; +} + +SEC("sockops") +int update_cookie(struct bpf_sock_ops *ctx) +{ + __u32 new_cookie_value; + __u32 *cookie_value; + __u64 cookie_key; + + if (ctx->family != AF_INET6) + return 1; + + if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + + cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key); + if (!cookie_value) + return 1; + + new_cookie_value = (ctx->local_port << 8) | *cookie_value; + bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0); + + return 1; +} + +int _version SEC("version") = 1; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index 481dccdf140c..7f8200a8702b 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # # SPDX-License-Identifier: GPL-2.0 # @@ -9,11 +9,11 @@ import subprocess import select def read(sock, n): - buf = '' + buf = b'' while len(buf) < n: rem = n - len(buf) try: s = sock.recv(rem) - except (socket.error), e: return '' + except (socket.error) as e: return b'' buf += s return buf @@ -22,7 +22,7 @@ def send(sock, s): count = 0 while count < total: try: n = sock.send(s) - except (socket.error), e: n = 0 + except (socket.error) as e: n = 0 if n == 0: return count; count += n @@ -39,10 +39,10 @@ try: except socket.error as e: sys.exit(1) -buf = '' +buf = b'' n = 0 while n < 1000: - buf += '+' + buf += b'+' n += 1 sock.settimeout(1); diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index bc454d7d0be2..b39903fca4c8 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # # SPDX-License-Identifier: GPL-2.0 # @@ -9,11 +9,11 @@ import subprocess import select def read(sock, n): - buf = '' + buf = b'' while len(buf) < n: rem = n - len(buf) try: s = sock.recv(rem) - except (socket.error), e: return '' + except (socket.error) as e: return b'' buf += s return buf @@ -22,7 +22,7 @@ def send(sock, s): count = 0 while count < total: try: n = sock.send(s) - except (socket.error), e: n = 0 + except (socket.error) as e: n = 0 if n == 0: return count; count += n @@ -43,7 +43,7 @@ host = socket.gethostname() try: serverSocket.bind((host, 0)) except socket.error as msg: - print 'bind fails: ', msg + print('bind fails: ' + str(msg)) sn = serverSocket.getsockname() serverPort = sn[1] @@ -51,10 +51,10 @@ serverPort = sn[1] cmdStr = ("./tcp_client.py %d &") % (serverPort) os.system(cmdStr) -buf = '' +buf = b'' n = 0 while n < 500: - buf += '.' + buf += b'.' n += 1 serverSocket.listen(MAX_PORTS) @@ -79,5 +79,5 @@ while True: serverSocket.close() sys.exit(0) else: - print 'Select timeout!' + print('Select timeout!') sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c new file mode 100644 index 000000000000..dc83fb2d3f27 --- /dev/null +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <bpf/bpf.h> +#include <linux/filter.h> +#include <stdio.h> +#include <stdlib.h> + +#include "cgroup_helpers.h" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/" + +int main(int argc, char **argv) +{ + struct bpf_insn prog[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ + BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int error = EXIT_FAILURE; + int map_fd, prog_fd, cgroup_fd; + struct bpf_cgroup_storage_key key; + unsigned long long value; + + map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), + sizeof(value), 0, 0); + if (map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; + } + + prog[0].imm = map_fd; + prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { + printf("Failed to load bpf program: %s\n", bpf_log_buf); + goto out; + } + + if (setup_cgroup_environment()) { + printf("Failed to setup cgroup environment\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (!cgroup_fd) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach the bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { + printf("Failed to attach bpf program\n"); + goto err; + } + + if (bpf_map_get_next_key(map_fd, NULL, &key)) { + printf("Failed to get the first key in cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + /* Every second packet should be dropped */ + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null")); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + + /* Check the counter in the cgroup local storage */ + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + if (value != 3) { + printf("Unexpected data in the cgroup storage: %llu\n", value); + goto err; + } + + /* Bump the counter in the cgroup local storage */ + value++; + if (bpf_map_update_elem(map_fd, &key, &value, 0)) { + printf("Failed to update the data in the cgroup storage\n"); + goto err; + } + + /* Every second packet should be dropped */ + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null")); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + + /* Check the final value of the counter in the cgroup local storage */ + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup the cgroup storage\n"); + goto err; + } + + if (value != 7) { + printf("Unexpected data in the cgroup storage: %llu\n", value); + goto err; + } + + error = 0; + printf("test_cgroup_storage:PASS\n"); + +err: + cleanup_cgroup_environment(); + +out: + return error; +} diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c new file mode 100644 index 000000000000..68e108e4687a --- /dev/null +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o" + +static int start_server(void) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create server socket"); + goto out; + } + + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_loopback; + addr.sin6_port = 0; + + if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) { + log_err("Failed to bind server socket"); + goto close_out; + } + + if (listen(fd, 128) == -1) { + log_err("Failed to listen on server socket"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create client socket"); + goto out; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto close_out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { + log_err("Fail to connect to server"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int validate_map(struct bpf_map *map, int client_fd) +{ + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + socklen_t len = sizeof(addr); + __u32 cookie_value; + __u64 cookie_key; + int err = 0; + int map_fd; + + if (!map) { + log_err("Map not found in BPF object"); + goto err; + } + + map_fd = bpf_map__fd(map); + + err = bpf_map_get_next_key(map_fd, NULL, &cookie_key); + if (err) { + log_err("Can't get cookie key from map"); + goto out; + } + + err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value); + if (err) { + log_err("Can't get cookie value from map"); + goto out; + } + + err = getsockname(client_fd, (struct sockaddr *)&addr, &len); + if (err) { + log_err("Can't get client local addr"); + goto out; + } + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + if (cookie_value != cookie_expected_value) { + log_err("Unexpected value in map: %x != %x", cookie_value, + cookie_expected_value); + goto err; + } + + goto out; +err: + err = -1; +out: + return err; +} + +static int run_test(int cgfd) +{ + enum bpf_attach_type attach_type; + struct bpf_prog_load_attr attr; + struct bpf_program *prog; + struct bpf_object *pobj; + const char *prog_name; + int server_fd = -1; + int client_fd = -1; + int prog_fd = -1; + int err = 0; + + memset(&attr, 0, sizeof(attr)); + attr.file = SOCKET_COOKIE_PROG; + attr.prog_type = BPF_PROG_TYPE_UNSPEC; + + err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); + if (err) { + log_err("Failed to load %s", attr.file); + goto out; + } + + bpf_object__for_each_program(prog, pobj) { + prog_name = bpf_program__title(prog, /*needs_copy*/ false); + + if (strcmp(prog_name, "cgroup/connect6") == 0) { + attach_type = BPF_CGROUP_INET6_CONNECT; + } else if (strcmp(prog_name, "sockops") == 0) { + attach_type = BPF_CGROUP_SOCK_OPS; + } else { + log_err("Unexpected prog: %s", prog_name); + goto err; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type, + BPF_F_ALLOW_OVERRIDE); + if (err) { + log_err("Failed to attach prog %s", prog_name); + goto out; + } + } + + server_fd = start_server(); + if (server_fd == -1) + goto err; + + client_fd = connect_to_server(server_fd); + if (client_fd == -1) + goto err; + + if (validate_map(bpf_map__next(NULL, pobj), client_fd)) + goto err; + + goto out; +err: + err = -1; +out: + close(client_fd); + close(server_fd); + bpf_object__close(pobj); + printf("%s\n", err ? "FAILED" : "PASSED"); + return err; +} + +int main(int argc, char **argv) +{ + int cgfd = -1; + int err = 0; + + if (setup_cgroup_environment()) + goto err; + + cgfd = create_and_get_cgroup(CG_PATH); + if (!cgfd) + goto err; + + if (join_cgroup(CG_PATH)) + goto err; + + if (run_test(cgfd)) + goto err; + + goto out; +err: + err = -1; +out: + close(cgfd); + cleanup_cgroup_environment(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 41106d9d5cc7..452cf5c6c784 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 7 +#define MAX_NR_MAPS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -70,6 +70,7 @@ struct bpf_test { int fixup_prog1[MAX_FIXUPS]; int fixup_prog2[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; + int fixup_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval; @@ -4631,6 +4632,121 @@ static struct bpf_test tests[] = { .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "valid cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "multiple registers share map_lookup_elem result", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -6997,7 +7113,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -7020,7 +7136,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -7042,7 +7158,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -12372,6 +12488,32 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "variable ctx access var_off=(0x0; 0x4)", }, + { + "mov64 src == dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_2), + // Check bounds are OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "mov64 src != dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + // Check bounds are OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -12476,6 +12618,19 @@ static int create_map_in_map(void) return outer_map_fd; } +static int create_cgroup_storage(void) +{ + int fd; + + fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), + TEST_DATA_LEN, 0, 0); + if (fd < 0) + printf("Failed to create array '%s'!\n", strerror(errno)); + + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, @@ -12488,6 +12643,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, int *fixup_prog1 = test->fixup_prog1; int *fixup_prog2 = test->fixup_prog2; int *fixup_map_in_map = test->fixup_map_in_map; + int *fixup_cgroup_storage = test->fixup_cgroup_storage; if (test->fill_helper) test->fill_helper(test); @@ -12555,6 +12711,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, fixup_map_in_map++; } while (*fixup_map_in_map); } + + if (*fixup_cgroup_storage) { + map_fds[7] = create_cgroup_storage(); + do { + prog[*fixup_cgroup_storage].imm = map_fds[7]; + fixup_cgroup_storage++; + } while (*fixup_cgroup_storage); + } } static void do_test_single(struct bpf_test *test, bool unpriv, |