eBPF maps 101

eBPF maps 101
AL Cho
SUSE Labs / Software Engineer
acho@suse.com
2018-04-22

3
The BSD Packet Filter:
A New Architecture for User-level
Packet Capture
December 19, 1992

5
BPF ASM
ldh [12]
jeq #0x806 jt 2 jf 3
ret #262144
ret #0
# tcpdump -p -ni wlan0 -d "arp"

6
BPF Bytecode
struct sock_filter code[] = {
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 1, 0x00000806 },
{ 0x6, 0, 0, 0x00040000 },
{ 0x6, 0, 0, 0x00000000 },
};
# tcpdump -i wlan0 arp -dd

7
BPF JIT
(Just In Time compiler)
A JIT for packet filters https://lwn.net/Articles/437981/

8
BPF
Bytecode
Native
Machine
Code
BPF JIT

9
$ find arch/ -name bpf_jit*
arch/arm/net/bpf_jit_32.c
arch/arm/net/bpf_jit_32.h
arch/arm64/net/bpf_jit.h
arch/arm64/net/bpf_jit_comp.c
arch/mips/net/bpf_jit.h
arch/mips/net/bpf_jit_asm.S
arch/mips/net/bpf_jit.c
arch/powerpc/net/bpf_jit.h
arch/powerpc/net/bpf_jit32.h
arch/powerpc/net/bpf_jit64.h
arch/powerpc/net/bpf_jit_asm.S
arch/powerpc/net/bpf_jit_asm64.S
arch/powerpc/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp64.c
arch/s390/net/bpf_jit.S
arch/s390/net/bpf_jit.h
arch/s390/net/bpf_jit_comp.c
arch/sparc/net/bpf_jit.h
arch/sparc/net/bpf_jit_asm.S
arch/sparc/net/bpf_jit_comp.c
arch/x86/net/bpf_jit.S
arch/x86/net/bpf_jit_comp.c

14
99c55f7d47c0
bpf: introduce BPF syscall and maps
v3.18

15
ebpf - The Linux bpf syscall
• kernel/bpf/syscall.c: The Linux kernel code related to
the bpf syscall.
• include/uapi/linux/bpf.h: The bpf header file for
assisting in using the bpf syscall.

17
bpf() system call
'maps' is a generic key/value storage of different types
for sharing data between kernel and userspace.

18
User Program
eBPF
userspace
kernel
eBPF MAP
Kernel
Program
As simple
as possible
Whatever you want

19
bpf() system call
From man-page bpf(2):
NAME
bpf - perform a command on an extended BPF map or program
SYNOPSIS
#include <linux/bpf.h>
int bpf(int cmd, union bpf_attr *attr, unsigned int size);
include/linux/syscalls.h
asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
(749730ce42a2 bpf: enable bpf syscall on x64 and i386 (v3.18-rc1))

20
eBPF commands
include/uapi/linux/bpf.h
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
BPF_MAP_UPDATE_ELEM,
BPF_MAP_DELETE_ELEM,
BPF_MAP_GET_NEXT_KEY,
BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
};

21
union bpf_attr
struct { /* anonymous struct used by BPF_OBJ_* commands */
__aligned_u64 pathname;
__u32 bpf_fd;
__u32 file_flags;
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
__u32 data_size_in;
__u32 data_size_out;
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
union {
__u32 start_id;
__u32 prog_id;
__u32 map_id;
};
__u32 next_id;
__u32 open_flags;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
__u32 bpf_fd;
__u32 info_len;
__aligned_u64 info;
} info;
struct { /* anonymous struct used by BPF_PROG_QUERY command */
__u32 target_fd; /* container object to query */
__u32 attach_type;
__u32 query_flags;
__u32 attach_flags;
__aligned_u64 prog_ids;
__u32 prog_cnt;
} query;
} __attribute__((aligned(8)));
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
__u32 map_flags; /* BPF_MAP_CREATE related
* flags defined above.
*/
__u32 inner_map_fd; /* fd pointing to the inner map */
__u32 numa_node; /* numa node (effective only if
* BPF_F_NUMA_NODE is set).
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
__u32 map_fd;
__aligned_u64 key;
union {
__aligned_u64 value;
__aligned_u64 next_key;
};
__u64 flags;
};
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
__aligned_u64 insns;
__aligned_u64 license;
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
};

22
userspace
kernel
eBPF
Program
BPF_PROG_LOAD
int bpf(int cmd, union bpf_attr *attr, unsigned int size)
eBPF
Kernel
Program

23
Load an eBPF program into the kernel
char bpf_log_buf[LOG_BUF_SIZE];
int bpf_prog_load(enum bpf_prog_type type,
const struct bpf_insn *insns,
int insn_cnt,
const char *license)
{
union bpf_attr attr = {
.prog_type = type,
.insns = ptr_to_u64(insns),
.insn_cnt = insn_cnt,
.license = ptr_to_u64(license),
.log_buf = ptr_to_u64(bpf_log_buf),
.log_size = LOG_BUF_SIZE,
.log_level = 1,
};
return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}

24
int insn_cnt,
{
.prog_type = type,
.log_level = 1,
};
}
prog_type is one of the available program types:
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
/* Reserve 0 as invalid program type */
BPF_PROG_TYPE_SOCKET_FILTER,
BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
};

25
int insn_cnt,
{
.prog_type = type,
.log_level = 1,
};
}
member of bpf_attr union for BPF_PROG_LOAD
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
__aligned_u64 insns;
__aligned_u64 license;
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe
(since Linux 4.1) */
};

26
eBPF
BPF bytecode Read Map
BPF bytecode Map
BPF_PROG_LOAD BPF_MAP_*
userspace
kernel
bpf_xxxx.c

27
Create a map with given type and
attributes
map_fd = bpf(BPF_MAP_CREATE, union bpf_attr
*attr, u32 size)
using attr->map_type, attr->key_size,
attr->value_size, attr->max_entries
db20fd2b0108
bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)

28
attributes
Userspace example:
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries)
{
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
.max_entries = max_entries
};
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
--
99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)

29
attributes
Userspace example:
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries)
{
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
.max_entries = max_entries
};
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
--
99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)
member of bpf_attr union for BPF_MAP_CREATE
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
};

30
lookup key in a given map
err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key, attr->value
db20fd2b0108

31
Find and delete element by key in a
given map
err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key
db20fd2b0108

32
Userspace example:
int bpf_lookup_elem(int fd, const void *key, void *value)
{
.map_fd = fd,
.key = ptr_to_u64(key),
.value = ptr_to_u64(value),
};
return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
int bpf_delete_elem(int fd, const void *key)
{
.map_fd = fd,
};
return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}

33
Userspace example:
int bpf_lookup_elem(int fd, const void *key, void *value)
{
.map_fd = fd,
};
return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
int bpf_delete_elem(int fd, const void *key)
{
.map_fd = fd,
};
return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
/* anonymous struct used by BPF_MAP_*_ELEM commands */
struct {
__u32 map_fd;
__aligned_u64 key;
union {
__aligned_u64 value;
__aligned_u64 next_key;
};
__u64 flags;
};

34
Create or update key/value pair in a
given map
err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key, attr->value
db20fd2b0108

35
given map
Userspace example:
int bpf_update_elem(int fd, const void *key, const void *value,
uint64_t flags)
{
.map_fd = fd,
.flags = flags,
};
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

36
given map
Userspace example:
int bpf_update_elem(int fd, const void *key, const void *value,
uint64_t flags)
{
.map_fd = fd,
.flags = flags,
};
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */

38
eBPF Map Types
• BPF_MAP_TYPE_UNSPEC
• BPF_MAP_TYPE_HASH
• BPF_MAP_TYPE_ARRAY
• BPF_MAP_TYPE_PROG_ARRAY
• BPF_MAP_TYPE_PERF_EVENT_ARRAY
• BPF_MAP_TYPE_PERCPU_HASH
• BPF_MAP_TYPE_PERCPU_ARRAY
• BPF_MAP_TYPE_STACK_TRACE
• BPF_MAP_TYPE_CGROUP_ARRAY
• BPF_MAP_TYPE_LRU_HASH
• BPF_MAP_TYPE_LRU_PERCPU_HASH
• BPF_MAP_TYPE_LPM_TRIE
• BPF_MAP_TYPE_ARRAY_OF_MAPS
• BPF_MAP_TYPE_HASH_OF_MAPS
• BPF_MAP_TYPE_DEVMAP
• BPF_MAP_TYPE_SOCKMAP
• BPF_MAP_TYPE_CPUMAP

39
eBPF Map Types
enum bpf_map_type in include/uapi/linux/bpf.h
/* v4.15 */
/* include/uapi/linux/bpf.h */
enum bpf_map_type {
BPF_MAP_TYPE_UNSPEC,
BPF_MAP_TYPE_HASH,
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH,
BPF_MAP_TYPE_LPM_TRIE,
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
};

40
eBPF Map Types
enum bpf_map_type in include/uapi/linux/bpf.h
/* v4.15 */
/* include/uapi/linux/bpf.h */
enum bpf_map_type {
BPF_MAP_TYPE_UNSPEC,
BPF_MAP_TYPE_HASH,
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH,
BPF_MAP_TYPE_LPM_TRIE,
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
};
Hash
Array

41
eBPF Map Types
• BPF_MAP_TYPE_UNSPEC
99c55f7d47c0 bpf: introduce BPF syscall and maps
(v3.18-rc1)

42
eBPF Map Types
• BPF_MAP_TYPE_HASH
0f8e4bd8a1fc bpf: add hashtable type of eBPF maps
(v3.19-rc1)

43
eBPF Map Types
• BPF_MAP_TYPE_ARRAY
28fbcfa08d8e bpf: add array type of eBPF maps (v3.19-rc1)

44
eBPF Map Types
• BPF_MAP_TYPE_PROG_ARRAY
04fd61ab36ec
bpf: allow bpf programs to tail-call other bpf programs (v4.2-rc1)

45
eBPF Map Types
• BPF_MAP_TYPE_PERF_EVENT_ARRAY
ea317b267e9d bpf: Add new bpf map type to store the
pointer to struct perf_event (v4.3-rc1)

46
eBPF Map Types
• BPF_MAP_TYPE_PERCPU_HASH
824bd0ce6c7c bpf: introduce
BPF_MAP_TYPE_PERCPU_HASH map (v4.6-rc1)

47
eBPF Map Types
• BPF_MAP_TYPE_PERCPU_ARRAY
a10423b87a7e bpf: introduce
BPF_MAP_TYPE_PERCPU_ARRAY map (v4.6-rc1)

50
Kernel Config
• CONFIG_BPF=y
• CONFIG_BPF_SYSCALL=y
• CONFIG_BPF_JIT=y
• CONFIG_HAVE_BPF_JIT=y
• CONFIG_BPF_EVENTS=y

53
clang: --emit-llvm
llc: --march=bpf

54
C code
LLVM
IR Bitcode
BPF Bytecode
clang
llc

55
User Program
eBPF
userspace
kernel
eBPF MAP
Kernel
Program
As simple
as possible
Whatever you want

58
C & Python Library
Built-in BPF compiler

Thank you.
60
Follow FB Group:
openSUSE Taiwan

61
Reference
● Documentation/networking/filter.txt
● http://www.brendangregg.com/blog/2015-05-15/ebpf-one-small-step.html
● https://suchakra.wordpress.com/2015/05/18/bpf-internals-i/
● https://suchakra.wordpress.com/2015/08/12/bpf-internals-ii/
● https://lkml.org/lkml/2013/9/30/627
● https://lwn.net/Articles/612878/
● https://lwn.net/Articles/650953/
● https://github.com/iovisor/bcc
● http://www.brendangregg.com/ebpf.html
● https://events.linuxfoundation.org/sites/events/files/slides/bpf_collabsummit_2015feb20.pdf
● https://www.slideshare.net/suselab/ebpf-trace-from-kernel-to-userspace
● https://ferrisellis.com/posts/ebpf_syscall_and_maps/

Corporate Headquarters
Maxfeldstrasse 5
90409 Nuremberg
Germany
+49 911 740 53 0 (Worldwide)
www.suse.com
Join us on:
www.opensuse.org
63

Unpublished Work of SUSE. All Rights Reserved.
This work is an unpublished work and contains confidential, proprietary and trade secret information of SUSE.
Access to this work is restricted to SUSE employees who have a need to know to perform tasks within the scope of
their assignments. No part of this work may be practiced, performed, copied, distributed, revised, modified, translated,
abridged, condensed, expanded, collected, or adapted without the prior written consent of SUSE.
Any use or exploitation of this work without authorization could subject the perpetrator to criminal and civil liability.
General Disclaimer
This document is not to be construed as a promise by any participating company to develop, deliver, or market a
product. It is not a commitment to deliver any material, code, or functionality, and should not be relied upon in making
purchasing decisions. SUSE makes no representations or warranties with respect to the contents of this document,
and specifically disclaims any express or implied warranties of merchantability or fitness for any particular purpose. The
development, release, and timing of features or functionality described for SUSE products remains at the sole
discretion of SUSE. Further, SUSE reserves the right to revise this document and to make changes to its content, at
any time, without obligation to notify any person or entity of such revisions or changes. All SUSE marks referenced in
this presentation are trademarks or registered trademarks of Novell, Inc. in the United States and other countries. All
third-party trademarks are the property of their respective owners.

eBPF maps 101

Recommended

Recommended

More Related Content

What's hot

What's hot (20)

Similar to eBPF maps 101

Similar to eBPF maps 101 (20)

More from SUSE Labs Taipei

More from SUSE Labs Taipei (20)

Recently uploaded

Recently uploaded (20)

eBPF maps 101

Editor's Notes