SlideShare a Scribd company logo
1 of 64
eBPF maps 101
AL Cho
SUSE Labs / Software Engineer
acho@suse.com
2018-04-22
2
Berkeley Packet Filter
3
The BSD Packet Filter:
A New Architecture for User-level
Packet Capture
December 19, 1992
4
BPF
No Red
BPF Program
5
BPF ASM
ldh [12]
jeq #0x806 jt 2 jf 3
ret #262144
ret #0
# tcpdump -p -ni wlan0 -d "arp"
6
BPF Bytecode
/*
 * Classic BPF bytecode for the "arp" filter as printed by `tcpdump -dd`.
 * Each entry lines up with one instruction of the `tcpdump -d` listing
 * shown in the BPF ASM slide.
 */
struct sock_filter code[] = {
{ 0x28, 0, 0, 0x0000000c }, /* ldh [12] */
{ 0x15, 0, 1, 0x00000806 }, /* jeq #0x806 jt 2 jf 3 */
{ 0x6, 0, 0, 0x00040000 }, /* ret #262144 (0x40000) */
{ 0x6, 0, 0, 0x00000000 }, /* ret #0 */
};
# tcpdump -i wlan0 arp -dd
7
BPF JIT
(Just In Time compiler)
A JIT for packet filters https://lwn.net/Articles/437981/
8
BPF
Bytecode
Native
Machine
Code
BPF JIT
9
$ find arch/ -name bpf_jit*
arch/arm/net/bpf_jit_32.c
arch/arm/net/bpf_jit_32.h
arch/arm64/net/bpf_jit.h
arch/arm64/net/bpf_jit_comp.c
arch/mips/net/bpf_jit.h
arch/mips/net/bpf_jit_asm.S
arch/mips/net/bpf_jit.c
arch/powerpc/net/bpf_jit.h
arch/powerpc/net/bpf_jit32.h
arch/powerpc/net/bpf_jit64.h
arch/powerpc/net/bpf_jit_asm.S
arch/powerpc/net/bpf_jit_asm64.S
arch/powerpc/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp64.c
arch/s390/net/bpf_jit.S
arch/s390/net/bpf_jit.h
arch/s390/net/bpf_jit_comp.c
arch/sparc/net/bpf_jit.h
arch/sparc/net/bpf_jit_asm.S
arch/sparc/net/bpf_jit_comp.c
arch/x86/net/bpf_jit.S
arch/x86/net/bpf_jit_comp.c
10
Stable and Efficient
11
But...
12
eBPF
13
Extended BPF
14
99c55f7d47c0
bpf: introduce BPF syscall and maps
v3.18
15
ebpf - The Linux bpf syscall
• kernel/bpf/syscall.c: The Linux kernel code related to
the bpf syscall.
• include/uapi/linux/bpf.h: The bpf header file for
assisting in using the bpf syscall.
16
eBPF Maps
17
bpf() system call
'maps' is a generic key/value storage of different types
for sharing data between kernel and userspace.
18
User Program
eBPF
userspace
kernel
eBPF MAP
Kernel
Program
As simple
as possible
Whatever you want
19
bpf() system call
From man-page bpf(2):
NAME
bpf - perform a command on an extended BPF map or program
SYNOPSIS
#include <linux/bpf.h>
int bpf(int cmd, union bpf_attr *attr, unsigned int size);
include/linux/syscalls.h
asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
(749730ce42a2 bpf: enable bpf syscall on x64 and i386 (v3.18-rc1))
20
eBPF commands
include/uapi/linux/bpf.h
/*
 * Command selector passed as the first argument of bpf().
 * Values are spelled out; they are the same ones the implicit
 * numbering assigns.
 */
enum bpf_cmd {
    BPF_MAP_CREATE       = 0, /* create a map with given type and attributes */
    BPF_MAP_LOOKUP_ELEM  = 1, /* look up key in a given map */
    BPF_MAP_UPDATE_ELEM  = 2, /* create or update key/value pair in a given map */
    BPF_MAP_DELETE_ELEM  = 3, /* find and delete element by key in a given map */
    BPF_MAP_GET_NEXT_KEY = 4, /* NOTE(review): presumably the "iterate" method; confirm */
    BPF_PROG_LOAD        = 5, /* load an eBPF program into the kernel */
    BPF_OBJ_PIN          = 6, /* BPF_OBJ_* commands use pathname/bpf_fd/file_flags */
    BPF_OBJ_GET          = 7,
    BPF_PROG_ATTACH      = 8, /* ATTACH/DETACH use target_fd/attach_bpf_fd/attach_type */
    BPF_PROG_DETACH      = 9,
};
21
union bpf_attr
/*
 * Argument block for the bpf() syscall. Which member is meaningful
 * depends on the command, as noted on each struct below.
 */
union bpf_attr {
    struct { /* anonymous struct used by BPF_MAP_CREATE command */
        __u32 map_type;     /* one of enum bpf_map_type */
        __u32 key_size;     /* size of key in bytes */
        __u32 value_size;   /* size of value in bytes */
        __u32 max_entries;  /* max number of entries in a map */
        __u32 map_flags;    /* BPF_MAP_CREATE related
                             * flags defined above.
                             */
        __u32 inner_map_fd; /* fd pointing to the inner map */
        __u32 numa_node;    /* numa node (effective only if
                             * BPF_F_NUMA_NODE is set).
                             */
        char map_name[BPF_OBJ_NAME_LEN];
        __u32 map_ifindex;  /* ifindex of netdev to create on */
    };

    struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
        __u32 map_fd;
        __aligned_u64 key;
        union {
            __aligned_u64 value;
            __aligned_u64 next_key;
        };
        __u64 flags;
    };

    struct { /* anonymous struct used by BPF_PROG_LOAD command */
        __u32 prog_type;       /* one of enum bpf_prog_type */
        __u32 insn_cnt;
        __aligned_u64 insns;
        __aligned_u64 license;
        __u32 log_level;       /* verbosity level of verifier */
        __u32 log_size;        /* size of user buffer */
        __aligned_u64 log_buf; /* user supplied buffer */
        __u32 kern_version;    /* checked when prog_type=kprobe */
        __u32 prog_flags;
        char prog_name[BPF_OBJ_NAME_LEN];
        __u32 prog_ifindex;    /* ifindex of netdev to prep for */
    };

    struct { /* anonymous struct used by BPF_OBJ_* commands */
        __aligned_u64 pathname;
        __u32 bpf_fd;
        __u32 file_flags;
    };

    struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
        __u32 target_fd;     /* container object to attach to */
        __u32 attach_bpf_fd; /* eBPF program to attach */
        __u32 attach_type;
        __u32 attach_flags;
    };

    struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
        __u32 prog_fd;
        __u32 retval;
        __u32 data_size_in;
        __u32 data_size_out;
        __aligned_u64 data_in;
        __aligned_u64 data_out;
        __u32 repeat;
        __u32 duration;
    } test;

    struct { /* anonymous struct used by BPF_*_GET_*_ID */
        union {
            __u32 start_id;
            __u32 prog_id;
            __u32 map_id;
        };
        __u32 next_id;
        __u32 open_flags;
    };

    struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
        __u32 bpf_fd;
        __u32 info_len;
        __aligned_u64 info;
    } info;

    struct { /* anonymous struct used by BPF_PROG_QUERY command */
        __u32 target_fd; /* container object to query */
        __u32 attach_type;
        __u32 query_flags;
        __u32 attach_flags;
        __aligned_u64 prog_ids;
        __u32 prog_cnt;
    } query;
} __attribute__((aligned(8)));
include/uapi/linux/bpf.h
22
userspace
kernel
eBPF
Program
BPF_PROG_LOAD
int bpf(int cmd, union bpf_attr *attr, unsigned int size)
eBPF
Kernel
Program
23
Load an eBPF program into the kernel
/* Buffer handed to the kernel through attr.log_buf (size LOG_BUF_SIZE). */
char bpf_log_buf[LOG_BUF_SIZE];

/*
 * Load an eBPF program into the kernel.
 *
 * type:     program type, stored in attr.prog_type
 * insns:    instruction array, passed by address in attr.insns
 * insn_cnt: instruction count, stored in attr.insn_cnt
 * license:  license string, passed by address in attr.license
 *
 * Returns the result of bpf(BPF_PROG_LOAD, ...) unchanged.
 */
int bpf_prog_load(enum bpf_prog_type type,
                  const struct bpf_insn *insns,
                  int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = type,
        .insns     = ptr_to_u64(insns),
        .insn_cnt  = insn_cnt,
        .license   = ptr_to_u64(license),
        .log_buf   = ptr_to_u64(bpf_log_buf),
        .log_size  = LOG_BUF_SIZE,
        .log_level = 1, /* verbosity level of verifier */
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
24
Load an eBPF program into the kernel
/* Buffer handed to the kernel through attr.log_buf (size LOG_BUF_SIZE). */
char bpf_log_buf[LOG_BUF_SIZE];

/*
 * Load an eBPF program into the kernel.
 *
 * type:     program type, stored in attr.prog_type
 * insns:    instruction array, passed by address in attr.insns
 * insn_cnt: instruction count, stored in attr.insn_cnt
 * license:  license string, passed by address in attr.license
 *
 * Returns the result of bpf(BPF_PROG_LOAD, ...) unchanged.
 */
int bpf_prog_load(enum bpf_prog_type type,
                  const struct bpf_insn *insns,
                  int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = type,
        .insns     = ptr_to_u64(insns),
        .insn_cnt  = insn_cnt,
        .license   = ptr_to_u64(license),
        .log_buf   = ptr_to_u64(bpf_log_buf),
        .log_size  = LOG_BUF_SIZE,
        .log_level = 1, /* verbosity level of verifier */
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
prog_type is one of the available program types:
/* Program types accepted in attr.prog_type by BPF_PROG_LOAD. */
enum bpf_prog_type {
    /* Reserve 0 as invalid program type */
    BPF_PROG_TYPE_UNSPEC        = 0,
    BPF_PROG_TYPE_SOCKET_FILTER = 1,
    BPF_PROG_TYPE_KPROBE        = 2,
    BPF_PROG_TYPE_SCHED_CLS     = 3,
    BPF_PROG_TYPE_SCHED_ACT     = 4,
};
25
Load an eBPF program into the kernel
/* Buffer handed to the kernel through attr.log_buf (size LOG_BUF_SIZE). */
char bpf_log_buf[LOG_BUF_SIZE];

/*
 * Load an eBPF program into the kernel.
 *
 * type:     program type, stored in attr.prog_type
 * insns:    instruction array, passed by address in attr.insns
 * insn_cnt: instruction count, stored in attr.insn_cnt
 * license:  license string, passed by address in attr.license
 *
 * Returns the result of bpf(BPF_PROG_LOAD, ...) unchanged.
 */
int bpf_prog_load(enum bpf_prog_type type,
                  const struct bpf_insn *insns,
                  int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = type,
        .insns     = ptr_to_u64(insns),
        .insn_cnt  = insn_cnt,
        .license   = ptr_to_u64(license),
        .log_buf   = ptr_to_u64(bpf_log_buf),
        .log_size  = LOG_BUF_SIZE,
        .log_level = 1, /* verbosity level of verifier */
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
member of bpf_attr union for BPF_PROG_LOAD
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
__aligned_u64 insns;
__aligned_u64 license;
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe
(since Linux 4.1) */
};
26
eBPF
BPF bytecode Read Map
BPF bytecode Map
BPF_PROG_LOAD BPF_MAP_*
userspace
kernel
bpf_xxxx.c
27
Create a map with given type and
attributes
map_fd = bpf(BPF_MAP_CREATE, union bpf_attr
*attr, u32 size)
using attr->map_type, attr->key_size, attr-
>value_size, attr->max_entries
db20fd2b0108
bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
28
Create a map with given type and
attributes
Userspace example:
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries)
{
union bpf_attr attr = {
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
.max_entries = max_entries
};
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
--
99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)
29
Create a map with given type and
attributes
Userspace example:
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries)
{
union bpf_attr attr = {
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
.max_entries = max_entries
};
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
--
99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)
member of bpf_attr union for BPF_MAP_CREATE
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
};
30
lookup key in a given map
err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key, attr->value
db20fd2b0108
bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
31
Find and delete element by key in a
given map
err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key
db20fd2b0108
bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
32
lookup key in a given map
Userspace example:
int bpf_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
.value = ptr_to_u64(value),
};
return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
int bpf_delete_elem(int fd, const void *key)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
};
return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
33
lookup key in a given map
Userspace example:
int bpf_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
.value = ptr_to_u64(value),
};
return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
int bpf_delete_elem(int fd, const void *key)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
};
return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
include/uapi/linux/bpf.h
/* anonymous struct used by BPF_MAP_*_ELEM commands */
struct {
__u32 map_fd;
__aligned_u64 key;
union {
__aligned_u64 value;
__aligned_u64 next_key;
};
__u64 flags;
};
34
Create or update key/value pair in a
given map
err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr
*attr, u32 size)
using attr->map_fd, attr->key, attr->value
db20fd2b0108
bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
35
Create or update key/value pair in a
given map
Userspace example:
int bpf_update_elem(int fd, const void *key, const void *value,
uint64_t flags)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
.value = ptr_to_u64(value),
.flags = flags,
};
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
36
Create or update key/value pair in a
given map
Userspace example:
int bpf_update_elem(int fd, const void *key, const void *value,
uint64_t flags)
{
union bpf_attr attr = {
.map_fd = fd,
.key = ptr_to_u64(key),
.value = ptr_to_u64(value),
.flags = flags,
};
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
include/uapi/linux/bpf.h
/* flags for BPF_MAP_UPDATE_ELEM command (passed in attr.flags) */
#define BPF_ANY 0 /* create a new element or update an existing one */
#define BPF_NOEXIST 1 /* create a new element if it does not exist yet */
#define BPF_EXIST 2 /* update an existing element */
37
Types of
ebpf maps
38
eBPF Map Types
• BPF_MAP_TYPE_UNSPEC
• BPF_MAP_TYPE_HASH
• BPF_MAP_TYPE_ARRAY
• BPF_MAP_TYPE_PROG_ARRAY
• BPF_MAP_TYPE_PERF_EVENT_ARRAY
• BPF_MAP_TYPE_PERCPU_HASH
• BPF_MAP_TYPE_PERCPU_ARRAY
• BPF_MAP_TYPE_STACK_TRACE
• BPF_MAP_TYPE_CGROUP_ARRAY
• BPF_MAP_TYPE_LRU_HASH
• BPF_MAP_TYPE_LRU_PERCPU_HASH
• BPF_MAP_TYPE_LPM_TRIE
• BPF_MAP_TYPE_ARRAY_OF_MAPS
• BPF_MAP_TYPE_HASH_OF_MAPS
• BPF_MAP_TYPE_DEVMAP
• BPF_MAP_TYPE_SOCKMAP
• BPF_MAP_TYPE_CPUMAP
39
eBPF Map Types
enum bpf_map_type in include/uapi/linux/bpf.h
/* v4.15 */
/* include/uapi/linux/bpf.h */
/*
 * Map types accepted in attr.map_type by BPF_MAP_CREATE.
 * Values are spelled out; they equal the implicit numbering.
 */
enum bpf_map_type {
    BPF_MAP_TYPE_UNSPEC           = 0,  /* v3.18-rc1 */
    BPF_MAP_TYPE_HASH             = 1,  /* v3.19-rc1 */
    BPF_MAP_TYPE_ARRAY            = 2,  /* v3.19-rc1 */
    BPF_MAP_TYPE_PROG_ARRAY       = 3,  /* v4.2-rc1, tail calls */
    BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,  /* v4.3-rc1 */
    BPF_MAP_TYPE_PERCPU_HASH      = 5,  /* v4.6-rc1 */
    BPF_MAP_TYPE_PERCPU_ARRAY     = 6,  /* v4.6-rc1 */
    BPF_MAP_TYPE_STACK_TRACE      = 7,
    BPF_MAP_TYPE_CGROUP_ARRAY     = 8,
    BPF_MAP_TYPE_LRU_HASH         = 9,
    BPF_MAP_TYPE_LRU_PERCPU_HASH  = 10,
    BPF_MAP_TYPE_LPM_TRIE         = 11,
    BPF_MAP_TYPE_ARRAY_OF_MAPS    = 12,
    BPF_MAP_TYPE_HASH_OF_MAPS     = 13,
    BPF_MAP_TYPE_DEVMAP           = 14,
    BPF_MAP_TYPE_SOCKMAP          = 15,
    BPF_MAP_TYPE_CPUMAP           = 16,
};
40
eBPF Map Types
enum bpf_map_type in include/uapi/linux/bpf.h
/* v4.15 */
/* include/uapi/linux/bpf.h */
/*
 * Map types accepted in attr.map_type by BPF_MAP_CREATE.
 * Values are spelled out; they equal the implicit numbering.
 */
enum bpf_map_type {
    BPF_MAP_TYPE_UNSPEC           = 0,  /* v3.18-rc1 */
    BPF_MAP_TYPE_HASH             = 1,  /* v3.19-rc1 */
    BPF_MAP_TYPE_ARRAY            = 2,  /* v3.19-rc1 */
    BPF_MAP_TYPE_PROG_ARRAY       = 3,  /* v4.2-rc1, tail calls */
    BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,  /* v4.3-rc1 */
    BPF_MAP_TYPE_PERCPU_HASH      = 5,  /* v4.6-rc1 */
    BPF_MAP_TYPE_PERCPU_ARRAY     = 6,  /* v4.6-rc1 */
    BPF_MAP_TYPE_STACK_TRACE      = 7,
    BPF_MAP_TYPE_CGROUP_ARRAY     = 8,
    BPF_MAP_TYPE_LRU_HASH         = 9,
    BPF_MAP_TYPE_LRU_PERCPU_HASH  = 10,
    BPF_MAP_TYPE_LPM_TRIE         = 11,
    BPF_MAP_TYPE_ARRAY_OF_MAPS    = 12,
    BPF_MAP_TYPE_HASH_OF_MAPS     = 13,
    BPF_MAP_TYPE_DEVMAP           = 14,
    BPF_MAP_TYPE_SOCKMAP          = 15,
    BPF_MAP_TYPE_CPUMAP           = 16,
};
Hash
Array
41
eBPF Map Types
• BPF_MAP_TYPE_UNSPEC
99c55f7d47c0 bpf: introduce BPF syscall and maps
(v3.18-rc1)
42
eBPF Map Types
• BPF_MAP_TYPE_HASH
0f8e4bd8a1fc bpf: add hashtable type of eBPF maps
(v3.19-rc1)
43
eBPF Map Types
• BPF_MAP_TYPE_ARRAY
28fbcfa08d8e bpf: add array type of eBPF maps (v3.19-rc1)
44
eBPF Map Types
• BPF_MAP_TYPE_PROG_ARRAY
04fd61ab36ec
bpf: allow bpf programs to tail-call other bpf programs (v4.2-rc1)
45
eBPF Map Types
• BPF_MAP_TYPE_PERF_EVENT_ARRAY
ea317b267e9d bpf: Add new bpf map type to store the
pointer to struct perf_event (v4.3-rc1)
46
eBPF Map Types
• BPF_MAP_TYPE_PERCPU_HASH
824bd0ce6c7c bpf: introduce
BPF_MAP_TYPE_PERCPU_HASH map (v4.6-rc1)
47
eBPF Map Types
• BPF_MAP_TYPE_PERCPU_ARRAY
a10423b87a7e bpf: introduce
BPF_MAP_TYPE_PERCPU_ARRAY map (v4.6-rc1)
48
How to use eBPF?
49
Linux Kernel >= 4.1
50
Kernel Config
• CONFIG_BPF=y
• CONFIG_BPF_SYSCALL=y
• CONFIG_BPF_JIT=y
• CONFIG_HAVE_BPF_JIT=y
• CONFIG_BPF_EVENTS=y
51
BPF ASM
Restricted C
52
LLVM >= 3.7
53
clang
llc:
--emit-llvm
--march=bpf
54
C code --(clang)--> LLVM IR Bitcode --(llc)--> BPF Bytecode
55
User Program
eBPF
userspace
kernel
eBPF MAP
Kernel
Program
As simple
as possible
Whatever you want
56
But !
57
BPF Compiler Collection
58
C & Python Library
Built-in BPF compiler
59
# zypper in python-bcc
Thank you.
60
Follow FB Group:
openSUSE Taiwan
61
Reference
● Documentation/networking/filter.txt
● http://www.brendangregg.com/blog/2015-05-15/ebpf-one-small-step.html
● https://suchakra.wordpress.com/2015/05/18/bpf-internals-i/
● https://suchakra.wordpress.com/2015/08/12/bpf-internals-ii/
● https://lkml.org/lkml/2013/9/30/627
● https://lwn.net/Articles/612878/
● https://lwn.net/Articles/650953/
● https://github.com/iovisor/bcc
● http://www.brendangregg.com/ebpf.html
● https://events.linuxfoundation.org/sites/events/files/slides/bpf_collabsummit_2015fe
● https://www.slideshare.net/suselab/ebpf-trace-from-kernel-to-userspace
● https://ferrisellis.com/posts/ebpf_syscall_and_maps/
Corporate Headquarters
Maxfeldstrasse 5
90409 Nuremberg
Germany
+49 911 740 53 0 (Worldwide)
www.suse.com
Join us on:
www.opensuse.org
63
Unpublished Work of SUSE. All Rights Reserved.
This work is an unpublished work and contains confidential, proprietary and trade secret information of SUSE.
Access to this work is restricted to SUSE employees who have a need to know to perform tasks within the scope of
their assignments. No part of this work may be practiced, performed, copied, distributed, revised, modified, translated,
abridged, condensed, expanded, collected, or adapted without the prior written consent of SUSE.
Any use or exploitation of this work without authorization could subject the perpetrator to criminal and civil liability.
General Disclaimer
This document is not to be construed as a promise by any participating company to develop, deliver, or market a
product. It is not a commitment to deliver any material, code, or functionality, and should not be relied upon in making
purchasing decisions. SUSE makes no representations or warranties with respect to the contents of this document,
and specifically disclaims any express or implied warranties of merchantability or fitness for any particular purpose. The
development, release, and timing of features or functionality described for SUSE products remains at the sole
discretion of SUSE. Further, SUSE reserves the right to revise this document and to make changes to its content, at
any time, without obligation to notify any person or entity of such revisions or changes. All SUSE marks referenced in
this presentation are trademarks or registered trademarks of Novell, Inc. in the United States and other countries. All
third-party trademarks are the property of their respective owners.

More Related Content

What's hot

High-Performance Networking Using eBPF, XDP, and io_uring
High-Performance Networking Using eBPF, XDP, and io_uringHigh-Performance Networking Using eBPF, XDP, and io_uring
High-Performance Networking Using eBPF, XDP, and io_uring
ScyllaDB
 

What's hot (20)

eBPF Basics
eBPF BasicseBPF Basics
eBPF Basics
 
eBPF Trace from Kernel to Userspace
eBPF Trace from Kernel to UserspaceeBPF Trace from Kernel to Userspace
eBPF Trace from Kernel to Userspace
 
Xdp and ebpf_maps
Xdp and ebpf_mapsXdp and ebpf_maps
Xdp and ebpf_maps
 
eBPF - Rethinking the Linux Kernel
eBPF - Rethinking the Linux KerneleBPF - Rethinking the Linux Kernel
eBPF - Rethinking the Linux Kernel
 
BPF - in-kernel virtual machine
BPF - in-kernel virtual machineBPF - in-kernel virtual machine
BPF - in-kernel virtual machine
 
Using eBPF for High-Performance Networking in Cilium
Using eBPF for High-Performance Networking in CiliumUsing eBPF for High-Performance Networking in Cilium
Using eBPF for High-Performance Networking in Cilium
 
Dpdk applications
Dpdk applicationsDpdk applications
Dpdk applications
 
High-Performance Networking Using eBPF, XDP, and io_uring
High-Performance Networking Using eBPF, XDP, and io_uringHigh-Performance Networking Using eBPF, XDP, and io_uring
High-Performance Networking Using eBPF, XDP, and io_uring
 
DPDK In Depth
DPDK In DepthDPDK In Depth
DPDK In Depth
 
Intel DPDK Step by Step instructions
Intel DPDK Step by Step instructionsIntel DPDK Step by Step instructions
Intel DPDK Step by Step instructions
 
Faster packet processing in Linux: XDP
Faster packet processing in Linux: XDPFaster packet processing in Linux: XDP
Faster packet processing in Linux: XDP
 
BPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLabBPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLab
 
Kubernetes Networking with Cilium - Deep Dive
Kubernetes Networking with Cilium - Deep DiveKubernetes Networking with Cilium - Deep Dive
Kubernetes Networking with Cilium - Deep Dive
 
EBPF and Linux Networking
EBPF and Linux NetworkingEBPF and Linux Networking
EBPF and Linux Networking
 
Linux Performance Analysis: New Tools and Old Secrets
Linux Performance Analysis: New Tools and Old SecretsLinux Performance Analysis: New Tools and Old Secrets
Linux Performance Analysis: New Tools and Old Secrets
 
Cilium - Container Networking with BPF & XDP
Cilium - Container Networking with BPF & XDPCilium - Container Networking with BPF & XDP
Cilium - Container Networking with BPF & XDP
 
Container Performance Analysis
Container Performance AnalysisContainer Performance Analysis
Container Performance Analysis
 
Linux BPF Superpowers
Linux BPF SuperpowersLinux BPF Superpowers
Linux BPF Superpowers
 
Performance Wins with eBPF: Getting Started (2021)
Performance Wins with eBPF: Getting Started (2021)Performance Wins with eBPF: Getting Started (2021)
Performance Wins with eBPF: Getting Started (2021)
 
Understanding DPDK
Understanding DPDKUnderstanding DPDK
Understanding DPDK
 

Similar to eBPF maps 101

Building Network Functions with eBPF & BCC
Building Network Functions with eBPF & BCCBuilding Network Functions with eBPF & BCC
Building Network Functions with eBPF & BCC
Kernel TLV
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
Teddy Hsiung
 
Opengl4 quick reference card
Opengl4 quick reference cardOpengl4 quick reference card
Opengl4 quick reference card
Adrien Wattez
 
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
Kevin Lo
 

Similar to eBPF maps 101 (20)

OpenGL 4.5 Reference Card
OpenGL 4.5 Reference CardOpenGL 4.5 Reference Card
OpenGL 4.5 Reference Card
 
Building Network Functions with eBPF & BCC
Building Network Functions with eBPF & BCCBuilding Network Functions with eBPF & BCC
Building Network Functions with eBPF & BCC
 
Efficient System Monitoring in Cloud Native Environments
Efficient System Monitoring in Cloud Native EnvironmentsEfficient System Monitoring in Cloud Native Environments
Efficient System Monitoring in Cloud Native Environments
 
UM2019 Extended BPF: A New Type of Software
UM2019 Extended BPF: A New Type of SoftwareUM2019 Extended BPF: A New Type of Software
UM2019 Extended BPF: A New Type of Software
 
BPF Hardware Offload Deep Dive
BPF Hardware Offload Deep DiveBPF Hardware Offload Deep Dive
BPF Hardware Offload Deep Dive
 
Berkeley Packet Filters
Berkeley Packet FiltersBerkeley Packet Filters
Berkeley Packet Filters
 
Roll your own toy unix clone os
Roll your own toy unix clone osRoll your own toy unix clone os
Roll your own toy unix clone os
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
 
LSFMM 2019 BPF Observability
LSFMM 2019 BPF ObservabilityLSFMM 2019 BPF Observability
LSFMM 2019 BPF Observability
 
OpenGL 4.4 Reference Card
OpenGL 4.4 Reference CardOpenGL 4.4 Reference Card
OpenGL 4.4 Reference Card
 
Opengl4 quick reference card
Opengl4 quick reference cardOpengl4 quick reference card
Opengl4 quick reference card
 
OpenGL 4.6 Reference Guide
OpenGL 4.6 Reference GuideOpenGL 4.6 Reference Guide
OpenGL 4.6 Reference Guide
 
Hooking signals and dumping the callstack
Hooking signals and dumping the callstackHooking signals and dumping the callstack
Hooking signals and dumping the callstack
 
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
 
Regular Expression (RegExp)
Regular Expression (RegExp)Regular Expression (RegExp)
Regular Expression (RegExp)
 
Qe Reference
Qe ReferenceQe Reference
Qe Reference
 
3D-DRESD Lorenzo Pavesi
3D-DRESD Lorenzo Pavesi3D-DRESD Lorenzo Pavesi
3D-DRESD Lorenzo Pavesi
 
Boost.Python: C++ and Python Integration
Boost.Python: C++ and Python IntegrationBoost.Python: C++ and Python Integration
Boost.Python: C++ and Python Integration
 
PVS-Studio 5.00, a solution for developers of modern resource-intensive appl...
PVS-Studio 5.00, a solution for developers of modern resource-intensive appl...PVS-Studio 5.00, a solution for developers of modern resource-intensive appl...
PVS-Studio 5.00, a solution for developers of modern resource-intensive appl...
 
Writing MySQL UDFs
Writing MySQL UDFsWriting MySQL UDFs
Writing MySQL UDFs
 

More from SUSE Labs Taipei

Develop and Maintain a Distro with Open Build Service
Develop and Maintain a Distro with Open Build ServiceDevelop and Maintain a Distro with Open Build Service
Develop and Maintain a Distro with Open Build Service
SUSE Labs Taipei
 

More from SUSE Labs Taipei (20)

Locked down openSUSE Tumbleweed kernel
Locked down openSUSE Tumbleweed kernelLocked down openSUSE Tumbleweed kernel
Locked down openSUSE Tumbleweed kernel
 
SUSE shim and things related to it
SUSE shim and things related to itSUSE shim and things related to it
SUSE shim and things related to it
 
Multi-signed Kernel Module
Multi-signed Kernel ModuleMulti-signed Kernel Module
Multi-signed Kernel Module
 
ACPI Debugging from Linux Kernel
ACPI Debugging from Linux KernelACPI Debugging from Linux Kernel
ACPI Debugging from Linux Kernel
 
Profiling the ACPICA Namespace and Event Handing
Profiling the ACPICA Namespace and Event HandingProfiling the ACPICA Namespace and Event Handing
Profiling the ACPICA Namespace and Event Handing
 
Kernel debug log and console on openSUSE
Kernel debug log and console on openSUSEKernel debug log and console on openSUSE
Kernel debug log and console on openSUSE
 
The bright future of SUSE and openSUSE
The bright future of SUSE and openSUSEThe bright future of SUSE and openSUSE
The bright future of SUSE and openSUSE
 
EFI Secure Key
EFI Secure KeyEFI Secure Key
EFI Secure Key
 
Convert your package to multibuild on Open Build Service
Convert your package to multibuild on Open Build ServiceConvert your package to multibuild on Open Build Service
Convert your package to multibuild on Open Build Service
 
Ixgbe internals
Ixgbe internalsIxgbe internals
Ixgbe internals
 
Linux Linux Traffic Control
Linux Linux Traffic ControlLinux Linux Traffic Control
Linux Linux Traffic Control
 
Looking into trusted and encrypted keys
Looking into trusted and encrypted keysLooking into trusted and encrypted keys
Looking into trusted and encrypted keys
 
Use bonding driver with ethernet
Use bonding driver with ethernetUse bonding driver with ethernet
Use bonding driver with ethernet
 
Use build service API in your program
Use build service API in your programUse build service API in your program
Use build service API in your program
 
Hands-on ethernet driver
Hands-on ethernet driverHands-on ethernet driver
Hands-on ethernet driver
 
S4 sig-check-lpc-20130918
S4 sig-check-lpc-20130918S4 sig-check-lpc-20130918
S4 sig-check-lpc-20130918
 
openSUSE12.2 Review
openSUSE12.2 ReviewopenSUSE12.2 Review
openSUSE12.2 Review
 
oS KDE Repos & MM
oS KDE Repos & MMoS KDE Repos & MM
oS KDE Repos & MM
 
Develop and Maintain a Distro with Open Build Service
Develop and Maintain a Distro with Open Build ServiceDevelop and Maintain a Distro with Open Build Service
Develop and Maintain a Distro with Open Build Service
 
Coscup 2012-urfkill
Coscup 2012-urfkillCoscup 2012-urfkill
Coscup 2012-urfkill
 

Recently uploaded

CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
anilsa9823
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
Health
 

Recently uploaded (20)

A Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxA Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docx
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.js
 
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
 
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
 
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS LiveVip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
 
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfLearn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
 
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
 
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
 
Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview Questions
 
SyndBuddy AI 2k Review 2024: Revolutionizing Content Syndication with AI
SyndBuddy AI 2k Review 2024: Revolutionizing Content Syndication with AISyndBuddy AI 2k Review 2024: Revolutionizing Content Syndication with AI
SyndBuddy AI 2k Review 2024: Revolutionizing Content Syndication with AI
 
Microsoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdfMicrosoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdf
 
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
 
How To Troubleshoot Collaboration Apps for the Modern Connected Worker
How To Troubleshoot Collaboration Apps for the Modern Connected WorkerHow To Troubleshoot Collaboration Apps for the Modern Connected Worker
How To Troubleshoot Collaboration Apps for the Modern Connected Worker
 
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfThe Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
 
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
 
Hand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptxHand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptx
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 

eBPF maps 101

  • 1. eBPF maps 101 AL Cho SUSE Labs / Software Engineer acho@suse.com 2018-04-22
  • 3. 3 The BSD Packet Filter: A New Architecture for User-level Packet Capture December 19, 1992
  • 5. 5 BPF ASM ldh [12] jeq #0x806 jt 2 jf 3 ret #262144 ret #0 # tcpdump -p -ni wlan0 -d "arp"
  • 6. 6 BPF Bytecode struct sock_filter code[] = { { 0x28, 0, 0, 0x0000000c }, { 0x15, 0, 1, 0x00000806 }, { 0x6, 0, 0, 0x00040000 }, { 0x6, 0, 0, 0x00000000 }, }; # tcpdump -i wlan0 arp -dd
  • 7. 7 BPF JIT (Just In Time compiler) A JIT for packet filters https://lwn.net/Articles/437981/
  • 9. 9 $ find arch/ -name bpf_jit* arch/arm/net/bpf_jit_32.c arch/arm/net/bpf_jit_32.h arch/arm64/net/bpf_jit.h arch/arm64/net/bpf_jit_comp.c arch/mips/net/bpf_jit.h arch/mips/net/bpf_jit_asm.S arch/mips/net/bpf_jit.c arch/powerpc/net/bpf_jit.h arch/powerpc/net/bpf_jit32.h arch/powerpc/net/bpf_jit64.h arch/powerpc/net/bpf_jit_asm.S arch/powerpc/net/bpf_jit_asm64.S arch/powerpc/net/bpf_jit_comp.c arch/powerpc/net/bpf_jit_comp64.c arch/s390/net/bpf_jit.S arch/s390/net/bpf_jit.h arch/s390/net/bpf_jit_comp.c arch/sparc/net/bpf_jit.h arch/sparc/net/bpf_jit_asm.S arch/sparc/net/bpf_jit_comp.c arch/x86/net/bpf_jit.S arch/x86/net/bpf_jit_comp.c
  • 14. 14 99c55f7d47c0 bpf: introduce BPF syscall and maps v3.18
  • 15. 15 ebpf - The Linux bpf syscall • kernel/bpf/syscall.c: The Linux kernel code related to the bpf syscall. • include/uapi/linux/bpf.h: The bpf header file for assisting in using the bpf syscall.
  • 17. 17 bpf() system call 'maps' is a generic key/value storage of different types for sharing data between kernel and userspace.
  • 19. 19 bpf() system call From man-page bpf(2): NAME bpf - perform a command on an extended BPF map or program SYNOPSIS #include <linux/bpf.h> int bpf(int cmd, union bpf_attr *attr, unsigned int size); include/linux/syscalls.h asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); (749730ce42a2 bpf: enable bpf syscall on x64 and i386 (v3.18-rc1))
  • 20. 20 eBPF commands include/uapi/linux/bpf.h enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, BPF_MAP_UPDATE_ELEM, BPF_MAP_DELETE_ELEM, BPF_MAP_GET_NEXT_KEY, BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, BPF_PROG_ATTACH, BPF_PROG_DETACH, };
  • 21. 21 union bpf_attr struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; __u32 data_size_in; __u32 data_size_out; __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; __u32 duration; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ union { __u32 start_id; __u32 prog_id; __u32 map_id; }; __u32 next_id; __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ __u32 bpf_fd; __u32 info_len; __aligned_u64 info; } info; struct { /* anonymous struct used by BPF_PROG_QUERY command */ __u32 target_fd; /* container object to query */ __u32 attach_type; __u32 query_flags; __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; } query; } __attribute__((aligned(8))); union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ __u32 map_flags; /* BPF_MAP_CREATE related * flags defined above. */ __u32 inner_map_fd; /* fd pointing to the inner map */ __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). 
*/ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ __u32 map_fd; __aligned_u64 key; union { __aligned_u64 value; __aligned_u64 next_key; }; __u64 flags; }; struct { /* anonymous struct used by BPF_PROG_LOAD command */ __u32 prog_type; /* one of enum bpf_prog_type */ __u32 insn_cnt; __aligned_u64 insns; __aligned_u64 license; __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; include/uapi/linux/bpf.h
  • 22. 22 userspacekernel eBPF Program BPF_PROG_LOAD int bpf(int cmd, union bpf_attr *attr, unsigned int size) eBPF Kernel Program
  • 23. 23 Load an eBPF program into the kernel char bpf_log_buf[LOG_BUF_SIZE]; int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, int insn_cnt, const char *license) { union bpf_attr attr = { .prog_type = type, .insns = ptr_to_u64(insns), .insn_cnt = insn_cnt, .license = ptr_to_u64(license), .log_buf = ptr_to_u64(bpf_log_buf), .log_size = LOG_BUF_SIZE, .log_level = 1, }; return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); }
  • 24. 24 Load an eBPF program into the kernel char bpf_log_buf[LOG_BUF_SIZE]; int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, int insn_cnt, const char *license) { union bpf_attr attr = { .prog_type = type, .insns = ptr_to_u64(insns), .insn_cnt = insn_cnt, .license = ptr_to_u64(license), .log_buf = ptr_to_u64(bpf_log_buf), .log_size = LOG_BUF_SIZE, .log_level = 1, }; return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } prog_type is one of the available program types: enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, /* Reserve 0 as invalid program type */ BPF_PROG_TYPE_SOCKET_FILTER, BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, };
  • 25. 25 Load an eBPF program into the kernel char bpf_log_buf[LOG_BUF_SIZE]; int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, int insn_cnt, const char *license) { union bpf_attr attr = { .prog_type = type, .insns = ptr_to_u64(insns), .insn_cnt = insn_cnt, .license = ptr_to_u64(license), .log_buf = ptr_to_u64(bpf_log_buf), .log_size = LOG_BUF_SIZE, .log_level = 1, }; return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } member of bpf_attr union for BPF_PROG_LOAD struct { /* anonymous struct used by BPF_PROG_LOAD command */ __u32 prog_type; /* one of enum bpf_prog_type */ __u32 insn_cnt; __aligned_u64 insns; __aligned_u64 license; __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe (since Linux 4.1) */ };
  • 26. 26 eBPF BPF bytecode Read Map BPF bytecode Map BPF_PROG_LOAD BPF_MAP_* userspace kernel bpf_xxxx.c
  • 27. 27 Create a map with given type and attributes map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size) using attr->map_type, attr->key_size, attr->value_size, attr->max_entries db20fd2b0108 bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
  • 28. 28 Create a map with given type and attributes Userspace example: int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries) { union bpf_attr attr = { .map_type = map_type, .key_size = key_size, .value_size = value_size, .max_entries = max_entries }; return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -- 99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)
  • 29. 29 Create a map with given type and attributes Userspace example: int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries) { union bpf_attr attr = { .map_type = map_type, .key_size = key_size, .value_size = value_size, .max_entries = max_entries }; return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -- 99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1) member of bpf_attr union for BPF_MAP_CREATE struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ };
  • 30. 30 lookup key in a given map err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value db20fd2b0108 bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
  • 31. 31 Find and delete element by key in a given map err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key db20fd2b0108 bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
  • 32. 32 lookup key in a given map Userspace example: int bpf_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), }; return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } int bpf_delete_elem(int fd, const void *key) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), }; return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); }
  • 33. 33 lookup key in a given map Userspace example: int bpf_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), }; return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } int bpf_delete_elem(int fd, const void *key) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), }; return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); } include/uapi/linux/bpf.h /* anonymous struct used by BPF_MAP_*_ELEM commands */ struct { __u32 map_fd; __aligned_u64 key; union { __aligned_u64 value; __aligned_u64 next_key; }; __u64 flags; };
  • 34. 34 Create or update key/value pair in a given map err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value db20fd2b0108 bpf: add lookup/update/delete/iterate methods to BPF maps (v3.18-rc1)
  • 35. 35 Create or update key/value pair in a given map Userspace example: int bpf_update_elem(int fd, const void *key, const void *value, uint64_t flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), .flags = flags, }; return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); }
  • 36. 36 Create or update key/value pair in a given map Userspace example: int bpf_update_elem(int fd, const void *key, const void *value, uint64_t flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), .flags = flags, }; return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } include/uapi/linux/bpf.h /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */
  • 38. 38 eBPF Map Types • BPF_MAP_TYPE_UNSPEC • BPF_MAP_TYPE_HASH • BPF_MAP_TYPE_ARRAY • BPF_MAP_TYPE_PROG_ARRAY • BPF_MAP_TYPE_PERF_EVENT_ARRAY • BPF_MAP_TYPE_PERCPU_HASH • BPF_MAP_TYPE_PERCPU_ARRAY • BPF_MAP_TYPE_STACK_TRACE • BPF_MAP_TYPE_CGROUP_ARRAY • BPF_MAP_TYPE_LRU_HASH • BPF_MAP_TYPE_LRU_PERCPU_HASH • BPF_MAP_TYPE_LPM_TRIE • BPF_MAP_TYPE_ARRAY_OF_MAPS • BPF_MAP_TYPE_HASH_OF_MAPS • BPF_MAP_TYPE_DEVMAP • BPF_MAP_TYPE_SOCKMAP • BPF_MAP_TYPE_CPUMAP
  • 39. 39 eBPF Map Types enum bpf_map_type in include/uapi/linux/bpf.h /* v4.15 */ /* include/uapi/linux/bpf.h */ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_CPUMAP, };
  • 40. 40 eBPF Map Types enum bpf_map_type in include/uapi/linux/bpf.h /* v4.15 */ /* include/uapi/linux/bpf.h */ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_CPUMAP, }; Hash Array
  • 41. 41 eBPF Map Types • BPF_MAP_TYPE_UNSPEC 99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1)
  • 42. 42 eBPF Map Types • BPF_MAP_TYPE_HASH 0f8e4bd8a1fc bpf: add hashtable type of eBPF maps (v3.19-rc1)
  • 43. 43 eBPF Map Types • BPF_MAP_TYPE_ARRAY 28fbcfa08d8e bpf: add array type of eBPF maps (v3.19-rc1)
  • 44. 44 eBPF Map Types • BPF_MAP_TYPE_PROG_ARRAY 04fd61ab36ec bpf: allow bpf programs to tail-call other bpf programs (v4.2-rc1)
  • 45. 45 eBPF Map Types • BPF_MAP_TYPE_PERF_EVENT_ARRAY ea317b267e9d bpf: Add new bpf map type to store the pointer to struct perf_event (v4.3-rc1)
  • 46. 46 eBPF Map Types • BPF_MAP_TYPE_PERCPU_HASH 824bd0ce6c7c bpf: introduce BPF_MAP_TYPE_PERCPU_HASH map (v4.6-rc1)
  • 47. 47 eBPF Map Types • BPF_MAP_TYPE_PERCPU_ARRAY a10423b87a7e bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map (v4.6-rc1)
  • 48. 48 How to use eBPF?
  • 50. 50 Kernel Config • CONFIG_BPF=y • CONFIG_BPF_SYSCALL=y • CONFIG_BPF_JIT=y • CONFIG_HAVE_BPF_JIT=y • CONFIG_BPF_EVENTS=y
  • 54. 54 C code LLVM IR Bitcode BPF Bytecodeclang llc
  • 58. 58 C & Python Library Built-in BPF compiler
  • 59. 59 # zypper in python-bcc
  • 60. Thank you. 60 Follow FB Group: openSUSE Taiwan
  • 61. 61 Reference ● Documentation/networking/filter.txt ● http://www.brendangregg.com/blog/2015-05-15/ebpf-one-small-step.html ● https://suchakra.wordpress.com/2015/05/18/bpf-internals-i/ ● https://suchakra.wordpress.com/2015/08/12/bpf-internals-ii/ ● https://lkml.org/lkml/2013/9/30/627 ● https://lwn.net/Articles/612878/ ● https://lwn.net/Articles/650953/ ● https://github.com/iovisor/bcc ● http://www.brendangregg.com/ebpf.html ● https://events.linuxfoundation.org/sites/events/files/slides/bpf_collabsummit_2015fe ● https://www.slideshare.net/suselab/ebpf-trace-from-kernel-to-userspace ● https://ferrisellis.com/posts/ebpf_syscall_and_maps/
  • 62.
  • 63. Corporate Headquarters Maxfeldstrasse 5 90409 Nuremberg Germany +49 911 740 53 0 (Worldwide) www.suse.com Join us on: www.opensuse.org 63
  • 64. Unpublished Work of SUSE. All Rights Reserved. This work is an unpublished work and contains confidential, proprietary and trade secret information of SUSE. Access to this work is restricted to SUSE employees who have a need to know to perform tasks within the scope of their assignments. No part of this work may be practiced, performed, copied, distributed, revised, modified, translated, abridged, condensed, expanded, collected, or adapted without the prior written consent of SUSE. Any use or exploitation of this work without authorization could subject the perpetrator to criminal and civil liability. General Disclaimer This document is not to be construed as a promise by any participating company to develop, deliver, or market a product. It is not a commitment to deliver any material, code, or functionality, and should not be relied upon in making purchasing decisions. SUSE makes no representations or warranties with respect to the contents of this document, and specifically disclaims any express or implied warranties of merchantability or fitness for any particular purpose. The development, release, and timing of features or functionality described for SUSE products remains at the sole discretion of SUSE. Further, SUSE reserves the right to revise this document and to make changes to its content, at any time, without obligation to notify any person or entity of such revisions or changes. All SUSE marks referenced in this presentation are trademarks or registered trademarks of Novell, Inc. in the United States and other countries. All third-party trademarks are the property of their respective owners.

Editor's Notes

  1. “'union bpf_attr' is backwards compatible with future extensions.” From 99c55f7d47c0 bpf: introduce BPF syscall and maps (v3.18-rc1) “This is a C union which allows for different C structs to be passed to the bpf syscall depending on which command is being used. The code for it can be found in the include/uapi/linux/bpf.h file of the Linux kernel. The relevant C struct from this C union will be included in code examples that use the bpf_attr union so readers can see the form of the struct being used.” From https://qmonnet.github.io/whirl-offload/2016/09/01/dive-into-bpf/
  2. The return value for this command is a new file descriptor associated with this eBPF program.
  3. The return value for this command is a new file descriptor associated with this eBPF program. prog_type is one of the available program types: For further details of eBPF program types, see below. The remaining fields of bpf_attr are set as follows: * insns is an array of struct bpf_insn instructions. * insn_cnt is the number of instructions in the program referred to by insns. * license is a license string, which must be GPL compatible to call helper functions marked gpl_only. (The licensing rules are the same as for kernel modules, so that also dual licenses, such as "Dual BSD/GPL", may be used.) * log_buf is a pointer to a caller-allocated buffer in which the in-kernel verifier can store the verification log. This log is a multi-line string that can be checked by the program author in order to understand how the verifier came to the conclusion that the eBPF program is unsafe. The format of the output can change at any time as the verifier evolves. * log_size is the size of the buffer pointed to by log_buf. If the size of the buffer is not large enough to store all verifier messages, -1 is returned and errno is set to ENOSPC. * log_level is the verbosity level of the verifier. A value of zero means that the verifier will not provide a log; in this case, log_buf must be a NULL pointer, and log_size must be zero.
  4. The return value for this command is a new file descriptor associated with this eBPF program. prog_type is one of the available program types: For further details of eBPF program types, see below. The remaining fields of bpf_attr are set as follows: * insns is an array of struct bpf_insn instructions. * insn_cnt is the number of instructions in the program referred to by insns. * license is a license string, which must be GPL compatible to call helper functions marked gpl_only. (The licensing rules are the same as for kernel modules, so that also dual licenses, such as "Dual BSD/GPL", may be used.) * log_buf is a pointer to a caller-allocated buffer in which the in-kernel verifier can store the verification log. This log is a multi-line string that can be checked by the program author in order to understand how the verifier came to the conclusion that the eBPF program is unsafe. The format of the output can change at any time as the verifier evolves. * log_size is the size of the buffer pointed to by log_buf. If the size of the buffer is not large enough to store all verifier messages, -1 is returned and errno is set to ENOSPC. * log_level is the verbosity level of the verifier. A value of zero means that the verifier will not provide a log; in this case, log_buf must be a NULL pointer, and log_size must be zero.
  5. On success, this operation returns a file descriptor. On error, -1 is returned and errno is set to EINVAL, EPERM, or ENOMEM.
  6. //// member of bpf_attr union for BPF_MAP_CREATE // // struct { /* anonymous struct used by BPF_MAP_CREATE command */ // __u32 map_type; /* one of enum bpf_map_type */ // __u32 key_size; /* size of key in bytes */ // __u32 value_size; /* size of value in bytes */ // __u32 max_entries; /* max number of entries in a map */ // __u32 map_flags; /* prealloc or not */ // };
  7. //// member of bpf_attr union for BPF_MAP_CREATE // // struct { /* anonymous struct used by BPF_MAP_CREATE command */ // __u32 map_type; /* one of enum bpf_map_type */ // __u32 key_size; /* size of key in bytes */ // __u32 value_size; /* size of value in bytes */ // __u32 max_entries; /* max number of entries in a map */ // __u32 map_flags; /* prealloc or not */ // };
  8. If an element is found, the operation returns zero and stores the element's value into value, which must point to a buffer of value_size bytes. If no element is found, the operation returns -1 and sets errno to ENOENT.
  9. On success, zero is returned. If the element is not found, -1 is returned and errno is set to ENOENT.
  10. [BPF_MAP_LOOKUP_ELEM] The BPF_MAP_LOOKUP_ELEM command looks up an element with a given key in the map referred to by the file descriptor fd. If an element is found, the operation returns zero and stores the element's value into value, which must point to a buffer of value_size bytes. [BPF_MAP_DELETE_ELEM] On success, zero is returned. If the element is not found, -1 is returned and errno is set to ENOENT.
  11. The BPF_MAP_LOOKUP_ELEM command looks up an element with a given key in the map referred to by the file descriptor fd. If an element is found, the operation returns zero and stores the element's value into value, which must point to a buffer of value_size bytes.
  12. On success, the operation returns zero. On error, -1 is returned and errno is set to EINVAL, EPERM, ENOMEM, or E2BIG. E2BIG indicates that the number of elements in the map reached the max_entries limit specified at map creation time. EEXIST will be returned if flags specifies BPF_NOEXIST and the element with key already exists in the map. ENOENT will be returned if flags specifies BPF_EXIST and the element with key doesn't exist in the map.
  13. //// member of bpf_attr union for BPF_MAP_LOOKUP_ELEM, BPF_MAP_UPDATE_ELEM, //// and BPF_MAP_DELETE_ELEM // // struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ // __u32 map_fd; // __aligned_u64 key; // union { // __aligned_u64 value; // __aligned_u64 next_key; // }; // __u64 flags; // };
  14. The BPF_MAP_UPDATE_ELEM command also allows a flag to be specified which communicates the desired action relative to if a key does or doesn’t already exist when the update action is called. As you can see from the comments in the code block below, this command really is three: set value if no prior value exists, set value only if prior value exists, or set value regardless of if prior value exists.
  15. 11 different types as of Linux kernel 4.11. 16 different types as of Linux kernel 4.15. Along with this, the Linux kernel reserves the first C enum option as BPF_MAP_TYPE_UNSPEC to ensure that zero isn’t a valid map type. Presumably, this is a safeguard in case zero, with its many forms in C, accidentally gets passed as the map type argument.
  16. 11 different types as of Linux kernel 4.11. 16 different types as of Linux kernel 4.15. Along with this, the Linux kernel reserves the first C enum option as BPF_MAP_TYPE_UNSPEC to ensure that zero isn’t a valid map type. Presumably, this is a safeguard in case zero, with its many forms in C, accidentally gets passed as the map type argument.
  17. Along with this, the Linux kernel reserves the first C enum option as BPF_MAP_TYPE_UNSPEC to ensure that zero isn’t a valid map type. Presumably, this is a safeguard in case zero, with its many forms in C, accidentally gets passed as the map type argument. enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, /* Reserve 0 as invalid map type */ BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, };
  18. BPF_MAP_TYPE_HASH Hash-table maps have the following characteristics: * Maps are created and destroyed by user-space programs. Both user-space and eBPF programs can perform lookup, update, and delete operations. * The kernel takes care of allocating and freeing key/value pairs. * The map_update_elem() helper will fail to insert a new element when the max_entries limit is reached. (This ensures that eBPF programs cannot exhaust memory.) * map_update_elem() replaces existing elements atomically. Hash-table maps are optimized for speed of lookup.
  19. This functions like the hash-table type above except it indexes the entries like an array, meaning for a map with n elements you can only use indexes 0 to n-1.
  20. With its release in August 2015, version 4.2 of the Linux kernel added the eBPF-map type BPF_MAP_TYPE_PROG_ARRAY. This is one of the more interesting eBPF-map types because it allows tail calling of eBPF programs! And, as you may have guessed, the BPF_MAP_TYPE_PROG_ARRAY holds file descriptors of loaded eBPF programs as its values. The man page states that, as of this writing, both the key and value must be 4 bytes in size. Thus the common thing to do is use numbers to identify the different eBPF program types. With this pattern also comes the bpf_tail_call helper function. This function can be invoked by an eBPF program to look up a program from an eBPF-map of type BPF_MAP_TYPE_PROG_ARRAY with a given key and then jump into that function.
  21. With version 4.4 of the Linux kernel, released in January 2016, eBPF was integrated into the perf tooling system. For those unfamiliar with it, perf is a tool in Linux that can be used for a wide swath of performance monitoring including CPU performance counters, tracepoints, kprobes, and uprobes (dynamic tracing). The usage of BPF_MAP_TYPE_PERF_EVENT_ARRAY isn’t super clear as there appear to be only two examples of directly using it. Both of these can be found in the following locations within the Linux kernel repository. samples/bpf/tracex6_*.c: These two files (tracex6_kern.c and tracex6_user.c) form a simplistic example. However, due to lack of comments or documentation, understanding its function isn’t obvious. The commit message for the example’s creation, done by Kaixu Xia, states the example “shows how to use the new ability to get the selected Hardware PMU counter value”. samples/bpf/trace_output_*.c: These two files (trace_output_kern.c and trace_output_user.c) form the more complex example. This one also lacks code documentation. The commit message for this example’s creation, done by Alexei Starovoitov, states that a “kprobe is attached to sys_write() and trivial bpf program streams pid+cookie into userspace via PERF_COUNT_SW_BPF_OUTPUT event”.
  22. With its release in May 2016, version 4.6 of the Linux kernel added the eBPF-map types BPF_MAP_TYPE_PERCPU_HASH and BPF_MAP_TYPE_PERCPU_ARRAY. These two are nearly identical to BPF_MAP_TYPE_HASH and BPF_MAP_TYPE_ARRAY except that one is created for each CPU core. This allows for lock free uses of hash-tables and arrays in eBPF for high performance needs. Though, of course, it must be an application where the divided results can be reconciled in the end. There are a few, minor, technical details about the per-cpu eBPF-map types. For those interested in the details of them check out the commits for each below: BPF_MAP_TYPE_PERCPU_HASH, initial commit 824bd0ce6c7c43a9e1e210abf124958e54d88342 BPF_MAP_TYPE_PERCPU_ARRAY, initial commit a10423b87a7eae75da79ce80a8d9475047a674ee
  23. With its release in May 2016, version 4.6 of the Linux kernel added the eBPF-map types BPF_MAP_TYPE_PERCPU_HASH and BPF_MAP_TYPE_PERCPU_ARRAY. These two are nearly identical to BPF_MAP_TYPE_HASH and BPF_MAP_TYPE_ARRAY except that one is created for each CPU core. This allows for lock free uses of hash-tables and arrays in eBPF for high performance needs. Though, of course, it must be an application where the divided results can be reconciled in the end. There are a few, minor, technical details about the per-cpu eBPF-map types. For those interested in the details of them check out the commits for each below: BPF_MAP_TYPE_PERCPU_HASH, initial commit 824bd0ce6c7c43a9e1e210abf124958e54d88342 BPF_MAP_TYPE_PERCPU_ARRAY, initial commit a10423b87a7eae75da79ce80a8d9475047a674ee