问题描述:在小包场景下,fnat64相比于fnat v4存在明显的性能下降(约30%)。基于dperf压测工具和物理机mlx 25G网卡进行性能测试,在cps 900k、cc 10、pkg size 84的情况下,测得fnat v4的pps约为620w,fnat64的pps约为470w。
问题分析:结合火焰图进行分析,fnat v4出向过程中dp_vs_out_xmit_fnat函数占比仅0.23%,而fnat64出向过程中dp_vs_out_xmit_fnat函数占比为1.29%,如下图所示。
从代码分析看,fnat64出向过程中,通过查路由表及走协议栈进行转发,而fnat v4大部分通过fast path卸载到网卡,将数据包直接转发到网关下一跳。代码如下:
/*
 * Outbound FNAT transmit for IPv4 (excerpt: only the fast-path prologue is
 * shown, the remainder of the function is elided with "...").
 *
 * @proto: protocol handlers of the connection
 * @conn:  connection the packet belongs to
 * @mbuf:  packet to transmit
 *
 * Returns EDPVS_OK when the fast path reports success (returns 0);
 * otherwise execution continues into the elided part of the function.
 */
static int __dp_vs_out_xmit_fnat4(struct dp_vs_proto *proto,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf)
{
struct flow4 fl4;
struct rte_ipv4_hdr *iph = ip4_hdr(mbuf);
struct route_entry *rt;
int err, mtu;
/*
 * Try the fast path first, unless it is switched off by fast_xmit_close
 * or disabled for this connection via DPVS_CONN_F_NOFASTXMIT.
 */
if (!fast_xmit_close && !(conn->flags & DPVS_CONN_F_NOFASTXMIT)) {
dp_vs_save_outxmit_info(mbuf, proto, conn);
/* a zero return from the fast path means nothing more to do here */
if (!dp_vs_fast_outxmit_fnat(AF_INET, proto, conn, mbuf)) {
return EDPVS_OK;
}
}
/* rest of the function is omitted in this excerpt */
...
}
/*
 * Outbound FNAT64 transmit, slow path: the packet arrives with an IPv4
 * header, is translated to IPv6 and handed to ip6_output via the
 * INET_HOOK_LOCAL_OUT hook.
 *
 * Steps, in order: IPv6 route lookup (conn->vaddr -> conn->caddr), MTU and
 * optional TTL checks on the IPv4 header, protocol pre-handler, IPv4->IPv6
 * header translation (mbuf_4to6), protocol L4 handler, then output.
 *
 * @proto: protocol handlers (fnat_out_pre_handler / fnat_out_handler are
 *         optional and skipped when NULL)
 * @conn:  connection providing the IPv6 source (vaddr) and destination (caddr)
 * @mbuf:  packet to transmit; freed here on every error path
 *
 * Returns the result of INET_HOOK() on success; on failure returns
 * EDPVS_NOROUTE, EDPVS_FRAG, EDPVS_DROP, or the error reported by
 * mbuf_4to6() or a protocol handler.
 */
static int __dp_vs_out_xmit_fnat46(struct dp_vs_proto *proto,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf)
{
struct flow6 fl6;
struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf);
uint32_t pkt_len;
struct route6 *rt6;
int err, mtu;
/*
 * drop old route. just for safe, because
 * FNAT is PRE_ROUTING, should not have route.
 */
if (unlikely(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE) != NULL)) {
RTE_LOG(WARNING, IPVS, "%s: FNAT have route %p ?\n", __func__,
MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE));
route4_put(MBUF_USERDATA(mbuf, struct route_entry *, MBUF_FIELD_ROUTE));
}
/* IPv6 route lookup: source is the virtual address, destination the client */
memset(&fl6, 0, sizeof(struct flow6));
fl6.fl6_daddr = conn->caddr.in6;
fl6.fl6_saddr = conn->vaddr.in6;
rt6 = route6_output(mbuf, &fl6);
if (!rt6) {
err = EDPVS_NOROUTE;
goto errout;
}
/*
 * didn't cache the pointer to rt
 * or route can't be deleted when there is conn ref
 * this is for neighbour confirm
 */
/* compute the next hop (presumably recorded on conn by the call below; confirm) */
dp_vs_conn_cache_rt6(conn, rt6, false);
/*
 * NOTE(review): the original comment here was truncated ("ext_hdr and").
 * At this point the mbuf still carries the IPv4 header (ip4h is read
 * below) and the "fragmentation needed" error is reported with icmp_send().
 */
/* determine the MTU and compare it with the packet length after 4-to-6 translation */
mtu = rt6->rt6_mtu;
pkt_len = mbuf_nat4to6_len(mbuf);
if (pkt_len > mtu
&& (ip4h->fragment_offset & htons(RTE_IPV4_HDR_DF_FLAG))) {
RTE_LOG(DEBUG, IPVS, "%s: frag needed.\n", __func__);
icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu));
err = EDPVS_FRAG;
goto errout;
}
/* attach the route to the mbuf for the output stage */
MBUF_USERDATA(mbuf, struct route6 *, MBUF_FIELD_ROUTE) = rt6;
/* after route lookup and before translation */
if (xmit_ttl) {
if (unlikely(ip4h->time_to_live <= 1)) {
icmp_send(mbuf, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
err = EDPVS_DROP;
goto errout;
}
ip4h->time_to_live--;
}
/* pre-handler before translation */
if (proto->fnat_out_pre_handler) {
err = proto->fnat_out_pre_handler(proto, conn, mbuf);
if (err != EDPVS_OK)
goto errout;
}
/* L3 translation before l4 re-csum; ip4h is not used past this point */
err = mbuf_4to6(mbuf, &conn->vaddr.in6, &conn->caddr.in6);
if (err)
goto errout;
/* L4 FNAT translation */
if (proto->fnat_out_handler) {
err = proto->fnat_out_handler(proto, conn, mbuf);
if (err != EDPVS_OK)
goto errout;
}
return INET_HOOK(AF_INET6, INET_HOOK_LOCAL_OUT, mbuf,
NULL, rt6->rt6_dev, ip6_output);
/* common error exit: release the route reference (if any) and free the packet */
errout:
if (rt6)
route6_put(rt6);
rte_pktmbuf_free(mbuf);
return err;
}
解决方案:
针对于上述问题,将fnat64出向过程简化:首先,将ipv4头替换为ipv6头;然后,直接由网卡转发到下一跳即网关。实现代码如下:
static int __dp_vs_out_xmit_fnat46(struct dp_vs_proto *proto,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf)
{
struct flow6 fl6;
struct rte_ipv4_hdr *ip4h = ip4_hdr(mbuf);
uint32_t pkt_len;
struct route6 *rt6;
int err, mtu;
if (!fast_xmit_close && !(conn->flags & DPVS_CONN_F_NOFASTXMIT)) {
dp_vs_save_outxmit_info(mbuf, proto, conn);
err = __dp_vs_fast_outxmit_fnat4to6(proto, conn, mbuf);
if (!err) {
return EDPVS_OK;
} else {
RTE_LOG(ERR, IPVS, "err: %d.\n", err);
}
}
...
}
/*
 * Fast-path transmit for outbound FNAT64: translate the IPv4 packet to IPv6
 * and send it straight out of conn->out_dev, using the Ethernet addresses
 * stored on the connection instead of a route lookup.
 *
 * @proto: protocol handlers (fnat_out_pre_handler / fnat_out_handler are
 *         optional and skipped when NULL)
 * @conn:  connection providing out_dev, out_smac/out_dmac and the IPv6
 *         source (vaddr) and destination (caddr) addresses
 * @mbuf:  packet to transmit
 *
 * Returns:
 *   EDPVS_NOROUTE  - conn->out_dev is not set (mbuf untouched)
 *   EDPVS_NOTSUPP  - source or destination MAC is all-zero (mbuf untouched)
 *   handler/translation error - mbuf is NOT freed
 *   EDPVS_OK       - mbuf has been consumed (sent, or dropped on failure)
 *
 * NOTE(review): an error returned from mbuf_4to6() or fnat_out_handler()
 * may leave the mbuf partly or fully translated; a caller that then falls
 * back to an IPv4-based slow path should be checked against this.
 */
static int __dp_vs_fast_outxmit_fnat4to6(struct dp_vs_proto *proto,
                                         struct dp_vs_conn *conn,
                                         struct rte_mbuf *mbuf)
{
    struct rte_ether_hdr *eth;
    uint16_t packet_type = RTE_ETHER_TYPE_IPV6;
    int err;

    /* the fast path needs a known output device and both MAC addresses */
    if (unlikely(conn->out_dev == NULL))
        return EDPVS_NOROUTE;

    if (unlikely(rte_is_zero_ether_addr(&conn->out_dmac) ||
                 rte_is_zero_ether_addr(&conn->out_smac)))
        return EDPVS_NOTSUPP;

    /* pre-handler before translation */
    if (proto->fnat_out_pre_handler) {
        err = proto->fnat_out_pre_handler(proto, conn, mbuf);
        if (err != EDPVS_OK)
            return err;
    }

    /* L3 translation (IPv4 header -> IPv6 header) before L4 re-checksum */
    err = mbuf_4to6(mbuf, &conn->vaddr.in6, &conn->caddr.in6);
    if (err)
        return err;

    /* L4 FNAT translation */
    if (proto->fnat_out_handler) {
        err = proto->fnat_out_handler(proto, conn, mbuf);
        if (err != EDPVS_OK)
            return err;
    }

    eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf,
                    (uint16_t)sizeof(struct rte_ether_hdr));
    if (unlikely(eth == NULL)) {
        /*
         * Not enough headroom for the Ethernet header. The packet is already
         * translated, so drop it here and report it as consumed.
         */
        RTE_LOG(DEBUG, IPVS, "%s: no headroom for ether header.\n", __func__);
        rte_pktmbuf_free(mbuf);
        return EDPVS_OK;
    }

    rte_ether_addr_copy(&conn->out_dmac, &eth->d_addr);
    rte_ether_addr_copy(&conn->out_smac, &eth->s_addr);
    eth->ether_type = rte_cpu_to_be_16(packet_type);
    mbuf->packet_type = packet_type;

    err = netif_xmit(mbuf, conn->out_dev);
    if (err != EDPVS_OK)
        RTE_LOG(DEBUG, IPVS, "%s: fail to netif_xmit.\n", __func__);

    /* must return OK since netif_xmit always consumes the mbuf */
    return EDPVS_OK;
}
在cps900k,cc10,pkg size 84情况下,测出fnat64 pps 520w。相比于fnat v4,优化后的fnat64转发性能下降了16%;相比于优化前fnat64,优化后的fnat64转发性能提升11%,火焰图如下:

问题描述:在小包场景下,fnat64相比于fnat v4存在明显的性能下降(约30%)。基于dperf压测工具和物理机mlx 25G网卡进行性能测试,在cps 900k、cc 10、pkg size 84的情况下,测得fnat v4的pps约为620w,fnat64的pps约为470w。
问题分析:结合火焰图进行分析,fnat v4出向过程中dp_vs_out_xmit_fnat函数占比仅0.23%,而fnat64出向过程中dp_vs_out_xmit_fnat函数占比为1.29%,如下图所示。
从代码分析看,fnat64出向过程中,通过查路由表及走协议栈进行转发,而fnat v4大部分通过fast path卸载到网卡,将数据包直接转发到网关下一跳。代码如下:
解决方案:
针对于上述问题,将fnat64出向过程简化:首先,将ipv4头替换为ipv6头;然后,直接由网卡转发到下一跳即网关。实现代码如下:
在cps900k,cc10,pkg size 84情况下,测出fnat64 pps 520w。相比于fnat v4,优化后的fnat64转发性能下降了16%;相比于优化前fnat64,优化后的fnat64转发性能提升11%,火焰图如下: