Skip to content

Commit

Permalink
Route Cache patch cloudius-systems#1
Browse files Browse the repository at this point in the history
The first patch replaces the calls to the BSD functions in three places
by calls to a new function route_cache::lookup(). lookup() is a new
function which still does the slow lock-full lookup, but the point of
this patch is twofold: 1. To verify that indeed, these three places are
all the lookups done in the fast-path of the UDP memcached and tomcat
benchmarks (other benchmarks may involve more code paths, can be fixed
later).
2. Now lookup() takes care of the locking, and there is no need to
unlock a route after calling it - and the first patch verifies this
movement of the unlock code indeed works as a expected.

Note that this patch defines an invalidate() function but nothing
ever defines it, nor uses it. Once we're confident about the direction
of these two patches, I think it should be pretty straighforward to
implement and call invalidate() on route changes.

Signed-off-by: Nadav Har'El <[email protected]>
  • Loading branch information
nyh authored and asias committed Jul 23, 2014
1 parent 8fb6a66 commit ef36d12
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 11 deletions.
85 changes: 85 additions & 0 deletions bsd/sys/net/routecache.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (C) 2014 Cloudius Systems, Ltd.
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
*/

// FreeBSD's routing table, in route.cc and radix.cc, is fairly slow when
// used in the fast path to decide where to send each individual packet:
// The radix tree holding the routes is protected by a rwlock which needs
// to be locked for reading (which is twice slower than a normal lock),
// and and then the route we found is individually locked and unlock.
// Even when uncontended on a single-CPU VM, all this locking and unlocking
// is pretty slow (e.g., in one memcached run I saw this responsible for more
// than 1.5 locks per second), but when we have an SMP with many CPUs
// accessing the network, things get even worse because we start seeing
// contention on these mutexes.
//
// What we really need is to assume that the routing table rarely changes
// an have an RCU routing table, where the read path (the packet-sending
// fast path) involves no locks, and only the write path (changing a route)
// involves a mutex and creation of a new copy of the routing table.
// However, instead of rewriting FreeBSD's route.cc and radix.cc, and the
// countless places that use it and make subtle assumptions on how it works,
// we decided to do this:
//
// 1. In this file, we define a "routing cache", an RCU-based layer in
// front of the usual routing table.
//
// 2. A new function looks up in the routing cache first, and only if
// it can't find the route there, it looks up in the regular table,
// with all the locks as usual - and places the found route on the
// cache.
// We should use this function whenever it makes sense and performance
// is important. We don't have to change all the existing code to use it.
//
// 3. A new function invalidates the routing cache. It should be called
// whenever routes are modified. Missing a few places is not a disaster
// but can lead to the wrong route being used in esoteric places. It
// would be better to find a good place to call this function every
// time (e.g., perhaps write-unlock of the radix tree is a good place?
// or not good enough if just a route itself is modified?).

#ifndef INCLUDED_ROUTECACHE_HH
#define INCLUDED_ROUTECACHE_HH

// FIXME: probably most of these includes are unnecessary
#include <bsd/porting/netport.h>
#include <bsd/porting/sync_stub.h>
#include <bsd/sys/sys/param.h>
#include <bsd/sys/sys/mbuf.h>
#include <bsd/sys/sys/socket.h>
#include <bsd/sys/sys/domain.h>
#include <bsd/sys/net/if.h>
#include <bsd/sys/net/if_dl.h>
#include <bsd/sys/netinet/in_var.h>

#include <bsd/sys/net/route.h>

#include <osv/rcu.hh>
#include <unordered_map>
#include <functional>


class route_cache {
public:
// Note that this returns a copy of a routing entry, *not* a pointer.
// So the return value shouldn't be written to, nor, of course, be RTFREE'd.
static void lookup(struct bsd_sockaddr_in *dst, u_int fibnum, struct rtentry *ret) {
// A slow implementation
struct route ro {};
ro.ro_dst = *(struct bsd_sockaddr *)dst;
in_rtalloc_ign(&ro, 0, fibnum);
memcpy(ret, ro.ro_rt, sizeof(*ret));
RO_RTFREE(&ro);
ret->rt_refcnt = -1; // try to catch some monkey-business
mutex_init(&ret->rt_mtx._mutex); // try to catch some monkey-business?
}

static void invalidate() {

}
};

#endif
10 changes: 7 additions & 3 deletions bsd/sys/netinet/in_pcb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
#include <bsd/sys/netinet6/ip6_var.h>
#endif /* INET6 */

#include <bsd/sys/net/routecache.hh>


#ifdef IPSEC
#include <bsd/sys/netipsec/ipsec.h>
Expand Down Expand Up @@ -674,6 +676,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct bsd_sockaddr *sa;
struct bsd_sockaddr_in *sin;
struct route sro;
struct rtentry rte_one;
int error;

KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
Expand All @@ -693,7 +696,10 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
* Find out route to destination.
*/
if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
{
route_cache::lookup(sin, inp->inp_inc.inc_fibnum, &rte_one);
sro.ro_rt = &rte_one;
}

/*
* If we found a route, use the address corresponding to
Expand Down Expand Up @@ -790,8 +796,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}

done:
if (sro.ro_rt != NULL)
RTFREE(sro.ro_rt);
return (error);
}

Expand Down
11 changes: 5 additions & 6 deletions bsd/sys/netinet/ip_output.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
#include <bsd/sys/netinet/ip_var.h>
#include <bsd/sys/netinet/ip_options.h>

#include <bsd/sys/net/routecache.hh>

#ifdef IPSEC
#include <netinet/ip_ipsec.h>
#include <netipsec/ipsec.h>
Expand Down Expand Up @@ -111,6 +113,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
struct rtentry *rte; /* cache for ro->ro_rt */
struct in_addr odst;
struct m_tag *fwd_tag = NULL;
struct rtentry rte_one;
#ifdef IPSEC
int no_route_but_check_spd = 0;
#endif
Expand All @@ -125,10 +128,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
}
}

if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
}

#ifdef FLOWTABLE
if (ro->ro_rt == NULL) {
Expand Down Expand Up @@ -250,8 +251,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
#else
in_rtalloc_ign(ro, 0,
inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
route_cache::lookup(dst, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m), &rte_one);
ro->ro_rt = &rte_one;
#endif
rte = ro->ro_rt;
}
Expand Down Expand Up @@ -653,8 +654,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
IPSTAT_INC(ips_fragmented);

done:
if (ro == &iproute)
RO_RTFREE(ro);
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
Expand Down
6 changes: 4 additions & 2 deletions bsd/sys/netinet/tcp_subr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@

#include <machine/in_cksum.h>
#include <bsd/sys/sys/md5.h>
#include <bsd/sys/net/routecache.hh>

VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
#ifdef INET6
Expand Down Expand Up @@ -1689,6 +1690,7 @@ u_long
tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct rtentry rte_one;
struct bsd_sockaddr_in *dst;
struct ifnet *ifp;
u_long maxmtu = 0;
Expand All @@ -1701,7 +1703,8 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = inc->inc_faddr;
in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
route_cache::lookup(dst, inc->inc_fibnum, &rte_one);
sro.ro_rt = &rte_one;
}
if (sro.ro_rt != NULL) {
ifp = sro.ro_rt->rt_ifp;
Expand All @@ -1716,7 +1719,6 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags)
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
RTFREE(sro.ro_rt);
}
return (maxmtu);
}
Expand Down

0 comments on commit ef36d12

Please sign in to comment.