chore(tiering): External alloc free page chaining (#3237)
Signed-off-by: Vladislav Oleshko <[email protected]>
dranikpg authored Jun 30, 2024
1 parent 65e42de commit f58000c
Showing 4 changed files with 68 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/server/tiered_storage_test.cc
@@ -213,6 +213,7 @@ TEST_F(TieredStorageTest, FlushAll) {
auto reader = pp_->at(0)->LaunchFiber([&] {
while (!done) {
Run("reader", {"GET", absl::StrCat("k", rand() % kNum)});
util::ThisFiber::Yield();
}
});

45 changes: 33 additions & 12 deletions src/server/tiering/external_alloc.cc
@@ -131,22 +131,29 @@ template <size_t N> size_t FindFirst(const std::bitset<N>& bs) {
*/
namespace detail {

// A page can be in exactly one of these 3 states:
// 1. Uninitialized - no blocks allocated; segment_inuse is 0 in that case, 1 otherwise.
// 2. Partly utilized by 1 or more blocks, with available > 0. In that case it must be present
// in the free_pages_ list.
// 3. Fully utilized, with available == 0; in that case it is not part of the free_pages_ list.
struct Page {
std::bitset<256> free_blocks; // bitmask of free blocks (32 bytes).
uint8_t id; // index inside the Segment.pages array.

// need some mapping function to map from block_size to real_block_size given Page class.
BinIdx block_size_bin;
BinIdx bin_idx;
uint8_t segment_inuse : 1; // true if segment allocated this page.
uint8_t reserved[3];

// can be computed via free_blocks.count().
uint16_t available; // in number of blocks.
Page* next_free; // next page in the free_pages_ list

// We cannot use a c'tor because of the trick in Segment where we allocate more pages
// than SegmentDescr declares.
void Reset(uint8_t new_id) {
static_assert(sizeof(Page) == 40);
static_assert(sizeof(Page) == 48);

memset(&id, 0, sizeof(Page) - offsetof(Page, id));
id = new_id;
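
To make the free page chaining concrete, here is a small standalone sketch of the same idea, reduced to its essentials. FreeListPage, PushFront and PopFront are illustrative names only, not part of the allocator; the real Page above additionally tracks its id, bin index and segment_inuse bit, and the real lists are headed by a sentinel empty_page rather than nullptr.

#include <bitset>
#include <cstdint>

// Illustrative model: a page owns a bitmask of free blocks and an intrusive
// next pointer, so pages that still have free blocks can be chained per bin.
struct FreeListPage {
  std::bitset<256> free_blocks;       // 1 bit per block; set means the block is free
  uint16_t available = 0;             // cached free_blocks.count()
  FreeListPage* next_free = nullptr;  // next page in the per-bin free list
};

// Push a page that just became usable (available went 0 -> 1) to the front of
// its bin's list. O(1) and allocation-free thanks to the intrusive link.
inline void PushFront(FreeListPage*& head, FreeListPage* page) {
  page->next_free = head;
  head = page;
}

// Pop the head once it runs out of free blocks (available went 1 -> 0).
inline void PopFront(FreeListPage*& head) {
  FreeListPage* old_head = head;
  head = old_head->next_free;
  old_head->next_free = nullptr;
}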
@@ -162,7 +169,7 @@ void Page::Init(PageClass pc, BinIdx bin_id) {
DCHECK_EQ(available, 0);
DCHECK(segment_inuse);

block_size_bin = bin_id;
bin_idx = bin_id;
if (pc == PageClass::LARGE_P) {
available = 1;
} else {
@@ -221,7 +228,7 @@ class ExternalAllocator::SegmentDescr {

size_t BlockOffset(const Page* page, unsigned blockpos) {
return offset_ + page->id * (1 << page_info_.page_shift) +
ToBlockSize(page->block_size_bin) * blockpos;
ToBlockSize(page->bin_idx) * blockpos;
}

bool HasFreePages() const {
@@ -333,28 +340,28 @@ ExternalAllocator::~ExternalAllocator() {
int64_t ExternalAllocator::Malloc(size_t sz) {
uint8_t bin_idx = ToBinIdx(sz);
Page* page = free_pages_[bin_idx];

if (page->available == 0) { // empty page.
PageClass pc = detail::ClassFromSize(sz);

if (pc == PageClass::LARGE_P) {
return LargeMalloc(sz);
}

page = FindPage(pc);
if (!page)
return -int64_t(kSegmentSize);

free_pages_[bin_idx] = page;
page->Init(pc, bin_idx);
}

DCHECK(page->available);

size_t pos = FindFirst(page->free_blocks);

page->free_blocks.flip(pos);
--page->available;
allocated_bytes_ += ToBlockSize(page->block_size_bin);

if (--page->available == 0) // Remove empty page from freelist
free_pages_[bin_idx] = page->next_free ? page->next_free : &empty_page;

allocated_bytes_ += ToBlockSize(page->bin_idx);
SegmentDescr* seg = ToSegDescr(page);
return seg->BlockOffset(page, pos);
}
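
Continuing the sketch above: allocation hands out the first free block of the bin's head page and unlinks the page as soon as its last free block is taken, which is what the new --page->available == 0 branch in Malloc does (the real code falls back to the sentinel empty_page instead of nullptr and scans the bitmap word by word).

// Illustrative only: allocate one block from the bin's head page.
// Returns the block index within the page, or -1 if the bin has no usable page.
inline int AllocateBlock(FreeListPage*& head) {
  if (head == nullptr || head->available == 0)
    return -1;  // the real allocator would locate and Init() a fresh page here

  size_t pos = 0;
  while (!head->free_blocks.test(pos))  // first free block; always found since available > 0
    ++pos;

  head->free_blocks.reset(pos);
  if (--head->available == 0)
    PopFront(head);  // page is now fully used; drop it from the free list
  return static_cast<int>(pos);
}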
@@ -372,7 +379,7 @@ void ExternalAllocator::Free(size_t offset, size_t sz) {
Page* page = seg->GetPage(page_id);
unsigned page_size = (1 << seg->page_shift());
unsigned block_offs = delta % page_size;
unsigned block_size = ToBlockSize(page->block_size_bin);
unsigned block_size = ToBlockSize(page->bin_idx);
unsigned block_id = block_offs / block_size;
unsigned blocks_num = page_size / block_size;

@@ -384,8 +391,14 @@ void ExternalAllocator::Free(size_t offset, size_t sz) {
++page->available;

DCHECK_EQ(page->available, page->free_blocks.count());
// If the page becomes fully free, return it to the segment; otherwise, if it just became
// non-empty, add it to the free pages list.
if (page->available == blocks_num) {
FreePage(page, seg, block_size);
} else if (page->available == 1) {
DCHECK_NE(page, free_pages_[page->bin_idx]);
page->next_free = free_pages_[page->bin_idx];
free_pages_[page->bin_idx] = page;
}
allocated_bytes_ -= block_size;
}
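
The matching free path in the sketch re-links a page into its bin's list on the 0 -> 1 transition of available, mirroring the page->available == 1 branch added to Free above; a page whose blocks all become free is instead handed back to its segment, as FreePage below does.

// Illustrative only: return one block to its page and re-link the page into
// the bin's free list the moment it becomes usable again.
inline void FreeBlock(FreeListPage*& head, FreeListPage* page, size_t pos) {
  page->free_blocks.set(pos);
  if (++page->available == 1)
    PushFront(head, page);  // 0 -> 1: page re-enters the bin's free list
}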
@@ -479,11 +492,19 @@ void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_s

// Unlink the page from its bin's free list (it may be the head or somewhere in the middle).
if (free_pages_[bidx] == page) {
free_pages_[bidx] = &empty_page;
free_pages_[bidx] = page->next_free ? page->next_free : &empty_page;
} else {
for (auto* cur = free_pages_[bidx]; cur != nullptr; cur = cur->next_free) {
if (cur->next_free == page) {
cur->next_free = page->next_free;
break;
}
}
}

page->segment_inuse = 0;
page->available = 0;
page->next_free = nullptr;

if (!owner->HasFreePages()) {
// Segment was fully booked but now it has a free page.
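
The removal in FreePage above is a plain singly-linked-list unlink: if the page is the list head, the head simply advances; otherwise the list is walked to find the predecessor. The list is intrusive and only holds pages of one bin that still have free blocks, so the linear walk stays short. Reduced to the sketch's types:

// Illustrative only: unlink `page` from the bin's intrusive list before the
// page is returned to its segment.
inline void Unlink(FreeListPage*& head, FreeListPage* page) {
  if (head == page) {
    head = page->next_free;
  } else {
    for (FreeListPage* cur = head; cur != nullptr; cur = cur->next_free) {
      if (cur->next_free == page) {
        cur->next_free = page->next_free;
        break;
      }
    }
  }
  page->next_free = nullptr;
}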
4 changes: 2 additions & 2 deletions src/server/tiering/external_alloc.h
@@ -94,8 +94,8 @@ class ExternalAllocator {

static SegmentDescr* ToSegDescr(Page*);

SegmentDescr* sq_[2]; // map: PageClass -> free Segment.
Page* free_pages_[detail::kNumFreePages];
SegmentDescr* sq_[2]; // map: PageClass -> free Segment.
Page* free_pages_[detail::kNumFreePages]; // intrusive linked lists of pages with free blocks

// A segment for each 256MB range. To get a segment id from the offset, shift right by 28.
std::vector<SegmentDescr*> segments_;
35 changes: 32 additions & 3 deletions src/server/tiering/external_alloc_test.cc
@@ -51,16 +51,18 @@ TEST_F(ExternalAllocatorTest, Basic) {

ext_alloc_.AddStorage(0, kSegSize);
EXPECT_EQ(0, ext_alloc_.Malloc(kMinBlockSize - 96)); // page0: 1
EXPECT_EQ(kMinBlockSize, ext_alloc_.Malloc(kMinBlockSize)); // page0: 2

constexpr auto kAnotherLen = kMinBlockSize * 2 - 10;
size_t offset2 = ext_alloc_.Malloc(kAnotherLen); // page1: 1
EXPECT_EQ(offset2, 1_MB); // another page.

ext_alloc_.Free(offset2, kAnotherLen); // should return the page to the segment.
EXPECT_EQ(offset2, ext_alloc_.Malloc(16_KB)); // another page. page1: 1

ext_alloc_.Free(0, kMinBlockSize - 96); // page0: 1
ext_alloc_.Free(kMinBlockSize, kMinBlockSize); // page0: 0

EXPECT_EQ(0, ext_alloc_.Malloc(kMinBlockSize * 2)); // page0
}

@@ -117,4 +119,31 @@ TEST_F(ExternalAllocatorTest, Classes) {
EXPECT_EQ(1_MB + 4_KB, ExternalAllocator::GoodSize(1_MB + 1));
}

// Fill up the allocator until it has to grow, remove 90% and make sure it has free space even with
// extreme fragmentation
TEST_F(ExternalAllocatorTest, EmptyFull) {
const int kAllocSize = kMinBlockSize;
ext_alloc_.AddStorage(0, 2 * kSegSize);

// Fill up the allocator
vector<int64_t> offsets;
int64_t offset;
do {
offset = ext_alloc_.Malloc(kAllocSize);
if (offset >= 0)
offsets.push_back(offset);
} while (offset >= 0);

// Keep only 10%, free 90%
for (size_t i = 0; i < offsets.size(); i++) {
if (i % 10 == 0)
continue;
ext_alloc_.Free(offsets[i], kAllocSize);
}

// Expect to succeed adding 10% without growing
for (size_t i = 0; i < offsets.size() / 10; i++)
EXPECT_GT(ext_alloc_.Malloc(kAllocSize), 0u);
}

} // namespace dfly::tiering
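
For reference, a hypothetical caller-side growth loop based on the convention these tests rely on: a negative Malloc() result means "no space; grow the backing store by -result bytes and retry". This is only a sketch of the assumed usage, not Dragonfly's actual tiering code; it assumes new ranges are appended at the current end of the backing store and that a single retry suffices.

// Sketch against the ExternalAllocator declared in external_alloc.h above.
int64_t MallocGrowing(dfly::tiering::ExternalAllocator& alloc, size_t& backing_size, size_t sz) {
  int64_t offset = alloc.Malloc(sz);
  if (offset < 0) {
    size_t grow = static_cast<size_t>(-offset);  // e.g. kSegmentSize
    alloc.AddStorage(backing_size, grow);        // register the newly grown range
    backing_size += grow;                        // caller tracks the backing size
    offset = alloc.Malloc(sz);                   // retry; assumed to fit now
  }
  return offset;
}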
