Skip to content

Commit

Permalink
syscall: implement getdents64
Browse files Browse the repository at this point in the history
V2: The only difference is the removal of the delete_dir() function,
which was accidentally left over from previous attempts to implement
this syscall.

It looks like the golang apps that need to iterate over entries
in a directory use a system call getdents64 which is documented
in https://man7.org/linux/man-pages/man2/getdents.2.html. Normally
this functionality is provided by the libc functions like opendir(),
readdir(), etc., which in turn delegate to getdents64. Go is known
for bypassing libc in such cases.

So this patch implements the syscall getdents64 by adding a utility
function to VFS main.cc that is then called by syscall in linux.cc.
For details of how this function works please look at the comments.

This patch also adds a unit test to verify this syscall works.

Refs #1188

Signed-off-by: Waldemar Kozaczuk <[email protected]>
  • Loading branch information
wkozaczuk committed May 27, 2022
1 parent bcc5e1b commit 6716ad9
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 1 deletion.
65 changes: 65 additions & 0 deletions fs/vfs/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,71 @@ int readdir64_r(DIR *dir, struct dirent64 *entry,
// Export readdir64() as a symbol alias of readdir(), so both names resolve
// to the same implementation.
// NOTE(review): this presumably relies on struct dirent and struct dirent64
// having the same layout on this platform - confirm.
extern "C" OSV_LIBC_API
struct dirent *readdir64(DIR *dir) __attribute__((alias("readdir")));

// Variable-length record that sys_getdents64() writes into the caller's
// buffer, one per directory entry, packed back to back.
// NOTE(review): presumably mirrors the kernel's struct linux_dirent64 as
// described in getdents(2) - confirm before changing any field.
struct linux_dirent64 {
// Inode number, copied from dirent::d_ino
u64 d_ino;
// Directory offset value, copied from dirent::d_off
s64 d_off;
// Total size of this record in bytes; the next record starts that many
// bytes after the start of this one
unsigned short d_reclen;
// Entry type, copied from dirent::d_type
unsigned char d_type;
// NUL-terminated entry name (flexible array member, so it must stay last)
char d_name[];
};

#undef getdents64
extern "C"
ssize_t sys_getdents64(int fd, void *dirp, size_t count)
{
auto *dir = fdopendir(fd);
if (dir) {
// We have verified that fd points to a valid directory
// but we do NOT need the DIR handle so just delete it
delete dir;

struct file *fp;
int error = fget(fd, &fp);
if (error) {
errno = error;
return -1;
}

size_t bytes_read = 0;
off_t last_off = -1;
errno = 0;

// Iterate over as many entries as there is space in the buffer
// by directly calling sys_readdir()
struct dirent entry;
while ((error = sys_readdir(fp, &entry)) == 0) {
auto rec_len = offsetof(linux_dirent64, d_name) + strlen(entry.d_name) + 1;
if (rec_len <= count) {
auto *ldirent = static_cast<linux_dirent64*>(dirp + bytes_read);
ldirent->d_ino = entry.d_ino;
ldirent->d_off = entry.d_off;
ldirent->d_type = entry.d_type;
strcpy(ldirent->d_name, entry.d_name);
ldirent->d_reclen = rec_len;
count -= rec_len;
bytes_read += rec_len;
last_off = entry.d_off;
} else {
if (last_off >= 0)
sys_seekdir(fp, last_off);
break;
}
}

fdrop(fp);

if (error && error != ENOENT) {
errno = error;
return -1;
} else {
errno = 0;
return bytes_read;
}
} else {
return -1;
}
}

OSV_LIBC_API
void rewinddir(DIR *dirp)
{
Expand Down
4 changes: 4 additions & 0 deletions linux.cc
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,9 @@ static int tgkill(int tgid, int tid, int sig)
return -1;
}

// Give the sys_getdents64 name the syscall number of getdents64.
// NOTE(review): presumably needed because the SYSCALLn() dispatch macros
// build the syscall number from the function name (__NR_ + name), and the
// handler is called sys_getdents64 rather than getdents64 - confirm against
// the macro definition.
#define __NR_sys_getdents64 __NR_getdents64
// Handler with C linkage; it is defined in fs/vfs/main.cc.
extern "C" ssize_t sys_getdents64(int fd, void *dirp, size_t count);

OSV_LIBC_API long syscall(long number, ...)
{
// Save FPU state and restore it at the end of this function
Expand Down Expand Up @@ -512,6 +515,7 @@ OSV_LIBC_API long syscall(long number, ...)
SYSCALL2(statfs, const char *, struct statfs *);
SYSCALL3(unlinkat, int, const char *, int);
SYSCALL3(symlinkat, const char *, int, const char *);
SYSCALL3(sys_getdents64, int, void *, size_t);
}

debug_always("syscall(): unimplemented system call %d\n", number);
Expand Down
2 changes: 1 addition & 1 deletion modules/tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
tst-sigaction.so tst-syscall.so tst-ifaddrs.so
tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
# libstatic-thread-variable.so tst-static-thread-variable.so \
#TODO For now let us disable these tests for aarch64 until
Expand Down
111 changes: 111 additions & 0 deletions tests/tst-getdents.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#include <dirent.h> /* Defines DT_* constants */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <cassert>

#include <memory>
#include <string>
#include <vector>
#include <algorithm>

// Prints msg followed by the description of the current errno value (via
// perror) and terminates the test with EXIT_FAILURE. Never returns.
[[noreturn]] static void handle_error(const char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
}

// Self-contained copy of a single directory record returned by getdents64,
// with the name held in an owning std::string so that records collected
// from different reads can be compared.
struct test_dirent64 {
    unsigned long d_ino;
    off_t d_off;
    unsigned char d_type;
    std::string d_name;

    // Two records are equal only when every field matches.
    bool operator ==(const test_dirent64 &other) const {
        if (d_ino != other.d_ino) {
            return false;
        }
        if (d_off != other.d_off) {
            return false;
        }
        if (d_type != other.d_type) {
            return false;
        }
        return d_name == other.d_name;
    }
};

// Reads all entries of the directory dir_path by invoking the raw getdents64
// syscall repeatedly with a buffer of buf_size bytes. Every record is printed
// and appended to dirents. If open() or the syscall fails, the test is
// terminated through handle_error().
// This code is loosely based on the example found under https://man7.org/linux/man-pages/man2/getdents.2.html
void test_getdents64(const char *dir_path, size_t buf_size, std::vector<test_dirent64> &dirents) {
    // Record layout as produced by the syscall; fixed-width types keep the
    // field offsets independent of the width of long and off_t.
    struct linux_dirent64 {
        uint64_t d_ino;
        int64_t d_off;
        unsigned short d_reclen;
        unsigned char d_type;
        char d_name[];
    };

    int fd = open(dir_path, O_RDONLY | O_DIRECTORY);
    if (fd == -1)
        handle_error("open");

    std::unique_ptr<char []> buf_ptr(new char[buf_size]);
    char *buf = buf_ptr.get();

    for (;;) {
        long nread = syscall(SYS_getdents64, fd, buf, buf_size);
        if (nread == -1)
            handle_error("getdents64");

        // 0 means the end of the directory has been reached
        if (nread == 0)
            break;

        printf("--------------- nread=%ld ---------------\n", nread);
        printf("inode# file type d_reclen d_off d_name\n");
        for (long bpos = 0; bpos < nread;) {
            auto *d = reinterpret_cast<linux_dirent64 *>(buf + bpos);
            // A zero-length record would never advance bpos
            assert(d->d_reclen > 0);

            // The inode number is unsigned, so print it as such
            printf("%8ju ", static_cast<uintmax_t>(d->d_ino));

            const unsigned char d_type = d->d_type;
            printf("%-10s ", (d_type == DT_REG) ? "regular" :
                             (d_type == DT_DIR) ? "directory" :
                             (d_type == DT_FIFO) ? "FIFO" :
                             (d_type == DT_SOCK) ? "socket" :
                             (d_type == DT_LNK) ? "symlink" :
                             (d_type == DT_BLK) ? "block dev" :
                             (d_type == DT_CHR) ? "char dev" : "???");

            printf("%4d %10jd %s\n", d->d_reclen,
                   static_cast<intmax_t>(d->d_off), d->d_name);
            bpos += d->d_reclen;

            test_dirent64 dirent;
            dirent.d_ino = d->d_ino;
            dirent.d_off = d->d_off;
            dirent.d_type = d_type;
            dirent.d_name = d->d_name;
            dirents.push_back(dirent);
        }
    }

    close(fd);
}

// Buffer sizes (in bytes) passed to getdents64 by main(): the directory is
// read once with the large buffer and once with the small one, and the two
// results are compared.
#define LARGE_BUF_SIZE 1024
#define SMALL_BUF_SIZE 128

int main()
{
// Verify that getdents64 works correctly against /proc directory and yields
// correct results
std::vector<test_dirent64> dirents_1;
test_getdents64("/proc", LARGE_BUF_SIZE, dirents_1);

assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_type == DT_REG; }) >= 3);
assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_type == DT_DIR; }) >= 5);

assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == ".."; }) != dirents_1.end());
assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == "cpuinfo"; }) != dirents_1.end());
assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == "sys"; }) != dirents_1.end());

// Verify that getdents64 works with smaller buffer and yields same results as above
std::vector<test_dirent64> dirents_2;
test_getdents64("/proc", SMALL_BUF_SIZE, dirents_2);

assert(std::equal(dirents_1.begin(), dirents_1.end(), dirents_2.begin()));
}

0 comments on commit 6716ad9

Please sign in to comment.