Skip to content

Commit 1262e90

Browse files
authored
[WasmFS] Add a way to opt out of using the dcache (emscripten-core#17754)
Besides acting as a cache for relatively slow WasmFS backends, the dcache also serves the critical purpose of ensuring that file identities are stable, i.e. that each `File` object corresponds 1:1 to an underlying file. Maintaining this property is critical because a large number of subtle bugs pop up without it. For example, if a single file is represented by two `File` objects, then removing it will only clear the parent pointer of one of the objects, leaving the other in an inconsistent state. However, sometimes the dcache can introduce more problems than it solves. For example, in a hypothetical case-insensitive backend, the dcache would incorrectly accumulate multiple entries with different capitalizations corresponding to the same underlying file. In other cases, such as for the in-memory backend, the dcache is superfluous because the backend already ensures that files correspond 1:1 to `File` objects. For these exceptional cases, provide a new virtual directory method, `maintainsFileIdentity`, that allows backends to opt out of using the dcache and handle all directory lookups themselves. Opting out comes with a few additional responsibilities that are documented in file.h.
1 parent 986caa9 commit 1262e90

5 files changed

Lines changed: 90 additions & 23 deletions

File tree

system/lib/wasmfs/backends/memory_backend.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,17 @@ MemoryDirectory::findEntry(const std::string& name) {
3838
});
3939
}
4040

41+
std::shared_ptr<File> MemoryDirectory::getChild(const std::string& name) {
42+
if (auto entry = findEntry(name); entry != entries.end()) {
43+
return entry->child;
44+
}
45+
return nullptr;
46+
}
47+
4148
bool MemoryDirectory::removeChild(const std::string& name) {
4249
auto entry = findEntry(name);
4350
if (entry != entries.end()) {
51+
entry->child->locked().setParent(nullptr);
4452
entries.erase(entry);
4553
}
4654
return true;
@@ -71,6 +79,17 @@ bool MemoryDirectory::insertMove(const std::string& name,
7179
return true;
7280
}
7381

82+
std::string MemoryDirectory::getName(std::shared_ptr<File> file) {
83+
auto it =
84+
std::find_if(entries.begin(), entries.end(), [&](const auto& entry) {
85+
return entry.child == file;
86+
});
87+
if (it != entries.end()) {
88+
return it->name;
89+
}
90+
return "";
91+
}
92+
7493
class MemoryFileBackend : public Backend {
7594
public:
7695
std::shared_ptr<DataFile> createFile(mode_t mode) override {

system/lib/wasmfs/file.cpp

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,17 @@ void DataFile::Handle::preloadFromJS(int index) {
4343
void Directory::Handle::cacheChild(const std::string& name,
4444
std::shared_ptr<File> child,
4545
DCacheKind kind) {
46-
// Update the dcache and set the child's parent.
47-
auto& dcache = getDir()->dcache;
48-
auto [_, inserted] = dcache.insert({name, {kind, child}});
49-
assert(inserted && "inserted child already existed!");
50-
assert(child->locked().getParent() == nullptr);
46+
// Update the dcache if the backend hasn't opted out of using the dcache or if
47+
// this is a mount point, in which case it is not under the control of the
48+
// backend.
49+
if (kind == DCacheKind::Mount || !getDir()->maintainsFileIdentity()) {
50+
auto& dcache = getDir()->dcache;
51+
auto [_, inserted] = dcache.insert({name, {kind, child}});
52+
assert(inserted && "inserted child already existed!");
53+
}
54+
// Set the child's parent.
55+
assert(child->locked().getParent() == nullptr ||
56+
child->locked().getParent() == getDir());
5157
child->locked().setParent(getDir());
5258
}
5359

@@ -143,30 +149,42 @@ bool Directory::Handle::insertMove(const std::string& name,
143149
if (!getParent()) {
144150
return false;
145151
}
152+
146153
// Look up the file in its old parent's cache.
147154
auto oldParent = file->locked().getParent();
148155
auto& oldCache = oldParent->dcache;
149156
auto oldIt = std::find_if(oldCache.begin(), oldCache.end(), [&](auto& kv) {
150157
return kv.second.file == file;
151158
});
152-
assert(oldIt != oldCache.end());
153-
auto [oldName, entry] = *oldIt;
154-
assert(oldName.size());
159+
160+
// TODO: Handle moving mount points correctly by only updating caches without
161+
// involving the backend.
162+
155163
// Attempt the move.
156164
if (!getDir()->insertMove(name, file)) {
157165
return false;
158166
}
159-
// Update parent pointers and caches to reflect the successful move.
160-
oldCache.erase(oldIt);
161-
auto& newCache = getDir()->dcache;
162-
auto [it, inserted] = newCache.insert({name, entry});
163-
if (!inserted) {
164-
// Update and overwrite the overwritten file.
165-
it->second.file->locked().setParent(nullptr);
166-
it->second = entry;
167+
168+
if (oldIt != oldCache.end()) {
169+
// Do the move and update the caches.
170+
auto [oldName, entry] = *oldIt;
171+
assert(oldName.size());
172+
// Update parent pointers and caches to reflect the successful move.
173+
oldCache.erase(oldIt);
174+
auto& newCache = getDir()->dcache;
175+
auto [it, inserted] = newCache.insert({name, entry});
176+
if (!inserted) {
177+
// Update and overwrite the overwritten file.
178+
it->second.file->locked().setParent(nullptr);
179+
it->second = entry;
180+
}
181+
file->locked().setParent(getDir());
182+
} else {
183+
// This backend doesn't use the dcache.
184+
assert(getDir()->maintainsFileIdentity());
167185
}
168-
file->locked().setParent(getDir());
169186

187+
// TODO: Moving mount points probably shouldn't update the mtime.
170188
auto now = time(NULL);
171189
oldParent->locked().setMTime(now);
172190
setMTime(now);
@@ -194,6 +212,9 @@ bool Directory::Handle::removeChild(const std::string& name) {
194212
}
195213

196214
std::string Directory::Handle::getName(std::shared_ptr<File> file) {
215+
if (getDir()->maintainsFileIdentity()) {
216+
return getDir()->getName(file);
217+
}
197218
auto& dcache = getDir()->dcache;
198219
for (auto it = dcache.begin(); it != dcache.end(); ++it) {
199220
if (it->second.file == file) {

system/lib/wasmfs/file.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#pragma once
1111

12+
#include "support.h"
1213
#include <assert.h>
1314
#include <emscripten/html5.h>
1415
#include <map>
@@ -215,6 +216,30 @@ class Directory : public File {
215216
// The list of entries in this directory.
216217
virtual std::vector<Directory::Entry> getEntries() = 0;
217218

219+
// Only backends that maintain file identity themselves (see below) need to
220+
// implement this.
221+
virtual std::string getName(std::shared_ptr<File> file) {
222+
WASMFS_UNREACHABLE("getName unimplemented");
223+
}
224+
225+
// Whether this directory implementation always returns the same `File` object
226+
// for a given file. Most backends can be much simpler if they don't handle
227+
// this themselves. Instead, they rely on the directory cache (dcache) to
228+
// maintain file identity for them by ensuring each file is looked up in the
229+
// backend only once. Some backends, however, already track file identity, so
230+
// the dcache is not necessary (or would even introduce problems).
231+
//
232+
// When this is `true`, backends are responsible for:
233+
//
234+
// 1. Ensuring that all insert* and getChild calls returning a particular
235+
// file return the same File object.
236+
//
237+
// 2. Clearing the File's parent field in `removeChild`.
238+
//
239+
// 3. Implementing `getName`, since it cannot be implemented in terms of the
240+
// dcache.
241+
virtual bool maintainsFileIdentity() { return false; }
242+
218243
public:
219244
static constexpr FileKind expectedKind = File::DirectoryKind;
220245
Directory(mode_t mode, backend_t backend)

system/lib/wasmfs/memory_backend.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,7 @@ class MemoryDirectory : public Directory {
5858
entries.push_back({name, child});
5959
}
6060

61-
std::shared_ptr<File> getChild(const std::string& name) override {
62-
return nullptr;
63-
}
61+
std::shared_ptr<File> getChild(const std::string& name) override;
6462

6563
bool removeChild(const std::string& name) override;
6664

@@ -90,6 +88,12 @@ class MemoryDirectory : public Directory {
9088
size_t getNumEntries() override { return entries.size(); }
9189
std::vector<Directory::Entry> getEntries() override;
9290

91+
std::string getName(std::shared_ptr<File> file) override;
92+
93+
// Since we internally track files with `File` objects, we don't need the
94+
// dcache as well.
95+
bool maintainsFileIdentity() override { return true; }
96+
9397
public:
9498
MemoryDirectory(mode_t mode, backend_t backend) : Directory(mode, backend) {}
9599
};

system/lib/wasmfs/support.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55

66
#pragma once
77

8-
#include <stdnoreturn.h>
9-
108
#include <cstdlib>
119

1210
#ifndef NDEBUG
1311
// In debug builds show a message.
1412
namespace wasmfs {
15-
noreturn void
13+
[[noreturn]] void
1614
handle_unreachable(const char* msg, const char* file, unsigned line);
1715
}
1816
#define WASMFS_UNREACHABLE(msg) \

0 commit comments

Comments
 (0)