From ee72a59a29c29d2b580a45a68da954fdcadb4c5c Mon Sep 17 00:00:00 2001 From: Pierre Krieger Date: Thu, 27 Oct 2022 16:46:14 +0200 Subject: [PATCH] Properly handle data channel opening failures (#2936) Fix https://github.com/paritytech/smoldot/issues/2900 In order to open outbound substreams on a WebRTC connection, the mechanism that the API of smoldot uses is as follows: one function to ask the implementation to open a substream, and one callback that is later called in order to notify that a substream has been fully opened. The API of smoldot doesn't provide a way to report that opening an outbound substream has failed. This is because this isn't really supposed to happen. If you have a connection, opening a substream is purely a "local side change": you allocate an ID, then send a message. It should maybe not even be asynchronous, but that's off-topic here. The problem here is that this mental model doesn't really fit what the browser does. In particular, the browser does nothing until a data channel is first opened, plus the browser can call `onclose` or `onerror` on the data channel before `onopen`. To accomodate for that, the current code is a bit hacky in the sense that it reports the connection as immediately open. This PR does it a bit more differently and more properly: we immediately open a data channel at the same time as we create the `RTCPeerConnection` in order for the browser to start connecting, even though smoldot didn't request a channel yet. We store this first data channel on the side, and when smoldot asks for a data channel we provide this one. If a data channel closes while it is still opening (the handshake one or another), we just kill the entire connection. --- bin/light-base/src/platform.rs | 5 + bin/wasm-node/CHANGELOG.md | 1 + bin/wasm-node/javascript/src/index-browser.ts | 203 ++++++++++++------ .../src/instance/bindings-smoldot-light.ts | 4 + bin/wasm-node/rust/src/bindings.rs | 5 + 5 files changed, 148 insertions(+), 70 deletions(-) diff --git a/bin/light-base/src/platform.rs b/bin/light-base/src/platform.rs index 4c4433e29f..4cca8e0cc3 100644 --- a/bin/light-base/src/platform.rs +++ b/bin/light-base/src/platform.rs @@ -80,6 +80,11 @@ pub trait Platform: Send + 'static { /// Queues the opening of an additional outbound substream. /// /// The substream, once opened, must be yielded by [`Platform::next_substream`]. + /// + /// > **Note**: No mechanism exists in this API to handle the situation where a substream fails + /// > to open, as this is not supposed to happen. If you need to handle such a + /// > situation, either try again opening a substream again or reset the entire + /// > connection. fn open_out_substream(connection: &mut Self::Connection); /// Waits until a new incoming substream arrives on the connection. diff --git a/bin/wasm-node/CHANGELOG.md b/bin/wasm-node/CHANGELOG.md index 4d2e2ab288..a5efea828b 100644 --- a/bin/wasm-node/CHANGELOG.md +++ b/bin/wasm-node/CHANGELOG.md @@ -11,6 +11,7 @@ ### Fixed - Fix errors showing in the browser's console about WebSockets being already in the CLOSING or CLOSED state. ([#2925](https://github.com/paritytech/smoldot/pull/2925)) +- No longer panic when a WebRTC connection fails to open due to the browser calling callbacks in an unexpected order. ([#2936](https://github.com/paritytech/smoldot/pull/2936)) ## 0.7.3 - 2022-10-19 diff --git a/bin/wasm-node/javascript/src/index-browser.ts b/bin/wasm-node/javascript/src/index-browser.ts index 7aaebdeb04..aabf44a0c9 100644 --- a/bin/wasm-node/javascript/src/index-browser.ts +++ b/bin/wasm-node/javascript/src/index-browser.ts @@ -148,38 +148,126 @@ export function start(options?: ClientOptions): Client { // TODO: detect localhost for Firefox? https://bugzilla.mozilla.org/show_bug.cgi?id=1659672 - let pc: RTCPeerConnection | null = null; + // Note that `pc` can be the connection, but also null or undefined. + // `undefined` means "certificate generation in progress", while `null` means "opening must + // be cancelled". + // While it would be better to use for example a string instead of `null`, using `null` lets + // us use the `!` operator more easily and leads to more readable code. + let pc: RTCPeerConnection | null | undefined = undefined; + // Contains the data channels that are open and have been reported to smoldot. const dataChannels = new Map(); - // TODO: this system is a complete hack - let isFirstSubstream = true; - // The opening of the connection is asynchronous. If smoldot calls `close` in the meanwhile, - // this variable is set to `true`, and we interrupt the opening. - let cancelOpening = false; + // For various reasons explained below, we open a data channel in advance without reporting it + // to smoldot. This data channel is stored in this variable. Once it is reported to smoldot, + // it is inserted in `dataChannels`. + let handshakeDataChannel: RTCDataChannel | undefined; + // Multihash-encoded DTLS certificate of the local node. Unknown as long as it hasn't been + // generated. + // TODO: could be merged with `pc` in one variable, and maybe even the other fields as well + let localTlsCertificateMultihash: Uint8Array | undefined; + + // Kills all the JavaScript objects (the connection and all its substreams), ensuring that no + // callback will be called again. Doesn't report anything to smoldot, as this should be done + // by the caller. + const killAllJs = () => { + // The `RTCPeerConnection` is created pretty quickly. It is however still possible for + // smoldot to cancel the opening, in which case `pc` will still be undefined. + if (!pc) { + console.assert(dataChannels.size === 0 && !handshakeDataChannel, "substreams exist while pc is undef") + pc = null; + return + } + + pc!.onconnectionstatechange = null; + pc!.onnegotiationneeded = null; + pc!.ondatachannel = null; + + for (const channel of Array.from(dataChannels.values())) { + channel.onopen = null; + channel.onerror = null; + channel.onclose = null; + channel.onmessage = null; + } + dataChannels.clear(); + if (handshakeDataChannel) { + handshakeDataChannel.onopen = null; + handshakeDataChannel.onerror = null; + handshakeDataChannel.onclose = null; + handshakeDataChannel.onmessage = null; + } + handshakeDataChannel = undefined; + + pc!.close(); // Not necessarily necessary, but it doesn't hurt to do so. + }; // Function that configures a newly-opened channel and adds it to the map. Used for both // inbound and outbound substreams. - const addChannel = (dataChannel: RTCDataChannel, direction: 'inbound' | 'outbound') => { + const addChannel = (dataChannel: RTCDataChannel, direction: 'first-outbound' | 'inbound' | 'outbound') => { const dataChannelId = dataChannel.id!; dataChannel.binaryType = 'arraybuffer'; + let isOpen = false; + dataChannel.onopen = () => { + console.assert(!isOpen, "substream opened twice") + isOpen = true; + + if (direction === 'first-outbound') { + console.assert(dataChannels.size === 0, "dataChannels not empty when opening"); + console.assert(handshakeDataChannel === dataChannel, "handshake substream mismatch"); + config.onOpen({ + type: 'multi-stream', + handshake: 'webrtc', + // `addChannel` can never be called before the local certificate is generated, so this + // value is always defined. + localTlsCertificateMultihash: localTlsCertificateMultihash!, + remoteTlsCertificateMultihash: remoteCertMultihash + }); + } else { + console.assert(direction !== 'outbound' || !handshakeDataChannel, "handshakeDataChannel still defined"); config.onStreamOpened(dataChannelId, direction); + } }; - dataChannel.onerror = (_error) => { - config.onStreamReset(dataChannelId); - }; - - dataChannel.onclose = () => { + dataChannel.onerror = dataChannel.onclose = (error) => { + // A couple of different things could be happening here. + if (handshakeDataChannel === dataChannel && !isOpen) { + // The handshake data channel that we have opened ahead of time failed to open. As this + // happens before we have reported the WebRTC connection as a whole as being open, we + // need to report that the connection has failed to open. + killAllJs(); + config.onConnectionReset("handshake data channel failed to open" + error ? " " + error.toString() : ""); + } else if (handshakeDataChannel === dataChannel) { + // The handshake data channel has been closed before we reported it to smoldot. This + // isn't really a problem. We just update the state and continue running. If smoldot + // requests a substream, another one will be opened. It could be a valid implementation + // to also just kill the entire connection, however doing so is a bit too intrusive and + // punches through abstraction layers. + handshakeDataChannel.onopen = null; + handshakeDataChannel.onerror = null; + handshakeDataChannel.onclose = null; + handshakeDataChannel.onmessage = null; + handshakeDataChannel = undefined; + } else if (!isOpen) { + // Substream wasn't opened yet and thus has failed to open. The API has no mechanism to + // report substream openings failures. We could try opening it again, but given that + // it's unlikely to succeed, we simply opt to kill the entire connection. + killAllJs(); + config.onConnectionReset("data channel failed to open" + error ? " " + error.toString() : ""); + } else { + // Substream was open and is now closed. Normal situation. config.onStreamReset(dataChannelId); + } }; dataChannel.onmessage = (m) => { - // The `data` field is an `ArrayBuffer`. - config.onMessage(new Uint8Array(m.data), dataChannelId); + // The `data` field is an `ArrayBuffer`. + config.onMessage(new Uint8Array(m.data), dataChannelId); } - dataChannels.set(dataChannelId, dataChannel); + if (direction !== 'first-outbound') + dataChannels.set(dataChannelId, dataChannel); + else + handshakeDataChannel = dataChannel } // It is possible for the browser to use multiple different certificates. @@ -188,7 +276,7 @@ export function start(options?: ClientOptions): Client { // According to , // browsers are guaranteed to support `{ name: "ECDSA", namedCurve: "P-256" }`. RTCPeerConnection.generateCertificate({ name: "ECDSA", namedCurve: "P-256", hash: "SHA-256" } as EcKeyGenParams).then(async (localCertificate) => { - if (cancelOpening) + if (pc === null) return; // Create a new WebRTC connection. @@ -229,7 +317,7 @@ export function start(options?: ClientOptions): Client { config.onConnectionReset('Failed to obtain the browser certificate fingerprint'); return; } - const localTlsCertificateMultihash = new Uint8Array(34); + localTlsCertificateMultihash = new Uint8Array(34); localTlsCertificateMultihash.set([0x12, 32], 0); localTlsCertificateMultihash.set(localTlsCertificateHex!.split(':').map((s) => parseInt(s, 16)), 2); @@ -240,21 +328,8 @@ export function start(options?: ClientOptions): Client { // open. pc.onconnectionstatechange = (_event) => { if (pc!.connectionState == "closed" || pc!.connectionState == "disconnected" || pc!.connectionState == "failed") { + killAllJs(); config.onConnectionReset("WebRTC state transitioned to " + pc!.connectionState); - - pc!.onconnectionstatechange = null; - pc!.onnegotiationneeded = null; - pc!.ondatachannel = null; - - for (const channel of Array.from(dataChannels.values())) { - channel.onopen = null; - channel.onerror = null; - channel.onclose = null; - channel.onmessage = null; - } - - pc!.close(); // Not necessarily necessary, but it doesn't hurt to do so. - dataChannels.clear(); } }; @@ -342,49 +417,27 @@ export function start(options?: ClientOptions): Client { }; pc.ondatachannel = ({ channel }) => { + // TODO: is the substream maybe already open? according to the Internet it seems that no but it's unclear addChannel(channel, 'inbound') }; - // Creating a `RTCPeerConnection` doesn't actually do anything before a channel is created. - // The connection is therefore immediately reported as opened to smoldot so that it starts - // opening substreams. - // One concern might be that smoldot will think that the remote is reachable at this address - // (because we report the connection as being open) even when it might not be the case. - // However, WebRTC has a handshake to perform, and smoldot will only consider a connection - // as "actually open" once the handshake has finished. - config.onOpen({ - type: 'multi-stream', - handshake: 'webrtc', - localTlsCertificateMultihash, - remoteTlsCertificateMultihash: remoteCertMultihash - }); + // Creating a `RTCPeerConnection` doesn't actually do anything before `createDataChannel` is + // called. Smoldot's API, however, requires you to treat entire connections as open or + // closed. We know, according to the libp2p WebRTC specification, that every connection + // always starts with a substream where a handshake is performed. After we've reported that + // the connection is open, smoldot will open a substream in order to perform the handshake. + // Instead of following this API, we open this substream in advance, and will notify smoldot + // that the connection is open when the substream is open. + // Note that the label passed to `createDataChannel` is required to be empty as per the + // libp2p WebRTC specification. + addChannel(pc!.createDataChannel("", { id: 0, negotiated: true }), 'first-outbound') }); return { reset: (streamId: number | undefined): void => { // If `streamId` is undefined, then the whole connection must be destroyed. if (streamId === undefined) { - // The `RTCPeerConnection` is created at the same time as we report the connection as - // being open. It is however possible for smoldot to cancel the opening, in which case - // `pc` will still be undefined. - if (!pc) { - cancelOpening = true; - return; - } - - pc.onconnectionstatechange = null; - pc.onnegotiationneeded = null; - pc.ondatachannel = null; - - for (const channel of Array.from(dataChannels.values())) { - channel.onopen = null; - channel.onerror = null; - channel.onclose = null; - channel.onmessage = null; - } - - pc.close(); - dataChannels.clear(); + killAllJs(); } else { const channel = dataChannels.get(streamId)!; @@ -404,12 +457,22 @@ export function start(options?: ClientOptions): Client { openOutSubstream: () => { // `openOutSubstream` can only be called after we have called `config.onOpen`, therefore // `pc` is guaranteed to be non-null. - // Note that the label passed to `createDataChannel` is required to be empty as per the - // libp2p WebRTC specification. - if (isFirstSubstream) { - isFirstSubstream = false; - addChannel(pc!.createDataChannel("", { id: 0, negotiated: true }), 'outbound') + // As explained above, we open a data channel ahead of time. If this data channel is still + // there, we report it. + if (handshakeDataChannel) { + // Do this asynchronously because calling callbacks within callbacks is error-prone. + (async () => { + // We need to check again if `handshakeDataChannel` is still defined, as the + // connection might have been closed. + if (handshakeDataChannel) { + config.onStreamOpened(handshakeDataChannel.id!, 'outbound') + dataChannels.set(handshakeDataChannel.id!, handshakeDataChannel) + handshakeDataChannel = undefined + } + })() } else { + // Note that the label passed to `createDataChannel` is required to be empty as per the + // libp2p WebRTC specification. addChannel(pc!.createDataChannel(""), 'outbound') } } diff --git a/bin/wasm-node/javascript/src/instance/bindings-smoldot-light.ts b/bin/wasm-node/javascript/src/instance/bindings-smoldot-light.ts index 8e87460fb1..ebf5e4c74a 100644 --- a/bin/wasm-node/javascript/src/instance/bindings-smoldot-light.ts +++ b/bin/wasm-node/javascript/src/instance/bindings-smoldot-light.ts @@ -107,6 +107,10 @@ export interface Config { * connections of type "multi-stream". * * The `onStreamOpened` callback must later be called with an outbound direction. + * + * Note that no mechanism exists in this API to handle the situation where a substream fails + * to open, as this is not supposed to happen. If you need to handle such a situation, either + * try again opening a substream again or reset the entire connection. */ openOutSubstream(): void; } diff --git a/bin/wasm-node/rust/src/bindings.rs b/bin/wasm-node/rust/src/bindings.rs index 164fe4ad7e..079a0e349d 100644 --- a/bin/wasm-node/rust/src/bindings.rs +++ b/bin/wasm-node/rust/src/bindings.rs @@ -209,6 +209,11 @@ extern "C" { /// /// This function will only be called for multi-stream connections. The connection must /// currently be in the `Open` state. See the documentation of [`connection_new`] for details. + /// + /// > **Note**: No mechanism exists in this API to handle the situation where a substream fails + /// > to open, as this is not supposed to happen. If you need to handle such a + /// > situation, either try again opening a substream again or reset the entire + /// > connection. pub fn connection_stream_open(connection_id: u32); /// Abruptly closes an existing substream of a multi-stream connection. The substream must