Skip to content

Commit

Permalink
Fix serialization of S3 paths with spaces (#3565)
Browse files Browse the repository at this point in the history

Signed-off-by: Ben Sherman <[email protected]>
Signed-off-by: Paolo Di Tommaso <[email protected]>
Co-authored-by: Paolo Di Tommaso <[email protected]>
  • Loading branch information
bentsherman and pditommaso committed Feb 18, 2023
1 parent b24ee13 commit 935df94
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,11 @@ class PathSerializer extends Serializer<Path> {
return FileSystems.getDefault().getPath(path)
}

// try to find provider
// NOTE: PATHS FOR NON DEFAULT FILE SYSTEMS SHOULD BE CREATED
// BY THE CORRESPONDING SPECIALISED SERIALIZER CLASSES
//
// THIS IS LEGACY CODE THAT EVENTUALLY SHOULD BE REMOVED
//
def uri = URI.create("$scheme://$path")
def fs = FileHelper.getOrCreateFileSystemFor(uri)
return fs.provider().getPath(uri)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ class S3PathFactory extends FileSystemPathFactory {
// normalise 's3' path
if( str.startsWith('s3://') && str[5]!='/' ) {
final path = "s3:///${str.substring(5)}"
// note: this URI constructor parse the path parameter and extract the `scheme` and `authority` components
final uri = new URI(null,null, path,null,null)
return FileHelper.getOrCreateFileSystemFor(uri).provider().getPath(uri)
return create(path)
}
return null
}
Expand All @@ -42,4 +40,22 @@ class S3PathFactory extends FileSystemPathFactory {
: null
}

/**
* Creates a {@link S3Path} from a S3 formatted URI.
*
* @param path
* A S3 URI path e.g. s3:///BUCKET_NAME/some/data.
* NOTE it expect the s3 prefix provided with triple `/` .
* This is required by the underlying implementation expecting the host name in the URI to be empty
* and the bucket name to be the first path element
* @return
* The corresponding {@link S3Path}
*/
static S3Path create(String path) {
if( !path ) throw new IllegalArgumentException("Missing S3 path argument")
if( !path.startsWith('s3:///') ) throw new IllegalArgumentException("S3 path must start with s3:/// prefix -- offending value '$path'")
// note: this URI constructor parse the path parameter and extract the `scheme` and `authority` components
final uri = new URI(null,null, path,null,null)
return (S3Path)FileHelper.getOrCreateFileSystemFor(uri).provider().getPath(uri)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,47 @@

package nextflow.cloud.aws.util


import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.Serializer
import com.esotericsoftware.kryo.io.Input
import com.esotericsoftware.kryo.io.Output
import com.upplication.s3fs.S3Path
import groovy.transform.CompileStatic
import nextflow.util.PathSerializer
import groovy.util.logging.Slf4j
import nextflow.util.SerializerRegistrant
import org.pf4j.Extension

/**
* Register the S3Path serializer
*
* @author Paolo Di Tommaso <[email protected]>
*/
@Slf4j
@Extension
@CompileStatic
class S3PathSerializer implements SerializerRegistrant {
class S3PathSerializer extends Serializer<S3Path> implements SerializerRegistrant {

@Override
void register(Map<Class, Object> serializers) {
serializers.put(S3Path, PathSerializer)
serializers.put(S3Path, S3PathSerializer)
}

@Override
void write(Kryo kryo, Output output, S3Path target) {
final scheme = target.getFileSystem().provider().getScheme()
final path = target.toString()
log.trace "S3Path serialization > scheme: $scheme; path: $path"
output.writeString(scheme)
output.writeString(path)
}

@Override
S3Path read(Kryo kryo, Input input, Class<S3Path> type) {
final scheme = input.readString()
final path = input.readString()
if( scheme != 's3' ) throw new IllegalStateException("Unexpected scheme for S3 path -- offending value '$scheme'")
log.trace "S3Path de-serialization > scheme: $scheme; path: $path"
return (S3Path) S3PathFactory.create("s3://${path}")
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright 2020-2022, Seqera Labs
* Copyright 2013-2019, Centre for Genomic Regulation (CRG)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package nextflow.util

import nextflow.cloud.aws.util.S3PathFactory
import spock.lang.Specification
/**
*
* @author Ben Sherman <[email protected]>
*/
class S3PathSerializerTest extends Specification {

def 'should serialise s3 path' () {
when:
def path = S3PathFactory.parse('s3://mybucket/file.txt')
def buffer = KryoHelper.serialize(path)
then:
KryoHelper.deserialize(buffer).getClass().getName() == 'com.upplication.s3fs.S3Path'
KryoHelper.deserialize(buffer) == S3PathFactory.parse('s3://mybucket/file.txt')
}

def 'should serialise s3 path with spaces' () {
when:
def path = S3PathFactory.parse('s3://mybucket/file with spaces.txt')
def buffer = KryoHelper.serialize(path)
then:
KryoHelper.deserialize(buffer).getClass().getName() == 'com.upplication.s3fs.S3Path'
KryoHelper.deserialize(buffer) == S3PathFactory.parse('s3://mybucket/file with spaces.txt')
}

}

0 comments on commit 935df94

Please sign in to comment.