Skip to content

Commit

Permalink
Remove Glacier auto-retrieval (#4705)
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Sherman <[email protected]>
  • Loading branch information
bentsherman authored Feb 1, 2024
1 parent 3a19386 commit 5f0ec50
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 159 deletions.
13 changes: 4 additions & 9 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,24 +232,19 @@ The following settings are available:
: The AWS S3 API entry point e.g. `https://s3-us-west-1.amazonaws.com`.

`aws.client.glacierAutoRetrieval`
: :::{versionadded} 22.12.0-edge
: :::{deprecated} 24.02.0-edge
Glacier auto-retrieval is no longer supported. Instead, consider using the AWS CLI to restore any Glacier objects before or at the beginning of your pipeline (i.e. in a Nextflow process).
:::
: *Experimental: may change in a future release.*
: Enable auto-retrieval of S3 objects with a Glacier storage class (default: `false`).
: :::{note}
This feature only works for S3 objects that are downloaded by Nextflow directly. It is not supported for tasks (e.g. when using the AWS Batch executor), since that would lead to many tasks sitting idle for several hours and wasting resources. If you need to restore many objects from Glacier, consider restoring them in a script prior to launching the pipeline.
:::

`aws.client.glacierExpirationDays`
: :::{versionadded} 22.12.0-edge
: :::{deprecated} 24.02.0-edge
:::
: *Experimental: may change in a future release.*
: The time, in days, between when an object is restored to the bucket and when it expires (default: `7`).

`aws.client.glacierRetrievalTier`
: :::{versionadded} 23.03.0-edge
: :::{deprecated} 24.02.0-edge
:::
: *Experimental: may change in a future release.*
: The retrieval tier to use when restoring objects from Glacier, one of [`Expedited`, `Standard`, `Bulk`].

`aws.client.maxConnections`
Expand Down
103 changes: 0 additions & 103 deletions plugins/nf-amazon/src/main/nextflow/cloud/aws/nio/S3Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
import com.amazonaws.services.s3.model.CopyPartRequest;
import com.amazonaws.services.s3.model.CopyPartResult;
import com.amazonaws.services.s3.model.GetObjectTaggingRequest;
import com.amazonaws.services.s3.model.GlacierJobParameters;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ListObjectsRequest;
Expand All @@ -63,7 +62,6 @@
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.RestoreObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.SSEAlgorithm;
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
Expand Down Expand Up @@ -112,12 +110,6 @@ public class S3Client {

private Integer uploadMaxThreads = 10;

private boolean glacierAutoRetrieval;

private int glacierExpirationDays = 7;

private String glacierRetrievalTier;

/**
 * Creates a wrapper around the given AWS S3 client.
 *
 * @param client the {@link AmazonS3} instance stored in the {@code client} field
 */
public S3Client(AmazonS3 client) {
this.client = client;
}
Expand Down Expand Up @@ -319,38 +311,6 @@ public CannedAccessControlList getCannedAcl() {
return cannedAcl;
}

/**
 * Stores the Glacier auto-retrieval flag and reports the new value at debug level.
 *
 * @param value the flag value to store in {@code glacierAutoRetrieval}
 */
public void setGlacierAutoRetrieval(boolean value) {
    glacierAutoRetrieval = value;
    log.debug("Setting S3 glacierAutoRetrieval={}", value);
}

/**
 * String-based variant of the Glacier auto-retrieval setter.
 *
 * <p>A {@code null} argument leaves the current setting untouched; any other
 * value is converted with {@link Boolean#parseBoolean(String)} and forwarded
 * to the boolean overload.
 *
 * @param value the textual flag value, or {@code null} to keep the current setting
 */
public void setGlacierAutoRetrieval(String value) {
    if( value != null ) {
        final boolean enabled = Boolean.parseBoolean(value);
        setGlacierAutoRetrieval(enabled);
    }
}

/**
 * Stores the number of expiration days used for Glacier restore requests
 * and reports the new value at debug level.
 *
 * @param days the value to store in {@code glacierExpirationDays}
 */
public void setGlacierExpirationDays(int days) {
    glacierExpirationDays = days;
    log.debug("Setting S3 glacierExpirationDays={}", days);
}

/**
 * String-based variant of the Glacier expiration-days setter.
 *
 * <p>A {@code null} argument is ignored. A value that cannot be parsed as an
 * integer is reported with a warning and the current setting is kept.
 *
 * @param days the textual number of days, or {@code null} to keep the current setting
 */
public void setGlacierExpirationDays(String days) {
    if( days == null ) {
        return;
    }

    final int parsedDays;
    try {
        parsedDays = Integer.parseInt(days);
    }
    catch( NumberFormatException e ) {
        log.warn("Not a valid AWS S3 glacierExpirationDays: `{}` -- Using default", days);
        return;
    }
    setGlacierExpirationDays(parsedDays);
}

/**
 * Stores the retrieval tier used for Glacier restore requests and reports
 * the new value at debug level. The value is stored as given, without validation.
 *
 * @param tier the value to store in {@code glacierRetrievalTier}
 */
public void setGlacierRetrievalTier(String tier) {
    glacierRetrievalTier = tier;
    log.debug("Setting S3 glacierRetrievalTier={}", tier);
}

/**
 * Returns the underlying AWS S3 client held by this wrapper.
 *
 * @return the {@link AmazonS3} instance stored in the {@code client} field
 */
public AmazonS3 getClient() {
return client;
}
Expand Down Expand Up @@ -555,73 +515,10 @@ public void downloadFile(S3Path source, File target) {
Thread.currentThread().interrupt();
}
catch (AmazonS3Exception e) {
handleAmazonException(source, target, e);
}
}

/**
 * Handles an {@link AmazonS3Exception} raised while downloading {@code source}
 * into {@code target}.
 *
 * <p>When the failure is recognised as a Glacier storage-class error and
 * {@code glacierAutoRetrieval} is enabled, the object is restored with
 * {@code restoreFromGlacier} and the download is attempted again. In every
 * other case the original exception is rethrown unchanged.
 *
 * @param source the S3 path whose download failed
 * @param target the local file the object was being downloaded to
 * @param e the exception raised by the failed download
 * @throws AmazonS3Exception when the error is not a Glacier storage-class
 *         error, or when auto-retrieval is disabled
 */
private void handleAmazonException(S3Path source, File target, AmazonS3Exception e) {
    // the following message is returned when accessing a Glacier stored file
    // "The operation is not valid for the object's storage class"
    final String message = e.getMessage();
    // Null-safe checks: a missing message or error code must not replace the
    // original S3 exception with a NullPointerException.
    final boolean isGlacierError = message != null
            && message.contains("storage class")
            && "InvalidObjectState".equals(e.getErrorCode());

    if( !isGlacierError || !glacierAutoRetrieval ) {
        throw e;
    }

    log.info("S3 download s3://{}/{} failed due to invalid storage class -- Retrieving from Glacier", source.getBucket(), source.getKey());
    restoreFromGlacier(source.getBucket(), source.getKey());
    downloadFile(source, target);
}

/**
 * Requests the restore of an S3 object and blocks until the object metadata
 * no longer reports an ongoing restore.
 *
 * <p>The restore request carries {@code glacierExpirationDays} unless the
 * object's storage class is missing or {@code INTELLIGENT_TIERING}, and carries
 * {@code glacierRetrievalTier} when one is configured. A failure whose message
 * contains {@code RestoreAlreadyInProgress} is treated as success, so the
 * method then waits for the restore that is already running.
 *
 * <p>The restore status is polled every 30 seconds and a progress message is
 * logged at most once every 5 minutes. If the waiting thread is interrupted,
 * the method stops waiting and re-asserts the interrupt flag.
 *
 * @param bucketName the bucket holding the object
 * @param key the key of the object to restore
 * @throws AmazonS3Exception when the restore request fails for any reason
 *         other than a restore already being in progress
 */
protected void restoreFromGlacier(String bucketName, String key) {
    final int pollIntervalMillis = 30_000;
    final long logIntervalMillis = 5 * 60 * 1_000;

    try {
        RestoreObjectRequest request = new RestoreObjectRequest(bucketName, key);

        String storageClass = client.getObjectMetadata(bucketName, key).getStorageClass();
        if( storageClass!=null && !storageClass.equals("INTELLIGENT_TIERING") )
            request.setExpirationInDays(glacierExpirationDays);

        if( glacierRetrievalTier != null )
            request.setGlacierJobParameters(
                    new GlacierJobParameters()
                            .withTier(glacierRetrievalTier)
            );

        client.restoreObjectV2(request);
    }
    catch (AmazonS3Exception e) {
        final String message = e.getMessage();
        if( message != null && message.contains("RestoreAlreadyInProgress") ) {
            log.debug("S3 Glacier restore already initiated for object s3://{}/{}", bucketName, key);
        }
        else {
            throw e;
        }
    }

    try {
        boolean ongoingRestore = true;
        long lastLogged = System.currentTimeMillis();
        while( ongoingRestore ) {
            final long now = System.currentTimeMillis();
            if( now-lastLogged>logIntervalMillis ) {
                log.info("S3 Glacier restore ongoing for object s3://{}/{}", bucketName, key);
                lastLogged = now;
            }
            Thread.sleep(pollIntervalMillis);
            // getOngoingRestore() returns a boxed Boolean; a null value (no restore
            // status reported) is treated as "not ongoing" rather than unboxed.
            ongoingRestore = Boolean.TRUE.equals(
                    client.getObjectMetadata(bucketName, key).getOngoingRestore());
        }
    }
    catch (InterruptedException e) {
        log.debug("S3 Glacier restore s3://{}/{} interrupted", bucketName, key);
        Thread.currentThread().interrupt();
    }
}

public void downloadDirectory(S3Path source, File targetFile) throws IOException {
//
// the download directory method provided by the TransferManager replicates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -845,9 +845,9 @@ protected S3FileSystem createFileSystem(URI uri, AwsConfig awsConfig) {
client.setKmsKeyId(props.getProperty("storage_kms_key_id"));
client.setUploadChunkSize(props.getProperty("upload_chunk_size"));
client.setUploadMaxThreads(props.getProperty("upload_max_threads"));
client.setGlacierAutoRetrieval(props.getProperty("glacier_auto_retrieval"));
client.setGlacierExpirationDays(props.getProperty("glacier_expiration_days"));
client.setGlacierRetrievalTier(props.getProperty("glacier_retrieval_tier"));

if( props.getProperty("glacier_auto_retrieval") != null )
log.warn("Glacier auto-retrieval is no longer supported, config option `aws.client.glacierAutoRetrieval` will be ignored");

return new S3FileSystem(this, client, uri, props);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1417,48 +1417,4 @@ class AwsS3NioTest extends Specification implements AwsS3BaseSpec {
deleteBucket(bucket1)
}

// Integration test: uploads an object with the GLACIER storage class, checks that a
// plain download fails, then enables Glacier auto-retrieval on the client and checks
// that the download succeeds.
@Ignore // takes too long to test via CI server
def 'should restore from glacier' () {
given:
def TEXT = randomText(10_000)
def folder = Files.createTempDirectory('test')
def sourceFile = Files.write(folder.resolve('foo.data'), TEXT.bytes)
def downloadFile = folder.resolve('copy.data')
and:
def bucket1 = createBucket()

// upload a file to a remote bucket using the GLACIER storage class
when:
def target = s3path("s3://$bucket1/foo.data")
and:
target.setStorageClass('GLACIER')
def client = target.getFileSystem().getClient()
and:
FileHelper.copyPath(sourceFile, target)
// the file exists and reports the GLACIER storage class
then:
Files.exists(target)
and:
client
.getObjectMetadata(target.getBucket(), target.getKey())
.getStorageClass() == 'GLACIER'

// downloading without auto-retrieval enabled is expected to fail
when:
FileHelper.copyPath(target, downloadFile)
then:
thrown(AmazonS3Exception)

// with auto-retrieval enabled the same download is expected to succeed
when:
client.setGlacierAutoRetrieval(true)
and:
FileHelper.copyPath(target, downloadFile)
then:
Files.exists(downloadFile)

// reset the client flag and remove the temporary resources
cleanup:
client?.setGlacierAutoRetrieval(false)
folder?.delete()
deleteBucket(bucket1)
}

}

0 comments on commit 5f0ec50

Please sign in to comment.