From a36dbcf7ff3f55c77717ccc0b4197c9211cfe5a9 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Tue, 8 Aug 2023 00:05:52 +0900 Subject: [PATCH 1/2] Move cache/converter.go to util/converter/converter.go Signed-off-by: Akihiro Suda --- cache/blobs.go | 3 ++- cache/manager_test.go | 9 +++++---- {cache => util/converter}/converter.go | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) rename {cache => util/converter}/converter.go (93%) diff --git a/cache/blobs.go b/cache/blobs.go index 0571406fef74..18372f4f3439 100644 --- a/cache/blobs.go +++ b/cache/blobs.go @@ -14,6 +14,7 @@ import ( "github.com/moby/buildkit/session" "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/compression" + "github.com/moby/buildkit/util/converter" "github.com/moby/buildkit/util/flightcontrol" "github.com/moby/buildkit/util/winlayers" digest "github.com/opencontainers/go-digest" @@ -422,7 +423,7 @@ func ensureCompression(ctx context.Context, ref *immutableRef, comp compression. } // Resolve converters - layerConvertFunc, err := getConverter(ctx, ref.cm.ContentStore, desc, comp) + layerConvertFunc, err := converter.New(ctx, ref.cm.ContentStore, desc, comp) if err != nil { return struct{}{}, err } else if layerConvertFunc == nil { diff --git a/cache/manager_test.go b/cache/manager_test.go index 76679cf8bb06..8a0f56a032f9 100644 --- a/cache/manager_test.go +++ b/cache/manager_test.go @@ -44,6 +44,7 @@ import ( "github.com/moby/buildkit/solver" "github.com/moby/buildkit/util/compression" "github.com/moby/buildkit/util/contentutil" + "github.com/moby/buildkit/util/converter" "github.com/moby/buildkit/util/iohelper" "github.com/moby/buildkit/util/leaseutil" "github.com/moby/buildkit/util/overlay" @@ -1423,7 +1424,7 @@ func testSharingCompressionVariant(ctx context.Context, t *testing.T, co *cmOut, require.NoError(t, err, "compression: %v", c) uDgst := bDesc.Digest if c != compression.Uncompressed { - convertFunc, err := getConverter(ctx, co.cs, bDesc, compression.New(compression.Uncompressed)) + convertFunc, err := converter.New(ctx, co.cs, bDesc, compression.New(compression.Uncompressed)) require.NoError(t, err, "compression: %v", c) uDesc, err := convertFunc(ctx, co.cs, bDesc) require.NoError(t, err, "compression: %v", c) @@ -1558,7 +1559,7 @@ func TestConversion(t *testing.T) { testName := fmt.Sprintf("%s=>%s", i, j) // Prepare the source compression type - convertFunc, err := getConverter(egctx, store, orgDesc, compSrc) + convertFunc, err := converter.New(egctx, store, orgDesc, compSrc) require.NoError(t, err, testName) srcDesc := &orgDesc if convertFunc != nil { @@ -1567,7 +1568,7 @@ func TestConversion(t *testing.T) { } // Convert the blob - convertFunc, err = getConverter(egctx, store, *srcDesc, compDest) + convertFunc, err = converter.New(egctx, store, *srcDesc, compDest) require.NoError(t, err, testName) resDesc := srcDesc if convertFunc != nil { @@ -1576,7 +1577,7 @@ func TestConversion(t *testing.T) { } // Check the uncompressed digest is the same as the original - convertFunc, err = getConverter(egctx, store, *resDesc, compression.New(compression.Uncompressed)) + convertFunc, err = converter.New(egctx, store, *resDesc, compression.New(compression.Uncompressed)) require.NoError(t, err, testName) recreatedDesc := resDesc if convertFunc != nil { diff --git a/cache/converter.go b/util/converter/converter.go similarity index 93% rename from cache/converter.go rename to util/converter/converter.go index f19412b7086a..5a1f10705273 100644 --- a/cache/converter.go +++ b/util/converter/converter.go @@ -1,4 +1,4 @@ -package cache +package converter import ( "bufio" @@ -20,9 +20,9 @@ import ( "github.com/pkg/errors" ) -// getConverter returns converter function according to the specified compression type. +// New returns converter function according to the specified compression type. // If no conversion is needed, this returns nil without error. -func getConverter(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp compression.Config) (converter.ConvertFunc, error) { +func New(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp compression.Config) (converter.ConvertFunc, error) { if needs, err := comp.Type.NeedsConversion(ctx, cs, desc); err != nil { return nil, errors.Wrapf(err, "failed to determine conversion needs") } else if !needs { From 2f8292cf791b5effdf16d2bd88d23e9abf01d81b Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Mon, 24 Jul 2023 21:40:21 +0900 Subject: [PATCH 2/2] exporter/containerimage: new option: rewrite-timestamp rewrite-timestamp rewrites timestamps in layers for reproducible builds. When a layer is rewritten, an annotation "buildkit/rewritten-timestamp=" is set to the layer. Example: ``` buildctl build --frontend dockerfile.v0 \ --opt build-arg:SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH} \ --output type=oci,dest=dest.tar,rewrite-timestamp=true \ ... ``` Alternative to PR 3560 Signed-off-by: Akihiro Suda --- README.md | 2 + docs/build-repro.md | 30 ++----- exporter/containerimage/export.go | 20 ++++- exporter/containerimage/exptypes/keys.go | 4 + exporter/containerimage/opts.go | 3 + exporter/containerimage/writer.go | 64 ++++++++++++- frontend/dockerfile/dockerfile_test.go | 89 +++++++++++++------ util/contentutil/storewithprovider.go | 24 +++++ util/converter/converter.go | 60 +++++++++++-- util/converter/tarconverter/tarconverter.go | 49 ++++++++++ .../tarconverter/tarconverter_test.go | 41 +++++++++ 11 files changed, 330 insertions(+), 56 deletions(-) create mode 100644 util/contentutil/storewithprovider.go create mode 100644 util/converter/tarconverter/tarconverter.go create mode 100644 util/converter/tarconverter/tarconverter_test.go diff --git a/README.md b/README.md index d319a2d4b24c..6aba77df4bde 100644 --- a/README.md +++ b/README.md @@ -261,6 +261,8 @@ Keys supported by image output: * `name-canonical=true`: add additional canonical name `name@` * `compression=`: choose compression type for layers newly created and cached, gzip is default value. estargz should be used with `oci-mediatypes=true`. * `compression-level=`: compression level for gzip, estargz (0-9) and zstd (0-22) +* `rewrite-timestamp=true` (Present in the `master` branch ): rewrite the file timestamps to the `SOURCE_DATE_EPOCH` value. + See [`docs/build-repro.md`](docs/build-repro.md) for how to specify the `SOURCE_DATE_EPOCH` value. * `force-compression=true`: forcefully apply `compression` option to all layers (including already existing layers) * `store=true`: store the result images to the worker's (e.g. containerd) image store as well as ensures that the image has all blobs in the content store (default `true`). Ignored if the worker doesn't have image store (e.g. OCI worker). * `annotation.=`: attach an annotation with the respective `key` and `value` to the built image diff --git a/docs/build-repro.md b/docs/build-repro.md index 140a396a2d36..014213665b20 100644 --- a/docs/build-repro.md +++ b/docs/build-repro.md @@ -57,26 +57,14 @@ The build arg value is used for: - the timestamp of the files exported with the `local` exporter - the timestamp of the files exported with the `tar` exporter -The build arg value is not used for the timestamps of the files inside the image currently ([Caveats](#caveats)). - -See also the [documentation](/frontend/dockerfile/docs/reference.md#buildkit-built-in-build-args) of the Dockerfile frontend. - -## Caveats -### Timestamps of the files inside the image -Currently, the `SOURCE_DATE_EPOCH` value is not used for the timestamps of the files inside the image. - -Workaround: -```dockerfile -# Limit the timestamp upper bound to SOURCE_DATE_EPOCH. -# Workaround for https://github.com/moby/buildkit/issues/3180 -ARG SOURCE_DATE_EPOCH -RUN find $( ls / | grep -E -v "^(dev|mnt|proc|sys)$" ) -newermt "@${SOURCE_DATE_EPOCH}" -writable -xdev | xargs touch --date="@${SOURCE_DATE_EPOCH}" --no-dereference - -# Squashing is needed so that only files with the defined timestamp from the last layer are added to the image. -# This squashing also addresses non-reproducibility of whiteout timestamps (https://github.com/moby/buildkit/issues/3168) on BuildKit prior to v0.12. -FROM scratch -COPY --from=0 / / +To apply the build arg value to the timestamps of the files inside the image, specify `rewrite-timestamp=true` as an image exporter option: +``` +--output type=image,name=docker.io/username/image,push=true,rewrite-timestamp=true ``` -The `touch` command above is [not effective](https://github.com/moby/buildkit/issues/3309) for mount point directories. -A workaround is to create mount point directories below `/dev` (tmpfs) so that the mount points will not be included in the image layer. + +The `rewrite-timestamp` option is only available in the `master` branch of BuildKit. +See [v0.12 documentation](https://github.com/moby/buildkit/blob/v0.12/docs/build-repro.md#caveats) for dealing with timestamps +in BuildKit v0.12 and v0.11. + +See also the [documentation](/frontend/dockerfile/docs/reference.md#buildkit-built-in-build-args) of the Dockerfile frontend. diff --git a/exporter/containerimage/export.go b/exporter/containerimage/export.go index 5cfd9cbb9def..3f8865f8e830 100644 --- a/exporter/containerimage/export.go +++ b/exporter/containerimage/export.go @@ -274,6 +274,13 @@ func (e *imageExporterInstance) Export(ctx context.Context, src *exporter.Source tagDone(nil) if e.unpack { + if opts.RewriteTimestamp { + // e.unpackImage cannot be used because src ref does not point to the rewritten image + /// + // TODO: change e.unpackImage so that it takes Result[Remote] as parameter. + // https://github.com/moby/buildkit/pull/4057#discussion_r1324106088 + return nil, nil, errors.New("exporter option \"rewrite-timestamp\" conflicts with \"unpack\"") + } if err := e.unpackImage(ctx, img, src, session.NewGroup(sessionID)); err != nil { return nil, nil, err } @@ -310,7 +317,18 @@ func (e *imageExporterInstance) Export(ctx context.Context, src *exporter.Source } } if e.push { - err := e.pushImage(ctx, src, sessionID, targetName, desc.Digest) + if opts.RewriteTimestamp { + annotations := map[digest.Digest]map[string]string{} + addAnnotations(annotations, *desc) + // e.pushImage cannot be used because src ref does not point to the rewritten image + // + // TODO: change e.pushImage so that it takes Result[Remote] as parameter. + // https://github.com/moby/buildkit/pull/4057#discussion_r1324106088 + err = push.Push(ctx, e.opt.SessionManager, sessionID, e.opt.ImageWriter.opt.ContentStore, e.opt.ImageWriter.ContentStore(), + desc.Digest, targetName, e.insecure, e.opt.RegistryHosts, e.pushByDigest, annotations) + } else { + err = e.pushImage(ctx, src, sessionID, targetName, desc.Digest) + } if err != nil { return nil, nil, errors.Wrapf(err, "failed to push %v", targetName) } diff --git a/exporter/containerimage/exptypes/keys.go b/exporter/containerimage/exptypes/keys.go index c43221849911..722f099cf0ce 100644 --- a/exporter/containerimage/exptypes/keys.go +++ b/exporter/containerimage/exptypes/keys.go @@ -72,4 +72,8 @@ var ( // Value: int (0-9) for gzip and estargz // Value: int (0-22) for zstd OptKeyCompressionLevel ImageExporterOptKey = "compression-level" + + // Rewrite timestamps in layers to match SOURCE_DATE_EPOCH + // Value: bool + OptKeyRewriteTimestamp ImageExporterOptKey = "rewrite-timestamp" ) diff --git a/exporter/containerimage/opts.go b/exporter/containerimage/opts.go index 791f268afda5..a9dc2bd9a7d1 100644 --- a/exporter/containerimage/opts.go +++ b/exporter/containerimage/opts.go @@ -21,6 +21,7 @@ type ImageCommitOpts struct { Epoch *time.Time ForceInlineAttestations bool // force inline attestations to be attached + RewriteTimestamp bool // rewrite timestamps in layers to match the epoch } func (c *ImageCommitOpts) Load(ctx context.Context, opt map[string]string) (map[string]string, error) { @@ -52,6 +53,8 @@ func (c *ImageCommitOpts) Load(ctx context.Context, opt map[string]string) (map[ err = parseBool(&c.ForceInlineAttestations, k, v) case exptypes.OptKeyPreferNondistLayers: err = parseBool(&c.RefCfg.PreferNonDistributable, k, v) + case exptypes.OptKeyRewriteTimestamp: + err = parseBool(&c.RewriteTimestamp, k, v) default: rest[k] = v } diff --git a/exporter/containerimage/writer.go b/exporter/containerimage/writer.go index 15d6edb82b12..b43761aeda85 100644 --- a/exporter/containerimage/writer.go +++ b/exporter/containerimage/writer.go @@ -28,6 +28,8 @@ import ( attestationTypes "github.com/moby/buildkit/util/attestation" "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/compression" + "github.com/moby/buildkit/util/contentutil" + "github.com/moby/buildkit/util/converter" "github.com/moby/buildkit/util/progress" "github.com/moby/buildkit/util/purl" "github.com/moby/buildkit/util/system" @@ -134,7 +136,14 @@ func (ic *ImageWriter) Commit(ctx context.Context, inp *exporter.Source, session config := exptypes.ParseKey(inp.Metadata, exptypes.ExporterImageConfigKey, p) inlineCache := exptypes.ParseKey(inp.Metadata, exptypes.ExporterInlineCache, p) - mfstDesc, configDesc, err := ic.commitDistributionManifest(ctx, opts, ref, config, &remotes[0], annotations, inlineCache, opts.Epoch, session.NewGroup(sessionID)) + remote := &remotes[0] + if opts.RewriteTimestamp { + remote, err = ic.rewriteRemoteWithEpoch(ctx, opts, remote) + if err != nil { + return nil, err + } + } + mfstDesc, configDesc, err := ic.commitDistributionManifest(ctx, opts, ref, config, remote, annotations, inlineCache, opts.Epoch, session.NewGroup(sessionID)) if err != nil { return nil, err } @@ -200,6 +209,13 @@ func (ic *ImageWriter) Commit(ctx context.Context, inp *exporter.Source, session } } + if opts.RewriteTimestamp { + remote, err = ic.rewriteRemoteWithEpoch(ctx, opts, remote) + if err != nil { + return nil, err + } + } + desc, _, err := ic.commitDistributionManifest(ctx, opts, r, config, remote, opts.Annotations.Platform(&p.Platform), inlineCache, opts.Epoch, session.NewGroup(sessionID)) if err != nil { return nil, err @@ -324,6 +340,52 @@ func (ic *ImageWriter) exportLayers(ctx context.Context, refCfg cacheconfig.RefC return out, err } +// rewriteImageLayerWithEpoch rewrites the file timestamps in the layer blob to match the epoch, and returns a new descriptor that points to +// the new blob. +// +// If no conversion is needed, this returns nil without error. +func rewriteImageLayerWithEpoch(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp compression.Config, epoch *time.Time) (*ocispecs.Descriptor, error) { + converterFn, err := converter.NewWithRewriteTimestamp(ctx, cs, desc, comp, epoch) + if err != nil { + return nil, err + } + if converterFn == nil { + return nil, nil + } + return converterFn(ctx, cs, desc) +} + +func (ic *ImageWriter) rewriteRemoteWithEpoch(ctx context.Context, opts *ImageCommitOpts, remote *solver.Remote) (*solver.Remote, error) { + if opts.Epoch == nil { + bklog.G(ctx).Warn("rewrite-timestamp is specified, but no source-date-epoch was found") + return remote, nil + } + remoteDescriptors := remote.Descriptors + cs := contentutil.NewStoreWithProvider(ic.opt.ContentStore, remote.Provider) + eg, ctx := errgroup.WithContext(ctx) + rewriteDone := progress.OneOff(ctx, + fmt.Sprintf("rewriting layers with source-date-epoch %d (%s)", opts.Epoch.Unix(), opts.Epoch.String())) + for i, desc := range remoteDescriptors { + i, desc := i, desc + eg.Go(func() error { + if rewrittenDesc, err := rewriteImageLayerWithEpoch(ctx, cs, desc, opts.RefCfg.Compression, opts.Epoch); err != nil { + bklog.G(ctx).WithError(err).Warnf("failed to rewrite layer %d/%d to match source-date-epoch %d (%s)", + i+1, len(remoteDescriptors), opts.Epoch.Unix(), opts.Epoch.String()) + } else if rewrittenDesc != nil { + remoteDescriptors[i] = *rewrittenDesc + } + return nil + }) + } + if err := rewriteDone(eg.Wait()); err != nil { + return nil, err + } + return &solver.Remote{ + Provider: cs, + Descriptors: remoteDescriptors, + }, nil +} + func (ic *ImageWriter) commitDistributionManifest(ctx context.Context, opts *ImageCommitOpts, ref cache.ImmutableRef, config []byte, remote *solver.Remote, annotations *Annotations, inlineCache []byte, epoch *time.Time, sg session.Group) (*ocispecs.Descriptor, *ocispecs.Descriptor, error) { if len(config) == 0 { var err error diff --git a/frontend/dockerfile/dockerfile_test.go b/frontend/dockerfile/dockerfile_test.go index 8abe4f321004..4bb64914045c 100644 --- a/frontend/dockerfile/dockerfile_test.go +++ b/frontend/dockerfile/dockerfile_test.go @@ -43,12 +43,10 @@ import ( "github.com/moby/buildkit/solver/errdefs" "github.com/moby/buildkit/solver/pb" "github.com/moby/buildkit/util/contentutil" - "github.com/moby/buildkit/util/iohelper" "github.com/moby/buildkit/util/testutil" "github.com/moby/buildkit/util/testutil/httpserver" "github.com/moby/buildkit/util/testutil/integration" "github.com/moby/buildkit/util/testutil/workers" - digest "github.com/opencontainers/go-digest" ocispecs "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" "github.com/stretchr/testify/require" @@ -6549,9 +6547,16 @@ func testReproSourceDateEpoch(t *testing.T, sb integration.Sandbox) { if sb.Snapshotter() == "native" { t.Skip("the digest is not reproducible with the \"native\" snapshotter because hardlinks are processed in a different way: https://github.com/moby/buildkit/pull/3456#discussion_r1062650263") } + + registry, err := sb.NewRegistry() + if errors.Is(err, integration.ErrRequirements) { + t.Skip(err.Error()) + } + require.NoError(t, err) + f := getFrontend(t, sb) - tm := time.Date(2023, time.January, 10, 12, 34, 56, 0, time.UTC) + tm := time.Date(2023, time.January, 10, 12, 34, 56, 0, time.UTC) // 1673354096 t.Logf("SOURCE_DATE_EPOCH=%d", tm.Unix()) dockerfile := []byte(`# The base image cannot be busybox, due to https://github.com/moby/buildkit/issues/3455 @@ -6565,19 +6570,9 @@ RUN touch -d '2030-01-01 12:34:56' /foo-2030.1 RUN rm -f /foo.1 RUN rm -f /foo-2010.1 RUN rm -f /foo-2030.1 - -# Limit the timestamp upper bound to SOURCE_DATE_EPOCH. -# Workaround for https://github.com/moby/buildkit/issues/3180 -ARG SOURCE_DATE_EPOCH -RUN find $( ls / | grep -E -v "^(dev|mnt|proc|sys)$" ) -newermt "@${SOURCE_DATE_EPOCH}" -writable -xdev | xargs touch --date="@${SOURCE_DATE_EPOCH}" --no-dereference - -# Squashing is needed to apply the touched timestamps across multiple "RUN" instructions. -# This squashing also addresses non-reproducibility of whiteout timestamps (https://github.com/moby/buildkit/issues/3168). -FROM scratch -COPY --from=0 / / `) - const expectedDigest = "sha256:d286483eccf4d57c313a3f389cdc196e668d914d319c574b15aabdf1963c5eeb" + const expectedDigest = "sha256:29f2980a804038b0f910af98e9ddb18bfa4d5514995ee6bb4343ddf621a4e183" dir := integration.Tmpdir( t, @@ -6585,14 +6580,13 @@ COPY --from=0 / / ) defer os.RemoveAll(dir) - c, err := client.New(sb.Context(), sb.Address()) + ctx := sb.Context() + c, err := client.New(ctx, sb.Address()) require.NoError(t, err) defer c.Close() - outDigester := digest.SHA256.Digester() - outW := &iohelper.NopWriteCloser{Writer: outDigester.Hash()} - - _, err = f.Solve(sb.Context(), c, client.SolveOpt{ + target := registry + "/buildkit/testreprosourcedateepoch:" + fmt.Sprintf("%d", tm.Unix()) + solveOpt := client.SolveOpt{ FrontendAttrs: map[string]string{ "build-arg:SOURCE_DATE_EPOCH": fmt.Sprintf("%d", tm.Unix()), "platform": "linux/amd64", @@ -6603,17 +6597,62 @@ COPY --from=0 / / }, Exports: []client.ExportEntry{ { - Type: client.ExporterOCI, - Output: fixedWriteCloser(outW), + Type: client.ExporterImage, + Attrs: map[string]string{ + "name": target, + "push": "true", + "oci-mediatypes": "true", + "rewrite-timestamp": "true", + }, }, }, - }, nil) + CacheExports: []client.CacheOptionsEntry{ + { + Type: "registry", + Attrs: map[string]string{ + "ref": target + "-cache", + "oci-mediatypes": "true", + "image-manifest": "true", + }, + }, + }, + } + _, err = f.Solve(ctx, c, solveOpt, nil) require.NoError(t, err) - outDigest := outDigester.Digest().String() - t.Logf("OCI archive digest=%q", outDigest) + desc, manifest := readImage(t, ctx, target) + _, cacheManifest := readImage(t, ctx, target+"-cache") t.Log("The digest may change depending on the BuildKit version, the snapshotter configuration, etc.") - require.Equal(t, expectedDigest, outDigest) + require.Equal(t, expectedDigest, desc.Digest.String()) + // Image layers must have rewritten-timestamp + for _, l := range manifest.Layers { + require.Equal(t, fmt.Sprintf("%d", tm.Unix()), l.Annotations["buildkit/rewritten-timestamp"]) + } + // Cache layers must *not* have rewritten-timestamp + for _, l := range cacheManifest.Layers { + require.Empty(t, l.Annotations["buildkit/rewritten-timestamp"]) + } + + // Build again, but without rewrite-timestamp + solveOpt2 := solveOpt + delete(solveOpt2.Exports[0].Attrs, "rewrite-timestamp") + _, err = f.Solve(ctx, c, solveOpt2, nil) + require.NoError(t, err) + _, manifest2 := readImage(t, ctx, target) + for _, l := range manifest2.Layers { + require.Empty(t, l.Annotations["buildkit/rewritten-timestamp"]) + } +} + +//nolint:revive // context-as-argument: context.Context should be the first parameter of a function +func readImage(t *testing.T, ctx context.Context, ref string) (ocispecs.Descriptor, ocispecs.Manifest) { + desc, provider, err := contentutil.ProviderFromRef(ref) + require.NoError(t, err) + dt, err := content.ReadBlob(ctx, provider, desc) + require.NoError(t, err) + var manifest ocispecs.Manifest + require.NoError(t, json.Unmarshal(dt, &manifest)) + return desc, manifest } func testNilContextInSolveGateway(t *testing.T, sb integration.Sandbox) { diff --git a/util/contentutil/storewithprovider.go b/util/contentutil/storewithprovider.go new file mode 100644 index 000000000000..0b5df244f15a --- /dev/null +++ b/util/contentutil/storewithprovider.go @@ -0,0 +1,24 @@ +package contentutil + +import ( + "context" + + "github.com/containerd/containerd/content" + ocispecs "github.com/opencontainers/image-spec/specs-go/v1" +) + +func NewStoreWithProvider(cs content.Store, p content.Provider) content.Store { + return &storeWithProvider{Store: cs, p: p} +} + +type storeWithProvider struct { + content.Store + p content.Provider +} + +func (cs *storeWithProvider) ReaderAt(ctx context.Context, desc ocispecs.Descriptor) (content.ReaderAt, error) { + if ra, err := cs.p.ReaderAt(ctx, desc); err == nil { + return ra, nil + } + return cs.Store.ReaderAt(ctx, desc) +} diff --git a/util/converter/converter.go b/util/converter/converter.go index 5a1f10705273..6bb8aa858e9c 100644 --- a/util/converter/converter.go +++ b/util/converter/converter.go @@ -1,11 +1,13 @@ package converter import ( + "archive/tar" "bufio" "context" "fmt" "io" "sync" + "time" "github.com/containerd/containerd/content" "github.com/containerd/containerd/errdefs" @@ -14,6 +16,7 @@ import ( "github.com/moby/buildkit/identity" "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/compression" + "github.com/moby/buildkit/util/converter/tarconverter" "github.com/moby/buildkit/util/iohelper" digest "github.com/opencontainers/go-digest" ocispecs "github.com/opencontainers/image-spec/specs-go/v1" @@ -23,9 +26,20 @@ import ( // New returns converter function according to the specified compression type. // If no conversion is needed, this returns nil without error. func New(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp compression.Config) (converter.ConvertFunc, error) { - if needs, err := comp.Type.NeedsConversion(ctx, cs, desc); err != nil { + return NewWithRewriteTimestamp(ctx, cs, desc, comp, nil) +} + +// NewWithRewriteTimestamp returns converter function according to the specified compression type and the epoch. +// If no conversion is needed, this returns nil without error. +func NewWithRewriteTimestamp(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp compression.Config, rewriteTimestamp *time.Time) (converter.ConvertFunc, error) { + needs, err := comp.Type.NeedsConversion(ctx, cs, desc) + if err != nil { return nil, errors.Wrapf(err, "failed to determine conversion needs") - } else if !needs { + } + if !needs && rewriteTimestamp != nil { + needs = desc.Annotations[labelRewrittenTimestamp] != fmt.Sprintf("%d", rewriteTimestamp.UTC().Unix()) + } + if !needs { // No conversion. No need to return an error here. return nil, nil } @@ -38,15 +52,17 @@ func New(ctx context.Context, cs content.Store, desc ocispecs.Descriptor, comp c c := conversion{target: comp} c.compress, c.finalize = comp.Type.Compress(ctx, comp) c.decompress = from.Decompress + c.rewriteTimestamp = rewriteTimestamp return (&c).convert, nil } type conversion struct { - target compression.Config - decompress compression.Decompressor - compress compression.Compressor - finalize compression.Finalizer + target compression.Config + decompress compression.Decompressor + compress compression.Compressor + finalize compression.Finalizer + rewriteTimestamp *time.Time } var bufioPool = sync.Pool{ @@ -55,6 +71,20 @@ var bufioPool = sync.Pool{ }, } +func rewriteTimestampInTarHeader(epoch time.Time) tarconverter.HeaderConverter { + return func(hdr *tar.Header) { + if hdr.ModTime.After(epoch) { + hdr.ModTime = epoch + } + if hdr.AccessTime.After(epoch) { + hdr.AccessTime = epoch + } + if hdr.ChangeTime.After(epoch) { + hdr.ChangeTime = epoch + } + } +} + func (c *conversion) convert(ctx context.Context, cs content.Store, desc ocispecs.Descriptor) (*ocispecs.Descriptor, error) { bklog.G(ctx).WithField("blob", desc).WithField("target", c.target).Debugf("converting blob to the target compression") // prepare the source and destination @@ -86,11 +116,17 @@ func (c *conversion) convert(ctx context.Context, cs content.Store, desc ocispec // convert this layer diffID := digest.Canonical.Digester() - rdr, err := c.decompress(ctx, cs, desc) + decR, err := c.decompress(ctx, cs, desc) if err != nil { return nil, err } - defer rdr.Close() + defer decR.Close() + rdr := decR + if c.rewriteTimestamp != nil { + tcR := tarconverter.NewReader(decR, rewriteTimestampInTarHeader(*c.rewriteTimestamp)) + defer tcR.Close() + rdr = tcR + } if _, err := io.Copy(zw, io.TeeReader(rdr, diffID.Hash())); err != nil { return nil, err } @@ -101,6 +137,9 @@ func (c *conversion) convert(ctx context.Context, cs content.Store, desc ocispec return nil, errors.Wrap(err, "failed to flush diff during conversion") } labelz[labels.LabelUncompressed] = diffID.Digest().String() // update diffID label + if c.rewriteTimestamp != nil { + labelz[labelRewrittenTimestamp] = fmt.Sprintf("%d", c.rewriteTimestamp.UTC().Unix()) + } if err = w.Commit(ctx, 0, "", content.WithLabels(labelz)); err != nil && !errdefs.IsAlreadyExists(err) { return nil, err } @@ -117,6 +156,9 @@ func (c *conversion) convert(ctx context.Context, cs content.Store, desc ocispec newDesc.Digest = info.Digest newDesc.Size = info.Size newDesc.Annotations = map[string]string{labels.LabelUncompressed: diffID.Digest().String()} + if c.rewriteTimestamp != nil { + newDesc.Annotations[labelRewrittenTimestamp] = fmt.Sprintf("%d", c.rewriteTimestamp.UTC().Unix()) + } if c.finalize != nil { a, err := c.finalize(ctx, cs) if err != nil { @@ -140,3 +182,5 @@ func (w *onceWriteCloser) Close() (err error) { }) return } + +const labelRewrittenTimestamp = "buildkit/rewritten-timestamp" diff --git a/util/converter/tarconverter/tarconverter.go b/util/converter/tarconverter/tarconverter.go new file mode 100644 index 000000000000..6942be841771 --- /dev/null +++ b/util/converter/tarconverter/tarconverter.go @@ -0,0 +1,49 @@ +package tarconverter + +import ( + "archive/tar" + "io" +) + +type HeaderConverter func(*tar.Header) + +// NewReader returns a reader that applies headerConverter. +// Forked from https://github.com/moby/moby/blob/v24.0.6/pkg/archive/copy.go#L308-L373 . +func NewReader(srcContent io.Reader, headerConverter HeaderConverter) io.ReadCloser { + rebased, w := io.Pipe() + + go func() { + srcTar := tar.NewReader(srcContent) + rebasedTar := tar.NewWriter(w) + + for { + hdr, err := srcTar.Next() + if err == io.EOF { + // Signals end of archive. + rebasedTar.Close() + w.Close() + return + } + if err != nil { + w.CloseWithError(err) + return + } + if headerConverter != nil { + headerConverter(hdr) + } + if err = rebasedTar.WriteHeader(hdr); err != nil { + w.CloseWithError(err) + return + } + + // Ignoring GoSec G110. See https://github.com/moby/moby/blob/v24.0.6/pkg/archive/copy.go#L355-L363 + //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec) + if _, err = io.Copy(rebasedTar, srcTar); err != nil { + w.CloseWithError(err) + return + } + } + }() + + return rebased +} diff --git a/util/converter/tarconverter/tarconverter_test.go b/util/converter/tarconverter/tarconverter_test.go new file mode 100644 index 000000000000..ceea6968d1e3 --- /dev/null +++ b/util/converter/tarconverter/tarconverter_test.go @@ -0,0 +1,41 @@ +package tarconverter + +import ( + "archive/tar" + "bytes" + "io" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func createTar(t testing.TB, name string, b []byte) []byte { + buf := bytes.NewBuffer(nil) + tw := tar.NewWriter(buf) + hdr := tar.Header{ + Typeflag: tar.TypeReg, + Name: name, + Size: int64(len(b)), + Mode: 0o644, + ModTime: time.Now(), + } + assert.NoError(t, tw.WriteHeader(&hdr)) + _, err := tw.Write(b) + assert.NoError(t, err) + assert.NoError(t, tw.Close()) + return buf.Bytes() +} + +// https://github.com/moby/buildkit/pull/4057#issuecomment-1693484361 +func TestPaddingForReader(t *testing.T) { + inB := createTar(t, "foo", []byte("hi")) + assert.Equal(t, 2048, len(inB)) + r := NewReader(bytes.NewReader(inB), func(hdr *tar.Header) { + hdr.ModTime = time.Unix(0, 0) + }) + outB, err := io.ReadAll(r) + assert.NoError(t, err) + assert.NoError(t, r.Close()) + assert.Equal(t, len(inB), len(outB)) +}