Skip to content

Commit

Permalink
Merge pull request #472 from restic/update-chunker
Browse files Browse the repository at this point in the history
Update chunker
  • Loading branch information
fd0 committed Feb 24, 2016
2 parents d84dec4 + 2ce49ea commit 77d85ce
Show file tree
Hide file tree
Showing 11 changed files with 351 additions and 302 deletions.
18 changes: 9 additions & 9 deletions src/restic/archiver.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package restic

import (
"crypto/sha256"
"bytes"
"encoding/json"
"fmt"
"io"
Expand All @@ -11,13 +11,14 @@ import (
"sync"
"time"

"github.com/restic/chunker"
"restic/backend"
"restic/debug"
"restic/pack"
"restic/pipe"
"restic/repository"

"github.com/restic/chunker"

"github.com/juju/errors"
)

Expand Down Expand Up @@ -154,12 +155,11 @@ type saveResult struct {
bytes uint64
}

func (arch *Archiver) saveChunk(chunk *chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) {
hash := chunk.Digest
id := backend.ID{}
copy(id[:], hash)
func (arch *Archiver) saveChunk(chunk chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) {
defer freeBuf(chunk.Data)

err := arch.Save(pack.Data, id, chunk.Length, chunk.Reader(file))
id := backend.Hash(chunk.Data)
err := arch.Save(pack.Data, id, chunk.Length, bytes.NewReader(chunk.Data))
// TODO handle error
if err != nil {
panic(err)
Expand Down Expand Up @@ -220,11 +220,11 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error {
return err
}

chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial, sha256.New())
chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial)
resultChannels := [](<-chan saveResult){}

for {
chunk, err := chnker.Next()
chunk, err := chnker.Next(getBuf())
if err == io.EOF {
break
}
Expand Down
55 changes: 21 additions & 34 deletions src/restic/archiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ package restic_test

import (
"bytes"
"crypto/sha256"
"io"
"testing"
"time"

"github.com/restic/chunker"
"restic"
"restic/backend"
"restic/checker"
"restic/crypto"
"restic/pack"
"restic/repository"
. "restic/test"

"github.com/restic/chunker"
)

var testPol = chunker.Pol(0x3DA3358B4DC173)
Expand All @@ -24,17 +24,12 @@ type Rdr interface {
io.ReaderAt
}

type chunkedData struct {
buf []byte
chunks []*chunker.Chunk
}

func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, sha256.New())
ch := chunker.New(rd, testPol)

for {
chunk, err := ch.Next()
chunk, err := ch.Next(buf)

if err == io.EOF {
break
Expand All @@ -43,12 +38,10 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K
OK(b, err)

// reduce length of buf
buf = buf[:chunk.Length]
n, err := io.ReadFull(chunk.Reader(rd), buf)
OK(b, err)
Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length)
Assert(b, uint(len(chunk.Data)) == chunk.Length,
"invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)

_, err = crypto.Encrypt(key, buf2, buf)
_, err = crypto.Encrypt(key, buf2, chunk.Data)
OK(b, err)
}
}
Expand All @@ -72,18 +65,16 @@ func BenchmarkChunkEncrypt(b *testing.B) {
}

func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
ch := chunker.New(rd, testPol, sha256.New())
ch := chunker.New(rd, testPol)

for {
chunk, err := ch.Next()
chunk, err := ch.Next(buf)
if err == io.EOF {
break
}

// reduce length of chunkBuf
buf = buf[:chunk.Length]
io.ReadFull(chunk.Reader(rd), buf)
crypto.Encrypt(key, buf, buf)
crypto.Encrypt(key, chunk.Data, chunk.Data)
}
}

Expand Down Expand Up @@ -258,8 +249,7 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
duplication := 7

arch := restic.NewArchiver(repo)
data, chunks := getRandomData(seed, dataSizeMb*1024*1024)
reader := bytes.NewReader(data)
chunks := getRandomData(seed, dataSizeMb*1024*1024)

errChannels := [](<-chan error){}

Expand All @@ -272,18 +262,15 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
errChan := make(chan error)
errChannels = append(errChannels, errChan)

go func(reader *bytes.Reader, c *chunker.Chunk, errChan chan<- error) {
go func(c chunker.Chunk, errChan chan<- error) {
barrier <- struct{}{}

hash := c.Digest
id := backend.ID{}
copy(id[:], hash)

time.Sleep(time.Duration(hash[0]))
err := arch.Save(pack.Data, id, c.Length, c.Reader(reader))
id := backend.Hash(c.Data)
time.Sleep(time.Duration(id[0]))
err := arch.Save(pack.Data, id, c.Length, bytes.NewReader(c.Data))
<-barrier
errChan <- err
}(reader, c, errChan)
}(c, errChan)
}
}

Expand All @@ -298,20 +285,20 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
assertNoUnreferencedPacks(t, chkr)
}

func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) {
func getRandomData(seed int, size int) []chunker.Chunk {
buf := Random(seed, size)
chunks := []*chunker.Chunk{}
chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
var chunks []chunker.Chunk
chunker := chunker.New(bytes.NewReader(buf), testPol)

for {
c, err := chunker.Next()
c, err := chunker.Next(nil)
if err == io.EOF {
break
}
chunks = append(chunks, c)
}

return buf, chunks
return chunks
}

func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker {
Expand Down
21 changes: 21 additions & 0 deletions src/restic/buffer_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package restic

import (
"sync"

"github.com/restic/chunker"
)

var bufPool = sync.Pool{
New: func() interface{} {
return make([]byte, chunker.MinSize)
},
}

func getBuf() []byte {
return bufPool.Get().([]byte)
}

func freeBuf(data []byte) {
bufPool.Put(data)
}
5 changes: 2 additions & 3 deletions src/restic/repository/packer_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package repository
import (
"sync"

"github.com/restic/chunker"
"restic/backend"
"restic/crypto"
"restic/debug"
Expand All @@ -18,8 +17,8 @@ type packerManager struct {
packs []*pack.Packer
}

const minPackSize = 4 * chunker.MiB
const maxPackSize = 16 * chunker.MiB
const minPackSize = 4 * 1024 * 1024
const maxPackSize = 16 * 1024 * 1024
const maxPackers = 200

// findPacker returns a packer for a new blob of size bytes. Either a new one is
Expand Down
4 changes: 2 additions & 2 deletions vendor/manifest
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
{
"importpath": "github.com/restic/chunker",
"repository": "https://github.com/restic/chunker",
"revision": "fc45043175c38d59374024a38fb7123c40a64f20",
"branch": "HEAD"
"revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c",
"branch": "master"
},
{
"importpath": "golang.org/x/crypto/pbkdf2",
Expand Down
9 changes: 5 additions & 4 deletions vendor/src/github.com/restic/chunker/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker)
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)

Content Defined Chunking (CDC) based on a rolling Rabin Checksum.

Part of https://github.com/restic/restic.
The package `chunker` implements content-defined-chunking (CDC) based on a
rolling Rabin Hash. The library is part of the [restic backup
program](https://github.com/restic/restic).

An introduction to Content Defined Chunking can be found in the restic blog
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc/).
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc).

You can find the API documentation at
https://godoc.org/github.com/restic/chunker
Loading

0 comments on commit 77d85ce

Please sign in to comment.