Skip to content

Commit

Permalink
Use fnv1a64 for tile hash (#253)
Browse files Browse the repository at this point in the history
  • Loading branch information
msbarry authored Jun 4, 2022
1 parent c39c667 commit b0f634b
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.OptionalInt;
import java.util.OptionalLong;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -53,7 +53,7 @@ void testStilInvalidWithOneTile() throws IOException {
VectorTile.encodeGeometry(point(0, 0)),
Map.of()
)));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty()));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalLong.empty()));
}
assertInvalid(mbtiles);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.DoubleSummaryStatistics;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
Expand All @@ -34,23 +34,23 @@ public static void main(String[] args) throws IOException {
*/
int distinctTilesInPercent = arguments.getInteger("bench_distinct_tiles", "distinct tiles in percent", 10);
/*
* select avg(length(tile_data))
* from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) = 1) as x
* select avg(length(tile_data))
* from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) = 1) as x
* join tiles_data using(tile_data_id)
* => ~785 (Australia)
*/
int distinctTileDataSize =
arguments.getInteger("bench_distinct_tile_data_size", "distinct tile data size in bytes", 800);
/*
* select avg(length(tile_data))
* from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) > 1) as x
* join tiles_shallow using(tile_data_id)
* select avg(length(tile_data))
* from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) > 1) as x
* join tiles_shallow using(tile_data_id)
* join tiles_data using(tile_data_id)
* => ~93 (Australia)
*/
int dupeTileDataSize = arguments.getInteger("bench_dupe_tile_data_size", "dupe tile data size in bytes", 100);
/*
* select count(*) * 100.0 / sum(usage_count)
* select count(*) * 100.0 / sum(usage_count)
* from (select tile_data_id, count(*) as usage_count from tiles_shallow group by tile_data_id having count(*) > 1)
* => ~0.17% (Australia)
*/
Expand Down Expand Up @@ -107,11 +107,11 @@ private static void writeTiles(BatchedTileWriter writer, int tilesToWrite, int d
TileCoord coord = TileCoord.ofXYZ(x, y, z);
TileEncodingResult toWrite;
if (tilesWritten % 100 < distinctTilesInPercent) {
toWrite = new TileEncodingResult(coord, distinctTileData, OptionalInt.empty());
toWrite = new TileEncodingResult(coord, distinctTileData, OptionalLong.empty());
} else {
++dupeCounter;
int hash = dupeHashMod == 0 ? 0 : dupeCounter % dupeHashMod;
toWrite = new TileEncodingResult(coord, dupeTileData, OptionalInt.of(hash));
toWrite = new TileEncodingResult(coord, dupeTileData, OptionalLong.of(hash));
}

writer.write(toWrite);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,12 @@ public TileCoord tileCoord() {
* <p>
* Used as an optimization to avoid writing the same (ocean) tiles over and over again.
*/
public int generateContentHash() {
int hash = Hashing.FNV1_32_INIT;
public long generateContentHash() {
long hash = Hashing.FNV1_64_INIT;
for (var feature : entries) {
byte layerId = extractLayerIdFromKey(feature.key());
hash = Hashing.fnv1a32(hash, layerId);
hash = Hashing.fnv1a32(hash, feature.value());
hash = Hashing.fnv1a64(hash, layerId);
hash = Hashing.fnv1a64(hash, feature.value());
}
return hash;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_ABSENT;

import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.LongIntHashMap;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.geo.TileCoord;
import com.onthegomap.planetiler.util.Format;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Path;
Expand All @@ -29,6 +30,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
Expand Down Expand Up @@ -509,6 +511,7 @@ private abstract class BatchedTableWriterBase<T> implements AutoCloseable {
private final boolean insertStmtInsertIgnore;
private final String insertStmtValuesPlaceHolder;
private final String insertStmtColumnsCsv;
private long count = 0;


protected BatchedTableWriterBase(String tableName, List<String> columns, boolean insertIgnore) {
Expand All @@ -523,6 +526,7 @@ protected BatchedTableWriterBase(String tableName, List<String> columns, boolean

/** Queue-up a write or flush to disk if enough are waiting. */
void write(T item) {
count++;
batch.add(item);
if (batch.size() >= batchLimit) {
flush(batchStatement);
Expand Down Expand Up @@ -561,6 +565,10 @@ private void flush(PreparedStatement statement) {
}
}

/**
 * Returns the running total of items handed to {@code write(T)} on this writer.
 *
 * @return the value of the internal item counter
 */
public long count() {
  return this.count;
}

@Override
public void close() {
if (!batch.isEmpty()) {
Expand Down Expand Up @@ -660,6 +668,8 @@ public interface BatchedTileWriter extends AutoCloseable {

@Override
void close();

/**
 * Hook for reporting statistics about what this writer has written.
 * <p>
 * The default implementation intentionally does nothing; implementations may override it.
 */
default void printStats() {
  // no-op by default
}
}

private class BatchedNonCompactTileWriter implements BatchedTileWriter {
Expand All @@ -682,18 +692,18 @@ private class BatchedCompactTileWriter implements BatchedTileWriter {

private final BatchedTileShallowTableWriter batchedTileShallowTableWriter = new BatchedTileShallowTableWriter();
private final BatchedTileDataTableWriter batchedTileDataTableWriter = new BatchedTileDataTableWriter();
private final IntIntHashMap tileDataIdByHash = new IntIntHashMap(1_000);
private final LongIntHashMap tileDataIdByHash = new LongIntHashMap(1_000);

private int tileDataIdCounter = 1;

@Override
public void write(TileEncodingResult encodingResult) {
int tileDataId;
boolean writeData;
OptionalInt tileDataHashOpt = encodingResult.tileDataHash();
OptionalLong tileDataHashOpt = encodingResult.tileDataHash();

if (tileDataHashOpt.isPresent()) {
int tileDataHash = tileDataHashOpt.getAsInt();
long tileDataHash = tileDataHashOpt.getAsLong();
if (tileDataIdByHash.containsKey(tileDataHash)) {
tileDataId = tileDataIdByHash.get(tileDataHash);
writeData = false;
Expand All @@ -717,6 +727,17 @@ public void close() {
batchedTileShallowTableWriter.close();
batchedTileDataTableWriter.close();
}

/**
 * Logs, at debug level only, how many shallow tile rows and tile data rows were written, the share of tile data
 * writes that were omitted relative to shallow tiles, and the number of distinct tile hashes tracked.
 * <p>
 * Does nothing when debug logging is disabled.
 */
@Override
public void printStats() {
  if (!LOGGER.isDebugEnabled()) {
    return;
  }
  var format = Format.defaultInstance();
  long shallowTilesWritten = batchedTileShallowTableWriter.count();
  long tileDataWritten = batchedTileDataTableWriter.count();
  // Guard the denominator: with zero shallow tiles the ratio would be 0.0/0.0 (NaN) and log a meaningless percent.
  double omittedFraction = shallowTilesWritten == 0
    ? 0d
    : 1d - tileDataWritten * 1d / shallowTilesWritten;
  LOGGER.debug("Shallow tiles written: {}", format.integer(shallowTilesWritten));
  LOGGER.debug("Tile data written: {} ({} omitted)", format.integer(tileDataWritten),
    format.percent(omittedFraction));
  LOGGER.debug("Unique tile hashes: {}", format.integer(tileDataIdByHash.size()));
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Queue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
Expand Down Expand Up @@ -257,7 +257,7 @@ private void tileEncoder(Iterable<TileBatch> prev, Consumer<TileBatch> next) thr
* recomputing if the input hasn't changed.
*/
byte[] lastBytes = null, lastEncoded = null;
Integer lastTileDataHash = null;
Long lastTileDataHash = null;
boolean compactDb = config.compactDb();

for (TileBatch batch : prev) {
Expand All @@ -268,7 +268,7 @@ private void tileEncoder(Iterable<TileBatch> prev, Consumer<TileBatch> next) thr
FeatureGroup.TileFeatures tileFeatures = batch.in.get(i);
featuresProcessed.incBy(tileFeatures.getNumFeaturesProcessed());
byte[] bytes, encoded;
Integer tileDataHash;
Long tileDataHash;
if (tileFeatures.hasSameContents(last)) {
bytes = lastBytes;
encoded = lastEncoded;
Expand Down Expand Up @@ -299,7 +299,7 @@ private void tileEncoder(Iterable<TileBatch> prev, Consumer<TileBatch> next) thr
maxTileSizesByZoom[zoom].accumulate(encodedLength);
result.add(
new TileEncodingResult(tileFeatures.tileCoord(), bytes,
tileDataHash == null ? OptionalInt.empty() : OptionalInt.of(tileDataHash))
tileDataHash == null ? OptionalLong.empty() : OptionalLong.of(tileDataHash))
);
}
// hand result off to writer
Expand Down Expand Up @@ -361,6 +361,7 @@ private void tileWriter(Iterable<TileBatch> tileBatches) throws ExecutionExcepti
}
lastTileWritten.set(lastTile);
}
batchedTileWriter.printStats();
}

if (time != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import com.onthegomap.planetiler.geo.TileCoord;
import java.util.Arrays;
import java.util.Objects;
import java.util.OptionalInt;
import java.util.OptionalLong;

public record TileEncodingResult(
TileCoord coord,
byte[] tileData,
/** will always be empty in non-compact mode and might also be empty in compact mode */
OptionalInt tileDataHash
OptionalLong tileDataHash
) {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,20 @@ public final class Hashing {
public static final int FNV1_32_INIT = 0x811c9dc5;
private static final int FNV1_PRIME_32 = 16777619;

/**
* Initial hash for the FNV-1 and FNV-1a 64-bit hash functions.
*/
public static final long FNV1_64_INIT = 0xcbf29ce484222325L;
private static final long FNV1_PRIME_64 = 1099511628211L;

private Hashing() {}

/**
* Computes the hash using the FNV-1a 32-bit hash function, starting with the initial hash.
* <p>
* The hash generation must always start with {@link #FNV1_32_INIT} as initial hash but this version comes in handy
* when generating the hash for multiple bytes consecutively in a loop.
*
*
* @param initHash the initial hash
* @param data the data to generate the hash for
* @return the generated hash
Expand All @@ -35,12 +41,41 @@ public static int fnv1a32(int initHash, byte... data) {

/**
 * Hashes the given bytes with the FNV-1a 32-bit function, seeded with {@link #FNV1_32_INIT}.
 *
 * @param data the bytes to hash
 * @return the 32-bit hash of {@code data}
 */
public static int fnv1a32(byte... data) {
  final int seed = FNV1_32_INIT;
  return fnv1a32(seed, data);
}

/**
 * Folds the given bytes into a running FNV-1a 64-bit hash.
 * <p>
 * A fresh hash must be seeded with {@link #FNV1_64_INIT}; passing the result of a previous call as
 * {@code initHash} allows hashing several byte sequences consecutively, e.g. from within a loop.
 *
 * @param initHash the hash value to continue from
 * @param data     the bytes to mix into the hash
 * @return the updated hash
 */
public static long fnv1a64(long initHash, byte... data) {
  long result = initHash;
  for (int i = 0; i < data.length; i++) {
    // FNV-1a step: XOR in the byte (masked so it is treated as unsigned), then multiply by the prime
    result = (result ^ (data[i] & 0xff)) * FNV1_PRIME_64;
  }
  return result;
}

/**
 * Hashes the given bytes with the FNV-1a 64-bit function, seeded with {@link #FNV1_64_INIT}.
 *
 * @param data the bytes to hash
 * @return the 64-bit hash of {@code data}
 */
public static long fnv1a64(byte... data) {
  final long seed = FNV1_64_INIT;
  return fnv1a64(seed, data);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
Expand Down Expand Up @@ -55,7 +55,7 @@ void testWriteTiles(int howMany, boolean skipIndexCreation, boolean optimize, bo
(byte) (dataBase >> 16),
(byte) (dataBase >> 24)
});
writer.write(new TileEncodingResult(entry.tile(), entry.bytes(), OptionalInt.of(dataHash)));
writer.write(new TileEncodingResult(entry.tile(), entry.bytes(), OptionalLong.of(dataHash)));
expected.add(entry);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.OptionalInt;
import java.util.OptionalLong;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -53,7 +53,7 @@ void testValidWithNameAndOneTile() throws IOException {
VectorTile.encodeGeometry(point(0, 0)),
Map.of()
)));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty()));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalLong.empty()));
}
assertValid(mbtiles);
}
Expand All @@ -77,7 +77,7 @@ void testInvalidGeometry() throws IOException {
)),
Map.of()
)));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty()));
writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalLong.empty()));
}
assertInvalid(mbtiles);
}
Expand Down
Loading

0 comments on commit b0f634b

Please sign in to comment.