From 7474caa7adb4c22d3d2a5682f8936e3649797c4b Mon Sep 17 00:00:00 2001 From: brharrington Date: Fri, 27 Sep 2024 08:27:58 -0500 Subject: [PATCH] core: remove cached hash code for ItemId (#1696) The id is generated from a hash function, typically SHA1 so we can just use a part of that for the hash code of the id object. Since data is sometimes partitioned and routed based on the most and least significant bits, we pick one of the middle bytes to reduce the liklihood of collisions. Benchmarks show it is about 25% slower to access the hash code this way, but it does provide some memory savings and compute savings when creating an id instance since the murmur hash doesn't need to be computed. --- .../com/netflix/atlas/core/model/ItemId.scala | 44 +++++++------ .../atlas/core/model/ItemIdSuite.scala | 61 ++++++++----------- .../atlas/core/model/ItemIdHashCode.scala | 60 ++++++++++++++++++ 3 files changed, 110 insertions(+), 55 deletions(-) create mode 100644 atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala diff --git a/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala b/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala index 4fe3aae30..46ee96323 100644 --- a/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala +++ b/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala @@ -16,28 +16,37 @@ package com.netflix.atlas.core.model import java.math.BigInteger - import com.netflix.atlas.core.util.Strings -import scala.util.hashing.MurmurHash3 +import java.lang.invoke.MethodHandles +import java.nio.ByteOrder /** - * Represents an identifier for a tagged item. + * Represents an identifier for a tagged item. If using for a hash map the + * bytes used for the id should come from a decent hash function as 4 bytes + * from the middle are used for the hash code of the id object. * * @param data * Bytes for the id. This is usually the results of computing a SHA1 hash * over a normalized representation of the tags. - * @param hc - * Precomputed hash code for the bytes. */ -class ItemId private (private val data: Array[Byte], private val hc: Int) - extends Comparable[ItemId] { +class ItemId private (private val data: Array[Byte]) extends Comparable[ItemId] { + + // Typically it should be 20 bytes for SHA1. Require at least 16 to avoid + // checks for other operations. + require(data.length >= 16) - override def hashCode(): Int = hc + override def hashCode(): Int = { + // Choose middle byte. The id should be generated using decent hash + // function so in theory any subset will do. In some cases data is + // routed based on the prefix or a modulo of the intValue. Choosing a + // byte toward the middle helps to mitigate that. + ItemId.intHandle.get(data, 12) + } override def equals(obj: Any): Boolean = { obj match { - case other: ItemId => hc == other.hc && java.util.Arrays.equals(data, other.data) + case other: ItemId => java.util.Arrays.equals(data, other.data) case _ => false } } @@ -60,21 +69,16 @@ class ItemId private (private val data: Array[Byte], private val hc: Int) def toBigInteger: BigInteger = new BigInteger(1, data) def intValue: Int = { - var result = 0 - val end = math.max(0, data.length - 4) - var i = data.length - 1 - var shift = 0 - while (i >= end) { - result |= (data(i) & 0xFF) << shift - i -= 1 - shift += 8 - } - result + ItemId.intHandle.get(data, data.length - 4) } } object ItemId { + // Helper to access integer from byte array + private val intHandle = + MethodHandles.byteArrayViewVarHandle(classOf[Array[Int]], ByteOrder.BIG_ENDIAN) + private val hexValueForByte = (0 until 256).toArray.map { i => Strings.zeroPad(i, 2) } @@ -84,7 +88,7 @@ object ItemId { * using MurmurHash3. */ def apply(data: Array[Byte]): ItemId = { - new ItemId(data, MurmurHash3.bytesHash(data)) + new ItemId(data) } /** diff --git a/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala b/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala index 9d18777e6..2e30fd3fc 100644 --- a/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala +++ b/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala @@ -19,44 +19,48 @@ import com.netflix.atlas.core.util.Hash import com.netflix.atlas.core.util.Strings import munit.FunSuite -import scala.util.hashing.MurmurHash3 - class ItemIdSuite extends FunSuite { + def testByteArray: Array[Byte] = { + (1 to 20).map(_.toByte).toArray + } + test("create from byte array") { - val bytes = Array(1.toByte, 2.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id.hashCode(), MurmurHash3.bytesHash(bytes)) + assertEquals(id.hashCode(), 219025168) } test("equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) + val id2 = ItemId(testByteArray) assertEquals(id1, id1) assertEquals(id1, id2) } test("not equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 3.toByte)) + val id1 = ItemId(testByteArray) + val bytes = testByteArray + bytes(13) = 3.toByte // perturb byte used with hashing + val id2 = ItemId(bytes) assertNotEquals(id1, id2) assertNotEquals(id1.hashCode(), id2.hashCode()) } test("does not equal wrong object type") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) assert(!id1.equals("foo")) } test("does not equal null") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) assert(id1 != null) } test("toString") { - val bytes = Array(1.toByte, 2.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id.toString, "0102") + assertEquals(id.toString, "0102030405060708090a0b0c0d0e0f1011121314") } test("toString sha1 bytes 0") { @@ -79,15 +83,15 @@ class ItemIdSuite extends FunSuite { } test("from String lower") { - val bytes = Array(10.toByte, 11.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id, ItemId("0a0b")) + assertEquals(id, ItemId("0102030405060708090a0b0c0d0e0f1011121314")) } test("from String upper") { - val bytes = Array(10.toByte, 11.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id, ItemId("0A0B")) + assertEquals(id, ItemId("0102030405060708090A0B0C0D0E0F1011121314")) } test("from String invalid") { @@ -97,15 +101,17 @@ class ItemIdSuite extends FunSuite { } test("compareTo equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) + val id2 = ItemId(testByteArray) assertEquals(id1.compareTo(id1), 0) assertEquals(id1.compareTo(id2), 0) } test("compareTo less than/greater than") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 3.toByte)) + val id1 = ItemId(testByteArray) + val bytes = testByteArray + bytes(bytes.length - 1) = 21.toByte + val id2 = ItemId(bytes) assert(id1.compareTo(id2) < 0) assert(id2.compareTo(id1) > 0) } @@ -116,19 +122,4 @@ class ItemIdSuite extends FunSuite { assertEquals(id.intValue, id.toBigInteger.intValue()) } } - - test("int value: one byte") { - val id = ItemId(Array(57.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } - - test("int value: two bytes") { - val id = ItemId(Array(1.toByte, 2.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } - - test("int value: three bytes") { - val id = ItemId(Array(1.toByte, 2.toByte, 0xFF.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } } diff --git a/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala b/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala new file mode 100644 index 000000000..cbf840997 --- /dev/null +++ b/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala @@ -0,0 +1,60 @@ +/* + * Copyright 2014-2024 Netflix, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.netflix.atlas.core.model + +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.infra.Blackhole + +import scala.util.Random + +/** + * ``` + * > jmh:run -prof gc -prof stack -wi 5 -i 10 -f1 -t1 .*ItemIdHashCode.* + * ``` + * + * ItemId.hashCode: + * + * ``` + * Benchmark Mode Cnt Score Error Units + * before thrpt 10 1547433250.866 ± 52995197.574 ops/s + * after thrpt 10 1162791300.824 ± 36880550.752 ops/s + * ``` + * + * + * ItemId.intValue: + * + * ``` + * before thrpt 10 292282926.019 ± 2643333.767 ops/s + * after thrpt 10 1101391907.134 ± 14583585.092 ops/s + * ``` + */ +@State(Scope.Thread) +class ItemIdHashCode { + + private val id = ItemId(Random.nextBytes(20)) + + @Benchmark + def intValue(bh: Blackhole): Unit = { + bh.consume(id.intValue) + } + + @Benchmark + def hashCode(bh: Blackhole): Unit = { + bh.consume(id.hashCode()) + } +}