aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/dev/morling/onebrc
diff options
context:
space:
mode:
authorRichard Startin <richardstartin@apache.org>2024-01-04 17:19:56 +0000
committerGitHub <noreply@github.com>2024-01-04 18:19:56 +0100
commitc3411f60230870f47e09d0d679be92326f8aa9f9 (patch)
tree10b64ff9eeb54a53ea03f33a91163dee513dc82a /src/main/java/dev/morling/onebrc
parenta09fa928db9ecba7a2cfaedd264d35c6cd63970c (diff)
Richard Startin: Adopt @spullara's double parsing code;
* increase chunk size * simplify and tune parameters
Diffstat (limited to 'src/main/java/dev/morling/onebrc')
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java77
1 files changed, 28 insertions, 49 deletions
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java b/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java
index c7f430b..911ecf9 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java
@@ -52,6 +52,26 @@ public class CalculateAverage_richardstartin {
return new String(bytes, StandardCharsets.UTF_8);
}
+ static double parseTemperature(ByteBuffer slice) {
+ // credit: adapted from spullara's submission
+ int value = 0;
+ int negative = 1;
+ int i = 0;
+ while (i != slice.limit()) {
+ byte b = slice.get(i++);
+ switch (b) {
+ case '-':
+ negative = -1;
+ case '.':
+ break;
+ default:
+ value = 10 * value + (b - '0');
+ }
+ }
+ value *= negative;
+ return value / 10.0;
+ }
+
@FunctionalInterface
interface IndexedStringConsumer {
void accept(String value, int index);
@@ -60,7 +80,7 @@ public class CalculateAverage_richardstartin {
/** Maps text to an integer encoding. Adapted from async-profiler. */
public static class Dictionary {
- private static final int ROW_BITS = 7;
+ private static final int ROW_BITS = 12;
private static final int ROWS = (1 << ROW_BITS);
private static final int CELLS = 3;
private static final int TABLE_CAPACITY = (ROWS * CELLS);
@@ -90,10 +110,10 @@ public class CalculateAverage_richardstartin {
forEach(this.table, consumer);
}
- public int encode(long hash, ByteBuffer slice) {
+ public int encode(int hash, ByteBuffer slice) {
Table table = this.table;
while (true) {
- int rowIndex = (int)(Math.abs(hash) % ROWS);
+ int rowIndex = Math.abs(hash) % ROWS;
Row row = table.rows[rowIndex];
for (int c = 0; c < CELLS; c++) {
ByteBuffer storedKey = row.keys.get(c);
@@ -111,7 +131,7 @@ public class CalculateAverage_richardstartin {
}
}
table = row.getOrCreateNextTable(this::nextBaseIndex);
- hash = Long.rotateRight(hash, ROW_BITS);
+ hash = Integer.rotateRight(hash, ROW_BITS);
}
}
@@ -207,43 +227,6 @@ public class CalculateAverage_richardstartin {
return buffer.limit();
}
- private static long hash(ByteBuffer slice) {
- long hash = slice.limit() + PRIME_5 + 0x123456789abcdef1L;
- int i = 0;
- for (; i + Long.BYTES < slice.limit(); i += Long.BYTES) {
- hash = hashLong(hash, slice.getLong(i));
- }
- long part = 0L;
- for (; i < slice.limit(); i++) {
- part = (part >>> 8) | ((slice.get(i) & 0xFFL) << 56);
- }
- hash = hashLong(hash, part);
- return mix(hash);
- }
-
- static final long PRIME_1 = 0x9E3779B185EBCA87L;
- static final long PRIME_2 = 0xC2B2AE3D27D4EB4FL;
- static final long PRIME_3 = 0x165667B19E3779F9L;
- static final long PRIME_4 = 0x85EBCA77C2B2AE63L;
- static final long PRIME_5 = 0x27D4EB2F165667C5L;
-
- private static long hashLong(long hash, long k) {
- k *= PRIME_2;
- k = Long.rotateLeft(k, 31);
- k *= PRIME_1;
- hash ^= k;
- return Long.rotateLeft(hash, 27) * PRIME_1 + PRIME_4;
- }
-
- private static long mix(long hash) {
- hash ^= hash >>> 33;
- hash *= PRIME_2;
- hash ^= hash >>> 29;
- hash *= PRIME_3;
- hash ^= hash >>> 32;
- return hash;
- }
-
static class Page {
static final int PAGE_SIZE = 1024;
@@ -311,16 +294,12 @@ public class CalculateAverage_richardstartin {
ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN);
// find the global dictionary code to aggregate,
// making this code global allows easy merging
- int dictId = dictionary.encode(hash(key), key);
+ int dictId = dictionary.encode(key.hashCode(), key);
offset = nextSeparator + 1;
int newLine = findIndexOf(slice, offset, NEW_LINE);
// parse the double
- // todo do this without allocating a string, could use a fast parsing falgorithm
- var bytes = new byte[newLine - offset];
- slice.get(offset, bytes);
- var string = new String(bytes, StandardCharsets.US_ASCII);
- double d = Double.parseDouble(string);
+ double d = parseTemperature(slice.slice(offset, newLine - offset));
Page.update(pages, dictId, d);
@@ -351,7 +330,7 @@ public class CalculateAverage_richardstartin {
protected double[][] compute() {
if (min == max) {
// fixme - hardcoded to problem size
- var pages = new double[1024][];
+ var pages = new double[600][];
var slice = slices.get(min);
computeSlice(slice, pages);
return pages;
@@ -368,7 +347,7 @@ public class CalculateAverage_richardstartin {
}
public static void main(String[] args) throws IOException {
- int maxChunkSize = 10 << 20; // 10MiB
+ int maxChunkSize = 250 << 20; // 250MiB
try (var raf = new RandomAccessFile(FILE, "r");
var channel = raf.getChannel()) {
long size = channel.size();