aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/dev/morling/onebrc
diff options
context:
space:
mode:
authorElliot Barlas <elliotbarlas@gmail.com>2024-01-07 01:05:18 -0800
committerGitHub <noreply@github.com>2024-01-07 10:05:18 +0100
commitc13997c9e0e67dc7cbfe3aae2b72bb80998d6492 (patch)
tree725d9ab5f73013649e4401df3464b6922ebbf3b5 /src/main/java/dev/morling/onebrc
parentaa0395d01bd800d82dc83b39df27c688e1f03363 (diff)
Continue unrolling and inlining value parser. Make targeted use of ByteBuffer.getInt() instead of ByteBuffer.get(). Switch from GraalVM CE to GraalVM. (#201)
Diffstat (limited to 'src/main/java/dev/morling/onebrc')
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java121
1 files changed, 78 insertions, 43 deletions
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java b/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java
index e4612e6..63ff69f 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java
@@ -18,6 +18,7 @@ package dev.morling.onebrc;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
@@ -28,7 +29,7 @@ import java.util.TreeMap;
public class CalculateAverage_ebarlas {
- private static final int MAX_KEY_SIZE = 100 * 4; // max 4 bytes per UTF-8 char
+ private static final int MAX_KEY_SIZE = 100;
private static final int HASH_FACTOR = 433;
private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive
@@ -52,7 +53,7 @@ public class CalculateAverage_ebarlas {
var pSize = pEnd - pStart;
Runnable r = () -> {
try {
- var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize);
+ var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize).order(ByteOrder.LITTLE_ENDIAN);
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
}
catch (IOException e) {
@@ -113,7 +114,7 @@ public class CalculateAverage_ebarlas {
var merged = mergeFooterAndHeader(pPrev.footer, pNext.header);
if (merged != null) {
if (merged[merged.length - 1] == '\n') { // fold into prev partition
- doProcessBuffer(ByteBuffer.wrap(merged), true, pPrev.stats);
+ doProcessBuffer(ByteBuffer.wrap(merged).order(ByteOrder.LITTLE_ENDIAN), true, pPrev.stats);
}
else { // no newline appeared in partition, carry forward
pNext.footer = merged;
@@ -142,56 +143,90 @@ public class CalculateAverage_ebarlas {
private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) {
var header = first ? null : readHeader(buffer);
var keyStart = reallyDoProcessBuffer(buffer, stats);
- var footer = keyStart < buffer.position() ? readFooter(buffer, keyStart) : null;
+ var footer = keyStart < buffer.limit() ? readFooter(buffer, keyStart) : null;
return new Partition(header, footer, stats);
}
private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) {
var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key
- var keyPos = 0; // current position in key buffer
- var keyHash = 0; // accumulating hash of key
- var keyStart = buffer.position(); // start of key in buffer used for footer calc
- try { // abort with exception to avoid hasRemaining() calls
- while (true) {
- var b = buffer.get();
- if (b != ';') {
- keyHash = HASH_FACTOR * keyHash + b;
- keyBuf[keyPos++] = b;
- }
- else {
- var idx = keyHash & HASH_TBL_SIZE;
- var st = stats[idx];
- if (st == null) { // nothing in table, eagerly claim spot
- st = stats[idx] = newStats(keyBuf, keyPos, keyHash);
+ int keyStart = 0; // start of key in buffer used for footer calc
+ try { // abort with exception to allow optimistic line processing
+ while (true) { // one line per iteration
+ keyStart = buffer.position(); // preserve line start
+ int n = buffer.getInt(); // first four bytes of key
+ byte b1 = (byte) (n & 0xFF);
+ byte b2 = (byte) ((n >> 8) & 0xFF);
+ byte b3 = (byte) ((n >> 16) & 0xFF);
+ byte b = (byte) ((n >> 24) & 0xFF);
+ int keyPos;
+ int keyHash = keyBuf[0] = b1;
+ if (b2 != ';' && b3 != ';') { // true for keys of length 3 or more
+ keyBuf[1] = b2;
+ keyBuf[2] = b3;
+ keyHash = HASH_FACTOR * (HASH_FACTOR * keyHash + b2) + b3;
+ keyPos = 3;
+ while (b != ';') {
+ keyHash = HASH_FACTOR * keyHash + b;
+ keyBuf[keyPos++] = b;
+ b = buffer.get();
}
- else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
- st = findInTable(stats, keyHash, keyBuf, keyPos);
+ }
+ else { // slow path, rewind and consume byte-by-byte
+ buffer.position(keyStart + 1);
+ keyPos = 1;
+ while ((b = buffer.get()) != ';') {
+ keyHash = HASH_FACTOR * keyHash + b;
+ keyBuf[keyPos++] = b;
}
- var negative = false;
- b = buffer.get(); // digit or dash
- if (b == '-') {
- negative = true;
- b = buffer.get(); // digit after neg
+ }
+ var idx = keyHash & HASH_TBL_SIZE;
+ var st = stats[idx];
+ if (st == null) { // nothing in table, eagerly claim spot
+ st = stats[idx] = newStats(keyBuf, keyPos, keyHash);
+ }
+ else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
+ st = findInTable(stats, keyHash, keyBuf, keyPos);
+ }
+ var value = buffer.getInt();
+ b = (byte) (value & 0xFF); // digit or dash
+ int val;
+ if (b == '-') { // dash branch
+ val = ((byte) ((value >> 8) & 0xFF)) - '0'; // digit after dash
+ b = (byte) ((value >> 16) & 0xFF); // second digit or decimal
+ if (b != '.') { // second digit
+ val = val * 10 + (b - '0'); // calc second digit
+ // skip decimal (at >> 24)
+ b = buffer.get(); // digit after decimal
+ val = val * 10 + (b - '0'); // calc digit after decimal
}
- var val = b - '0';
- b = buffer.get(); // second digit or decimal
- if (b != '.') {
- val = val * 10 + (b - '0');
- buffer.get(); // decimal
+ else { // decimal branch
+ // skip decimal (at >> 16)
+ b = (byte) ((value >> 24) & 0xFF); // digit after decimal
+ val = val * 10 + (b - '0'); // calc digit after decimal
}
- val = val * 10 + (buffer.get() - '0'); // digit after decimal
buffer.get(); // newline
- var v = negative ? -val : val;
- st.min = Math.min(st.min, v);
- st.max = Math.max(st.max, v);
- st.sum += v;
- st.count++;
- keyStart = buffer.position(); // preserve line start
- b = buffer.get(); // first byte of key
- keyHash = b;
- keyBuf[0] = b;
- keyPos = 1;
+ val = -val;
+ }
+ else { // first digit branch
+ val = b - '0'; // calc first digit
+ b = (byte) ((value >> 8) & 0xFF); // second digit or decimal
+ if (b != '.') { // second digit branch
+ val = val * 10 + (b - '0'); // calc second digit
+ // skip decimal (at >> 16)
+ b = (byte) ((value >> 24) & 0xFF); // digit after decimal
+ val = val * 10 + (b - '0'); // calc digit after decimal
+ buffer.get(); // newline
+ }
+ else { // decimal branch
+ b = (byte) ((value >> 16) & 0xFF); // digit after decimal
+ val = val * 10 + (b - '0'); // calc digit after decimal
+ // skip newline (at >> 24)
+ }
}
+ st.min = Math.min(st.min, val);
+ st.max = Math.max(st.max, val);
+ st.sum += val;
+ st.count++;
}
}
catch (BufferUnderflowException ignore) {
@@ -220,7 +255,7 @@ public class CalculateAverage_ebarlas {
}
private static byte[] readFooter(ByteBuffer buffer, int lineStart) { // read from line start to current pos (end-of-input)
- var footer = new byte[buffer.position() - lineStart];
+ var footer = new byte[buffer.limit() - lineStart];
buffer.get(lineStart, footer, 0, footer.length);
return footer;
}