/* * Copyright 2023 The original authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package dev.morling.onebrc; import jdk.incubator.vector.*; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.util.ArrayList; import java.util.Iterator; import java.util.TreeMap; import java.util.stream.Collector; import java.util.stream.Collectors; import static java.lang.Double.doubleToRawLongBits; import static java.lang.Double.longBitsToDouble; import static java.lang.foreign.ValueLayout.*; /** * Broad experiments in this implementation: * - Memory-Map the file with new MemorySegments * - Use SIMD/vectorized search for the semicolon and new line feeds * - Use SIMD/vectorized comparison for the 'key' *

* Absolute stupid things / performance left on the table * - Single Threaded! Multi threading planned. * - The hash map/table is super basic. * - Hash table implementation / hashing has no resizing and is quite basic * - Zero time spend on profiling =) *

*

* Cheats used: * - Only works with Unix line feed \n * - double parsing is only accepting XX.X and X.X * - HashMap has no resizing, check, horrible hash etc. * - Used the double parsing from yemreinci */ public class CalculateAverage_gamlerhart { private static final String FILE = "./measurements.txt"; final static VectorSpecies byteVec = ByteVector.SPECIES_PREFERRED; final static Vector zero = byteVec.zero(); final static int vecLen = byteVec.length(); final static Vector semiColon = byteVec.broadcast(';'); final static VectorMask allTrue = byteVec.maskAll(true); final static ValueLayout.OfInt INT_UNALIGNED_BIG_ENDIAN = ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN); public static void main(String[] args) throws Exception { try (var arena = Arena.ofShared(); FileChannel fc = FileChannel.open(Path.of(FILE))) { long fileSize = fc.size(); MemorySegment fileContent = fc.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, arena); ArrayList

sections = splitFileIntoSections(fileSize, fileContent); var loopBound = byteVec.loopBound(fileSize) - vecLen; var result = sections.stream() .parallel() .map(s -> { return parseSection(s.start, s.end, loopBound, fileContent); }); var measurements = new TreeMap(); result.forEachOrdered(m -> { m.fillMerge(fileContent, measurements); }); System.out.println(measurements); } } private static PrivateHashMap parseSection(long start, long end, long loopBound, MemorySegment fileContent) { var map = new PrivateHashMap(); for (long i = start; i < end;) { long nameStart = i; int simdSearchEnd = 0; int nameLen = 0; // Vectorized Search if (i < loopBound) { do { var vec = byteVec.fromMemorySegment(fileContent, i, ByteOrder.BIG_ENDIAN); var hasSemi = vec.eq(semiColon); simdSearchEnd = hasSemi.firstTrue(); i += simdSearchEnd; nameLen += simdSearchEnd; } while (simdSearchEnd == vecLen && i < loopBound); } // Left-over search while (loopBound <= i && fileContent.get(JAVA_BYTE, i) != ';') { nameLen++; i++; } i++; // Consume ; // Copied from yemreinci. I mostly wanted to experiment the vector math, not with parsing =) double val; { boolean negative = false; if ((fileContent.get(JAVA_BYTE, i)) == '-') { negative = true; i++; } byte b; double temp; if ((b = fileContent.get(JAVA_BYTE, i + 1)) == '.') { // temperature is in either XX.X or X.X form temp = (fileContent.get(JAVA_BYTE, i) - '0') + (fileContent.get(JAVA_BYTE, i + 2) - '0') / 10.0; i += 3; } else { temp = (fileContent.get(JAVA_BYTE, i) - '0') * 10 + (b - '0') + (fileContent.get(JAVA_BYTE, i + 3) - '0') / 10.0; i += 4; } val = (negative ? -temp : temp); } i++; // Consume \n map.add(fileContent, nameStart, nameLen, val); } return map; } private static ArrayList
splitFileIntoSections(long fileSize, MemorySegment fileContent) { var cpuCount = Runtime.getRuntime().availableProcessors(); var roughChunkSize = fileSize / cpuCount; ArrayList
sections = new ArrayList<>(cpuCount); for (long sStart = 0; sStart < fileSize;) { var endGuess = Math.min(sStart + roughChunkSize, fileSize); for (; endGuess < fileSize && fileContent.get(JAVA_BYTE, endGuess) != '\n'; endGuess++) { } sections.add(new Section(sStart, endGuess)); sStart = endGuess + 1; } return sections; } private static class PrivateHashMap { private static final int SIZE_SHIFT = 14; public static final int SIZE = 1 << SIZE_SHIFT; public static int MASK = 0xFFFFFFFF >>> (32 - SIZE_SHIFT); public static long SHIFT_POS = 16; public static long MASK_POS = 0xFFFFFFFFFFFF0000L; public static long MASK_LEN = 0x000000000000FFFFL; // Encoding: // - Key: long // - 48 bits index, 16 bits length final long[] keys = new long[SIZE]; final Value[] values = new Value[SIZE]; private class Value { public Value(double min, double max, double sum, long count) { this.min = min; this.max = max; this.sum = sum; this.count = count; } public double min; public double max; public double sum; public long count; } // int debug_size = 0; // int debug_reprobeMax = 0; public PrivateHashMap() { } public void add(MemorySegment file, long pos, int len, double val) { int hashCode = calculateHash(file, pos, len); doAdd(file, hashCode, pos, len, val); } private static int calculateHash(MemorySegment file, long pos, int len) { if (len > 4) { return file.get(INT_UNALIGNED_BIG_ENDIAN, pos) + 31 * len; } else { int hashCode = len; int i = 0; for (; i < len; i++) { int v = file.get(JAVA_BYTE, pos + i); hashCode = 31 * hashCode + v; } return hashCode; } } private void doAdd(MemorySegment file, int hash, long pos, int len, double val) { int slot = hash & MASK; for (var probe = 0; probe < 20000; probe++) { var iSl = ((slot + probe) & MASK); var slotEntry = keys[iSl]; var emtpy = slotEntry == 0; if (emtpy) { long keyInfo = pos << SHIFT_POS | len; keys[iSl] = keyInfo; values[iSl] = new Value(val, val, val, 1); // debug_size++; return; } else if (isSameEntry(file, slotEntry, pos, len)) { var vE = values[iSl]; vE.min = Math.min(vE.min, val); vE.max = Math.max(vE.max, val); vE.sum = vE.sum + val; vE.count++; return; } else { // long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; // int keyLen = (int) (slotEntry & MASK_LEN); // System.out.println("Colliding " + new String(file.asSlice(pos,len).toArray(ValueLayout.JAVA_BYTE)) + // " with key" + new String(file.asSlice(keyPos,keyLen).toArray(ValueLayout.JAVA_BYTE)) + // " hash " + hash + " slot " + slot + "+" + probe + " at " + iSl); // debug_reprobeMax = Math.max(debug_reprobeMax, probe); } } throw new IllegalStateException("More than 20000 reprobes"); // throw new IllegalStateException("More than 100 reprobes: At " + debug_size + ""); } private boolean isSameEntry(MemorySegment file, long slotEntry, long pos, int len) { long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; int keyLen = (int) (slotEntry & MASK_LEN); var isSame = len == keyLen && isSame(file, keyPos, pos, len); return isSame; } private static boolean isSame(MemorySegment file, long i1, long i2, int len) { int i = 0; var i1len = i1 + vecLen; var i2len = i2 + vecLen; if (len < vecLen && i1len <= file.byteSize() && i2len <= file.byteSize()) { var v1 = byteVec.fromMemorySegment(file, i1, ByteOrder.nativeOrder()); var v2 = byteVec.fromMemorySegment(file, i2, ByteOrder.nativeOrder()); var isTrue = v1.compare(VectorOperators.EQ, v2, allTrue.indexInRange(0, len)); return isTrue.trueCount() == len; } while (8 < (len - i)) { var v1 = file.get(JAVA_LONG_UNALIGNED, i1 + i); var v2 = file.get(JAVA_LONG_UNALIGNED, i2 + i); if (v1 != v2) { return false; } i += 8; } while (i < len) { var v1 = file.get(JAVA_BYTE, i1 + i); var v2 = file.get(JAVA_BYTE, i2 + i); if (v1 != v2) { return false; } i++; } return true; } public void fillMerge(MemorySegment file, TreeMap treeMap) { for (int i = 0; i < keys.length; i++) { var ji = i; long keyE = keys[ji]; if (keyE != 0) { long keyPos = (keyE & MASK_POS) >> SHIFT_POS; int keyLen = (int) (keyE & MASK_LEN); byte[] keyBytes = new byte[keyLen]; MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); var key = new String(keyBytes); var vE = values[ji]; var min = vE.min; var max = vE.max; var sum = vE.sum; var count = vE.count; treeMap.compute(key, (k, e) -> { if (e == null) { return new ResultRow(min, max, sum, count); } else { return new ResultRow(Math.min(e.min, min), Math.max(e.max, max), e.sum + sum, e.count + count); } }); } } } // public String debugPrint(MemorySegment file) { // StringBuilder b = new StringBuilder(); // for (int i = 0; i < keyValues.length / 5; i++) { // var ji = i * 5; // long keyE = keyValues[ji]; // if (keyE != 0) { // long keyPos = (keyE & MASK_POS) >> SHIFT_POS; // int keyLen = (int) (keyE & MASK_LEN); // byte[] keyBytes = new byte[keyLen]; // MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); // var key = new String(keyBytes); // var min = longBitsToDouble(keyValues[ji + 1]); // var max = longBitsToDouble(keyValues[ji + 2]); // var sum = longBitsToDouble(keyValues[ji + 3]); // var count = keyValues[ji + 4]; // b.append("{").append(key).append("@").append(ji) // .append(",").append(min) // .append(",").append(max) // .append(",").append(sum) // .append(",").append(count).append("},"); // } // } // return b.toString(); // } } record Section(long start, long end) { } private static record ResultRow(double min, double max, double sum, long count) { public String toString() { return round(min) + "/" + round(((Math.round(sum * 10.0) / 10.0) / count)) + "/" + round(max); } private double round(double value) { return Math.round(value * 10.0) / 10.0; } } ; }