aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/dev
diff options
context:
space:
mode:
authorMarkus Ebner <seijikun@users.noreply.github.com>2024-01-03 20:06:14 +0100
committerGitHub <noreply@github.com>2024-01-03 20:06:14 +0100
commit580c4ac2142ce3dfbcf6448db73c14e3c2df933a (patch)
treee8ae72f0aaec1044274a130512f40fb41ef3a497 /src/main/java/dev
parent4a08c93469f749c2078b176a8778356b3f596799 (diff)
Adding seijikun's submission
Diffstat (limited to 'src/main/java/dev')
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java221
1 files changed, 221 insertions, 0 deletions
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java b/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java
new file mode 100644
index 0000000..8038c1a
--- /dev/null
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2023 The original authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.morling.onebrc;
+
+import java.io.*;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.TreeMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+public class CalculateAverage_seijikun {
+
+ private static final String FILE = "./measurements.txt";
+
+
+ private static class MeasurementAggregator {
+ private double min = Double.POSITIVE_INFINITY;
+ private double max = Double.NEGATIVE_INFINITY;
+ private double sum;
+ private long count;
+
+ public void printInto(PrintStream out) {
+ out.printf("%.1f/%.1f/%.1f", min, (sum / (double) count), max);
+ }
+ }
+
+ public static class StationIdent implements Comparable<StationIdent> {
+ private final int nameLength;
+ private final String name;
+ private final int nameHash;
+
+ public StationIdent(byte[] name, int nameHash) {
+ this.nameLength = name.length;
+ this.name = new String(name);
+ this.nameHash = nameHash;
+ }
+
+ @Override
+ public int hashCode() {
+ return nameHash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ var other = (StationIdent) obj;
+ if (other.nameLength != nameLength) {
+ return false;
+ }
+ return name.equals(other.name);
+ }
+
+ @Override
+ public int compareTo(StationIdent o) {
+ return name.compareTo(o.name);
+ }
+ }
+
+ public static class ChunkReader implements Runnable {
+ RandomAccessFile file;
+
+ // Start offset of this chunk
+ private final long startOffset;
+ // end offset of this chunk
+ private final long endOffset;
+
+ // state
+ private MappedByteBuffer buffer = null;
+ private int ptr = 0;
+ private TreeMap<StationIdent, MeasurementAggregator> workSet;
+
+ public ChunkReader(RandomAccessFile file, long startOffset, long endOffset) {
+ this.file = file;
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+ private StationIdent readStationName() {
+ int startPtr = ptr;
+ int hashCode = 0;
+ int hashBytePtr = 0;
+ byte c;
+ while ((c = buffer.get(ptr++)) != ';') {
+ hashCode ^= ((int) c) << (hashBytePtr * 8);
+ hashBytePtr = (hashBytePtr + 1) % 4;
+ }
+ byte[] stationNameBfr = new byte[ptr - startPtr - 1];
+ buffer.get(startPtr, stationNameBfr);
+ return new StationIdent(stationNameBfr, hashCode);
+ }
+
+ private double readTemperature() {
+ double ret = 0, div = 1;
+ byte c = buffer.get(ptr++);
+ boolean neg = (c == '-');
+ if (neg)
+ c = buffer.get(ptr++);
+
+ do {
+ ret = ret * 10 + c - '0';
+ } while ((c = buffer.get(ptr++)) >= '0' && c <= '9');
+
+ if (c == '.') {
+ while ((c = buffer.get(ptr++)) != '\n') {
+ ret += (c - '0') / (div *= 10);
+ }
+ }
+
+ if (neg)
+ return -ret;
+ return ret;
+ }
+
+ @Override
+ public void run() {
+ workSet = new TreeMap<>();
+ int chunkSize = (int) (endOffset - startOffset);
+ try {
+ buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, startOffset, chunkSize);
+
+ while (ptr < chunkSize) {
+ var station = readStationName();
+ var temp = readTemperature();
+ var stationWorkSet = workSet.get(station);
+ if (stationWorkSet == null) {
+ stationWorkSet = new MeasurementAggregator();
+ workSet.put(station, stationWorkSet);
+ }
+ stationWorkSet.min = Math.min(temp, stationWorkSet.min);
+ stationWorkSet.max = Math.max(temp, stationWorkSet.max);
+ stationWorkSet.sum += temp;
+ stationWorkSet.count += 1;
+ }
+ }
+ catch (IOException e) {
+ e.printStackTrace();
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ public static void main(String[] args) throws IOException, InterruptedException {
+ RandomAccessFile file = new RandomAccessFile(FILE, "r");
+
+ int jobCnt = Runtime.getRuntime().availableProcessors();
+
+ var chunks = new ChunkReader[jobCnt];
+ long chunkSize = file.length() / jobCnt;
+ long chunkStartPtr = 0;
+ byte[] tmpBuffer = new byte[128];
+ for (int i = 0; i < jobCnt; ++i) {
+ long chunkEndPtr = chunkStartPtr + chunkSize;
+ if (i != (jobCnt - 1)) { // align chunks to newlines
+ file.seek(chunkEndPtr - 1);
+ file.read(tmpBuffer);
+ int offset = 0;
+ while (tmpBuffer[offset] != '\n') {
+ offset += 1;
+ }
+ chunkEndPtr += offset;
+ }
+ else { // last chunk ends at file end
+ chunkEndPtr = file.length();
+ }
+ chunks[i] = new ChunkReader(file, chunkStartPtr, chunkEndPtr);
+ chunkStartPtr = chunkEndPtr;
+ }
+
+ try(var executor = Executors.newFixedThreadPool(jobCnt)) {
+ for (int i = 0; i < jobCnt; ++i) {
+ executor.submit(chunks[i]);
+ }
+ executor.shutdown();
+ var ignored = executor.awaitTermination(1, TimeUnit.DAYS);
+ }
+
+ // merge chunks
+ var result = chunks[0].workSet;
+ for (int i = 1; i < jobCnt; ++i) {
+ chunks[i].workSet.forEach((ident, otherStationWorkSet) -> {
+ var stationWorkSet = result.get(ident);
+ if (stationWorkSet == null) {
+ result.put(ident, otherStationWorkSet);
+ }
+ else {
+ stationWorkSet.min = Math.min(stationWorkSet.min, otherStationWorkSet.min);
+ stationWorkSet.max = Math.max(stationWorkSet.max, otherStationWorkSet.max);
+ stationWorkSet.sum += otherStationWorkSet.sum;
+ stationWorkSet.count += otherStationWorkSet.count;
+ }
+ });
+ }
+
+ // print in required format
+ System.out.write('{');
+ var iterator = result.entrySet().iterator();
+ while (iterator.hasNext()) {
+ var entry = iterator.next();
+ System.out.print(entry.getKey().name);
+ System.out.write('=');
+ entry.getValue().printInto(System.out);
+ if (iterator.hasNext()) {
+ System.out.print(", ");
+ }
+ }
+ System.out.println('}');
+ }
+}