aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/dev/morling/onebrc
diff options
context:
space:
mode:
authorDavid Kopec <snpukwana@gmail.com>2024-01-05 10:35:05 -0500
committerGitHub <noreply@github.com>2024-01-05 16:35:05 +0100
commit8a282ab71b3f9296a84df398f7e2e978ad5647d9 (patch)
tree3714ce7f8951ad17d503f9ac9cb9c460d905a36d /src/main/java/dev/morling/onebrc
parent87036747653846cced97df0171b4a1fd072da800 (diff)
Add davecom Entry
Diffstat (limited to 'src/main/java/dev/morling/onebrc')
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java208
1 files changed, 208 insertions, 0 deletions
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java b/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java
new file mode 100644
index 0000000..a4f7619
--- /dev/null
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2023 The original authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.morling.onebrc;
+
+import java.io.IOException;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.text.DecimalFormat;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.HashMap;
+import java.util.IntSummaryStatistics;
+import java.util.ArrayList;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+public class CalculateAverage_davecom {
+
+ /*
+ * Original Header Could Not Be Changed to Match Checks so...
+ * Copyright 2024 David Kopec
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * Created by David Kopec with some inspiration from seijikun's solution
+ * and assistance from GitHub Copilot.
+ */
+
+ private static final String FILE = "./measurements.txt";
+
+ private static final ConcurrentHashMap<String, Integer> mins = new ConcurrentHashMap<>();
+ private static final ConcurrentHashMap<String, Integer> maxs = new ConcurrentHashMap<>();
+ private static final ConcurrentHashMap<String, Integer> sums = new ConcurrentHashMap<>();
+ private static final ConcurrentHashMap<String, Integer> counts = new ConcurrentHashMap<>();
+
+ public static void processChunk(MappedByteBuffer chunk, long chunkSize) {
+ // setup
+ chunk.load();
+ HashMap<String, IntSummaryStatistics> values = new HashMap<>();
+
+ // do the actual processing
+ long end = chunk.position() + chunkSize;
+ byte[] name = new byte[128];
+ int value = 0;
+ byte b = 0;
+ boolean negate = false;
+ long nameStart = 0;
+ long nameEnd = 0;
+ int nameLength = 0;
+ while (chunk.position() < end) {
+ // read name up to semicolon
+ nameStart = chunk.position();
+ b = chunk.get();
+ while (b != ';') {
+ b = chunk.get();
+ }
+ nameEnd = chunk.position() - 1;
+ nameLength = (int) (nameEnd - nameStart);
+ // generate byte array for name
+ chunk.get(chunk.position() - nameLength - 1, name, 0, nameLength);
+ // convert name to string
+ String nameString = new String(name, 0, nameLength);
+ // read value
+ value = 0;
+ b = chunk.get();
+ negate = false;
+ while (b != '\n') {
+ if (b == '.') {
+ b = chunk.get();
+ continue;
+ }
+ else if (b == '-') {
+ negate = true;
+ b = chunk.get();
+ continue;
+ }
+ value = value * 10 + (b - '0');
+ b = chunk.get();
+ }
+ if (negate) {
+ value = -value;
+ }
+
+ if (values.containsKey(nameString)) {
+ values.get(nameString).accept(value);
+ }
+ else {
+ IntSummaryStatistics stats = new IntSummaryStatistics();
+ stats.accept(value);
+ values.put(nameString, stats);
+ }
+ }
+
+ for (String nameStr : values.keySet()) {
+ IntSummaryStatistics stats = values.get(nameStr);
+ mins.compute(nameStr, (k, v) -> v == null ? stats.getMin() : Math.min(v, stats.getMin()));
+ maxs.compute(nameStr, (k, v) -> v == null ? stats.getMax() : Math.max(v, stats.getMax()));
+ sums.compute(nameStr, (k, v) -> v == null ? (int) stats.getSum() : (v + (int) stats.getSum()));
+ counts.compute(nameStr, (k, v) -> v == null ? (int) stats.getCount() : (v + (int) stats.getCount()));
+ }
+ }
+
+ public static void outputResults() {
+ // output results sorted by name with format {name}={min}/{mean}/{max} in one giant string
+ // fast string concatenation starting with { and ending with } with a comma between each (no newlines)
+ StringBuilder sb = new StringBuilder();
+ sb.append('{');
+ var sortedNames = mins.keySet().stream().sorted().toArray(String[]::new);
+ DecimalFormat df = new DecimalFormat("0.0");
+ df.setRoundingMode(RoundingMode.HALF_UP);
+ for (String name : sortedNames) {
+ double min = ((double) mins.get(name)) / 10;
+ double max = ((double) maxs.get(name)) / 10;
+ double average = ((double) sums.get(name)) / ((double) counts.get(name)) / 10;
+ sb.append(name);
+ sb.append('=');
+ sb.append(df.format(min));
+ sb.append('/');
+ sb.append(df.format(average));
+ sb.append('/');
+ sb.append(df.format(max));
+ sb.append(',');
+ sb.append(' ');
+ }
+ if (sb.length() > 1) {
+ sb.deleteCharAt(sb.length() - 2);
+ sb.deleteCharAt(sb.length() - 1);
+ }
+ sb.append('}');
+ System.out.println(sb.toString());
+ }
+
+ public static void main(String[] args) throws IOException {
+ // create thread pool
+ ExecutorService es = Executors.newVirtualThreadPerTaskExecutor();
+
+ // load file
+ FileChannel fc = FileChannel.open(Path.of(FILE));
+
+ // configuration information
+ long fileSize = fc.size();
+ int numProcessors = Runtime.getRuntime().availableProcessors();
+ int numChunks = numProcessors * 2000;
+ // System.out.println("numProcessors: " + numProcessors);
+ // System.out.println("numChunks: " + numChunks);
+
+ // create check buffer
+ ByteBuffer bb = ByteBuffer.allocateDirect(128);
+
+ // find appropriate chunks
+ // System.out.println("fileSize: " + fileSize);
+ long chunkLimit = fileSize / numChunks;
+ long chunkStart = 0;
+ long chunkEnd = chunkLimit;
+ // int chunkNum = 0;
+ while (chunkEnd < fileSize) {
+ // System.out.println("initiated chunkNum: " + chunkNum);
+ // find the next newline
+ fc.position(chunkEnd);
+ bb.clear();
+ fc.read(bb);
+ bb.flip();
+ while (bb.get() != '\n' && bb.position() < bb.limit()) {
+ }
+ chunkEnd = chunkEnd + bb.position();
+ if (chunkEnd > fileSize) {
+ chunkEnd = fileSize - 1;
+ }
+ // process chunk
+ long chunkSize = chunkEnd - chunkStart;
+ if (chunkSize < 1) {
+ break;
+ }
+ // System.out.println("chunkStart: " + chunkStart);
+ // System.out.println("chunkEnd: " + chunkEnd);
+ // System.out.println("chunkSize: " + chunkSize);
+ MappedByteBuffer chunk = fc.map(FileChannel.MapMode.READ_ONLY, chunkStart, chunkSize);
+ // final int chunkNumFinal = chunkNum;
+ es.submit(() -> {
+ // System.out.println("started chunkNum: " + chunkNumFinal);
+ processChunk(chunk, chunkSize);
+ // System.out.println("finished chunkNum: " + chunkNumFinal);
+ });
+ chunkStart = chunkEnd;
+ chunkEnd = chunkEnd + chunkLimit;
+ if (chunkEnd > fileSize) {
+ chunkEnd = fileSize - 1;
+ }
+ // chunkNum++;
+ }
+ es.close();
+
+ outputResults();
+ }
+}