aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/dev/morling/onebrc
diff options
context:
space:
mode:
authorkumarsaurav123 <kumar.saurav@eko.co.in>2024-01-11 14:25:24 +0530
committerGitHub <noreply@github.com>2024-01-11 09:55:24 +0100
commit99dd18b672304e6bfb78396c9456cdf73b3bb658 (patch)
treedb5d4e161de6e7adcb4a6a085645bd6fda4de694 /src/main/java/dev/morling/onebrc
parent965e852ba7e9c153d6690d5aa195b3f673d79966 (diff)
CalculateAverage_kumarsaurav123.java-> RAM 16 GB CPU 32 Core (#247)
Diffstat (limited to 'src/main/java/dev/morling/onebrc')
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java256
1 files changed, 256 insertions, 0 deletions
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java
new file mode 100644
index 0000000..bac5588
--- /dev/null
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kumarsaurav123.java
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2023 The original authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.morling.onebrc;
+
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Paths;
+import java.util.*;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collector;
+import java.util.stream.IntStream;
+
+import static java.util.stream.Collectors.groupingBy;
+
+public class CalculateAverage_kumarsaurav123 {
+
+ private static final String FILE = "./measurements.txt";
+
+ private static record Measurement(String station, double value) {
+ private Measurement(String[] parts) {
+ this(parts[0], Double.parseDouble(parts[1]));
+ }
+ }
+
+ private static record ResultRow(String station,double min, double mean, double max,double sum,double count) {
+ public String toString() {
+ return round(min) + "/" + round(mean) + "/" + round(max);
+ }
+
+ private double round(double value) {
+ return Math.round(value * 10.0) / 10.0;
+ }
+ }
+
+ ;
+
+ private static class MeasurementAggregator {
+ private double min = Double.POSITIVE_INFINITY;
+ private double max = Double.NEGATIVE_INFINITY;
+ private double sum;
+ private long count;
+
+ private String station;
+ }
+
+ public static void main(String[] args) {
+ HashMap<Byte,Integer> map=new HashMap<>();
+ map.put((byte) 48,0);
+ map.put((byte) 49,1);
+ map.put((byte) 50,2);
+ map.put((byte) 51,3);
+ map.put((byte) 52,4);
+ map.put((byte) 53,5);
+ map.put((byte) 54,6);
+ map.put((byte) 55,7);
+ map.put((byte) 56,8);
+ map.put((byte) 57,9);
+ Collector<ResultRow, MeasurementAggregator, ResultRow> collector2 = Collector.of(
+ MeasurementAggregator::new,
+ (a, m) -> {
+ a.min = Math.min(a.min, m.min);
+ a.max = Math.max(a.max, m.max);
+ a.sum += m.sum;
+ a.count += m.count;
+ },
+ (agg1, agg2) -> {
+ var res = new MeasurementAggregator();
+ res.min = Math.min(agg1.min, agg2.min);
+ res.max = Math.max(agg1.max, agg2.max);
+ res.sum = agg1.sum + agg2.sum;
+ res.count = agg1.count + agg2.count;
+
+ return res;
+ },
+ agg -> {
+ return new ResultRow(agg.station, agg.min, agg.sum / agg.count, agg.max, agg.sum, agg.count);
+ });
+ Collector<Measurement, MeasurementAggregator, ResultRow> collector = Collector.of(
+ MeasurementAggregator::new,
+ (a, m) -> {
+ a.min = Math.min(a.min, m.value);
+ a.max = Math.max(a.max, m.value);
+ a.sum += m.value;
+ a.station = m.station;
+ a.count++;
+ },
+ (agg1, agg2) -> {
+ var res = new MeasurementAggregator();
+ res.min = Math.min(agg1.min, agg2.min);
+ res.max = Math.max(agg1.max, agg2.max);
+ res.sum = agg1.sum + agg2.sum;
+ res.count = agg1.count + agg2.count;
+
+ return res;
+ },
+ agg -> {
+ return new ResultRow(agg.station, agg.min, agg.sum / agg.count, agg.max, agg.sum, agg.count);
+ });
+
+ long start = System.currentTimeMillis();
+ long len = Paths.get(FILE).toFile().length();
+ Map<Integer, List<byte[]>> leftOutsMap = new ConcurrentSkipListMap<>();
+ int chunkSize = 1_0000_00;
+ long proc = Math.max(1, (len / chunkSize));
+ ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2 * 2 * 2);
+ List<ResultRow> measurements = Collections.synchronizedList(new ArrayList<ResultRow>());
+ IntStream.range(0, (int) proc)
+ .mapToObj(i -> {
+ return new Runnable() {
+ @Override
+ public void run() {
+ try {
+ RandomAccessFile file = new RandomAccessFile(FILE, "r");
+ byte[] allBytes2 = new byte[chunkSize];
+ file.seek((long) i * (long) chunkSize);
+ int l = file.read(allBytes2);
+ byte[] eol = "\n".getBytes(StandardCharsets.UTF_8);
+ byte[] sep = ";".getBytes(StandardCharsets.UTF_8);
+
+ List<Measurement> mst = new ArrayList<>();
+ int st = 0;
+ int cnt = 0;
+ ArrayList<byte[]> local = new ArrayList<>();
+
+ for (int i = 0; i < l; i++) {
+ if (allBytes2[i] == eol[0]) {
+ if (i != 0) {
+ byte[] s2 = new byte[i - st];
+ System.arraycopy(allBytes2, st, s2, 0, s2.length);
+ if (cnt != 0) {
+ for(int j=0;j<s2.length;j++)
+ {
+ if (s2[j] == sep[0]) {
+ byte[] city = new byte[j];
+ byte[] value = new byte[s2.length-j-1];
+ System.arraycopy(s2, 0, city, 0, city.length);
+ System.arraycopy(s2, city.length+1, value,0, value.length);
+ double d=0.0;
+ int s=-1;
+ for(int k=value.length-1;k>=0;k--) {
+ if (value[k] == 45) {
+ d=d*-1;
+ }
+ else if (value[k] == 46) {
+ } else {
+ d = d + map.get(value[k]).intValue() * Math.pow(10, s);
+ s++;
+ }
+ }
+ mst.add(new Measurement(new String(city),d));
+
+ }
+ }
+
+ }
+ else {
+ local.add(s2);
+ }
+
+ }
+ cnt++;
+ st = i + 1;
+ }
+ }
+ if (st < l) {
+ byte[] s2 = new byte[allBytes2.length - st];
+ System.arraycopy(allBytes2, st, s2, 0, s2.length);
+ local.add(s2);
+ }
+ leftOutsMap.put(i, local);
+ allBytes2 = null;
+ measurements.addAll(mst.stream()
+ .collect(groupingBy(Measurement::station, collector))
+ .values());
+// System.out.println(measurements.size());
+ }
+ catch (Exception e) {
+ // throw new RuntimeException(e);
+ System.out.println("");
+ }
+ }
+ };
+ })
+ .forEach(executor::submit);
+ executor.shutdown();
+
+ try {
+ executor.awaitTermination(10, TimeUnit.MINUTES);
+ }
+ catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ Collection<Measurement> lMeasure = new ArrayList<>();
+ List<byte[]> leftOuts = leftOutsMap.values()
+ .stream()
+ .flatMap(List::stream)
+ .toList();
+ int size = 0;
+ for (int i = 0; i < leftOuts.size(); i++) {
+ size = size + leftOuts.get(i).length;
+ }
+ byte[] allBytes = new byte[size];
+ int pos = 0;
+ for (int i = 0; i < leftOuts.size(); i++) {
+ System.arraycopy(leftOuts.get(i), 0, allBytes, pos, leftOuts.get(i).length);
+ pos = pos + leftOuts.get(i).length;
+ }
+ List<String> l = Arrays.asList(new String(allBytes).split(";"));
+ List<Measurement> measurements1 = new ArrayList<>();
+ String city = l.get(0);
+ for (int i = 0; i < l.size() - 1; i++) {
+ int sIndex = l.get(i + 1).indexOf('.') + 2;
+
+ String tempp = l.get(i + 1).substring(0, sIndex);
+
+ measurements1.add(new Measurement(city, Double.parseDouble(tempp)));
+ city = l.get(i + 1).substring(sIndex);
+ }
+ measurements.addAll(measurements1.stream()
+ .collect(groupingBy(Measurement::station, collector))
+ .values());
+ Map<String, ResultRow> measurements2 = new TreeMap<>(measurements
+ .stream()
+ .parallel()
+ .collect(groupingBy(ResultRow::station, collector2)));
+
+ // Read from bytes 1000 to 2000
+ // Something like this
+
+ //
+ // Map<String, ResultRow> measurements = new TreeMap<>(Files.lines(Paths.get(FILE))
+ // .map(l -> new Measurement(l.split(";")))
+ // .collect(groupingBy(m -> m.station(), collector)));
+
+ System.out.println(measurements2);
+// System.out.println(System.currentTimeMillis() - start);
+ }
+}