aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marko Topolnik <marko.topolnik@gmail.com> 2024-01-05 12:56:09 +0100
committer Gunnar Morling <gunnar.morling@googlemail.com> 2024-01-06 10:35:44 +0100
commit 816e59b6785185e6df1fd8737c25c2cf3dc81e2f (patch)
tree 448560775793ee2cd2a0e52ab512e5ba04a9d52e
parent 0f1f204a0d483b79c81faa47a464c9e9bd11140f (diff)
Eliminate duplicate station names
-rw-r--r-- src/main/java/dev/morling/onebrc/CreateMeasurements3.java 29
1 files changed, 21 insertions, 8 deletions
diff --git a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
index da401ff..65589da 100644
--- a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
+++ b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
@@ -19,8 +19,10 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
+import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.concurrent.ThreadLocalRandom;
public class CreateMeasurements3 {
@@ -78,6 +80,7 @@ public class CreateMeasurements3 {
}
}
final var weatherStations = new ArrayList<WeatherStation>();
+ final var names = new HashSet<String>();
var minLen = Integer.MAX_VALUE;
var maxLen = Integer.MIN_VALUE;
try (var rows = new BufferedReader(new FileReader("data/weather_stations.csv"))) {
@@ -104,18 +107,15 @@ public class CreateMeasurements3 {
}
var name = new String(buf, 0, nameLen).trim();
while (name.length() < nameLen) {
- var n = nameSource.read();
- if (n == -1) {
- throw new Exception("Name source exhausted");
- }
- var ch = (char) n;
- if (ch != ' ') {
- name += ch;
- }
+ name += readNonSpace(nameSource);
+ }
+ while (names.contains(name)) {
+ name = name.substring(1) + readNonSpace(nameSource);
}
if (name.indexOf(';') != -1) {
throw new Exception("Station name contains a semicolon!");
}
+ names.add(name);
var lat = Float.parseFloat(row.substring(row.indexOf(';') + 1));
// Guesstimate mean temperature using cosine of latitude
var avgTemp = (float) (30 * Math.cos(Math.toRadians(lat))) - 10;
@@ -125,4 +125,17 @@ public class CreateMeasurements3 {
System.out.format("Generated %,d station names with length from %,d to %,d%n", KEYSET_SIZE, minLen, maxLen);
return weatherStations;
}
+
+ private static char readNonSpace(StringReader nameSource) throws IOException { // Returns the next character read from nameSource that is not a space (' ').
+ while (true) { // the loop is left only by the return or the throw below
+ var n = nameSource.read(); // kept as an int so the -1 end-of-input marker can be told apart from a character
+ if (n == -1) {
+ throw new IOException("Name source exhausted"); // input ended before a non-space character was found
+ }
+ var ch = (char) n;
+ if (ch != ' ') { // only the plain space character is skipped; every other character is returned as-is
+ return ch;
+ }
+ }
+ }
}