diff options
| author | Marko Topolnik <marko.topolnik@gmail.com> | 2024-01-05 12:56:09 +0100 |
|---|---|---|
| committer | Gunnar Morling <gunnar.morling@googlemail.com> | 2024-01-06 10:35:44 +0100 |
| commit | 816e59b6785185e6df1fd8737c25c2cf3dc81e2f (patch) | |
| tree | 448560775793ee2cd2a0e52ab512e5ba04a9d52e | |
| parent | 0f1f204a0d483b79c81faa47a464c9e9bd11140f (diff) | |
Eliminate duplicate station names
| -rw-r--r-- | src/main/java/dev/morling/onebrc/CreateMeasurements3.java | 29 |
1 files changed, 21 insertions, 8 deletions
```diff
diff --git a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
index da401ff..65589da 100644
--- a/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
+++ b/src/main/java/dev/morling/onebrc/CreateMeasurements3.java
@@ -19,8 +19,10 @@ import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.FileReader;
 import java.io.FileWriter;
+import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.concurrent.ThreadLocalRandom;
 
 public class CreateMeasurements3 {
@@ -78,6 +80,7 @@ public class CreateMeasurements3 {
             }
         }
         final var weatherStations = new ArrayList<WeatherStation>();
+        final var names = new HashSet<String>();
         var minLen = Integer.MAX_VALUE;
         var maxLen = Integer.MIN_VALUE;
         try (var rows = new BufferedReader(new FileReader("data/weather_stations.csv"))) {
@@ -104,18 +107,15 @@ public class CreateMeasurements3 {
                 }
                 var name = new String(buf, 0, nameLen).trim();
                 while (name.length() < nameLen) {
-                    var n = nameSource.read();
-                    if (n == -1) {
-                        throw new Exception("Name source exhausted");
-                    }
-                    var ch = (char) n;
-                    if (ch != ' ') {
-                        name += ch;
-                    }
+                    name += readNonSpace(nameSource);
+                }
+                while (names.contains(name)) {
+                    name = name.substring(1) + readNonSpace(nameSource);
                 }
                 if (name.indexOf(';') != -1) {
                     throw new Exception("Station name contains a semicolon!");
                 }
+                names.add(name);
                 var lat = Float.parseFloat(row.substring(row.indexOf(';') + 1));
                 // Guesstimate mean temperature using cosine of latitude
                 var avgTemp = (float) (30 * Math.cos(Math.toRadians(lat))) - 10;
@@ -125,4 +125,17 @@ public class CreateMeasurements3 {
         System.out.format("Generated %,d station names with length from %,d to %,d%n", KEYSET_SIZE, minLen, maxLen);
         return weatherStations;
     }
+
+    private static char readNonSpace(StringReader nameSource) throws IOException {
+        while (true) {
+            var n = nameSource.read();
+            if (n == -1) {
+                throw new IOException("Name source exhausted");
+            }
+            var ch = (char) n;
+            if (ch != ' ') {
+                return ch;
+            }
+        }
+    }
 }
```
