aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/rschwietzke/CheaperCharBuffer.java
blob: d2e3d82d6f16c177d7208b0fb1ccc6c3babab7ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
/*
 *  Copyright 2023 The original authors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.rschwietzke;

import java.util.Arrays;

/**
 * <p>A mutable, growable character buffer for all areas where performance
 * and memory-efficiency are key. It implements {@link CharSequence} and
 * compares equal to any other {@link CharSequence} with the same content
 * (see {@link #equals(Object)}).
 *
 * <p>This buffer is mutable and when you use it, make sure you work with
 * it responsibly. In many cases, we will reuse the buffer to avoid fresh
 * memory allocations, hence you have to pay attention to its usage pattern.
 * It is not meant to be a general String replacement.
 *
 * <p>This class avoids many of the standard runtime checks that will result
 * in a runtime or array exception anyway. Why check twice and raise the
 * same exception?
 *
 * @author René Schwietzke
 * @since 3.10.0
 */
public class CheaperCharBuffer implements CharSequence {
    // our data, can grow - it is exposed publicly on purpose to allow speed,
    // that is not safe, so treat it with care
    public char[] data_;

    // the current size of the string data, always <= data_.length
    public int length_;

    // by how many chars we grow at least when the backing array is too small
    private final int growBy_;

    // how much do we grow if needed, half a cache line
    public static final int CAPACITY_GROWTH = 64 / 2;

    // what is our start size?
    // a cache line is 64 byte mostly, the overhead is mostly 24 bytes
    // a char is two bytes, let's use one cache line
    public static final int INITIAL_CAPACITY = (64 - 24) / 2;

    // static empty version; DON'T MODIFY IT
    public static final CheaperCharBuffer EMPTY = new CheaperCharBuffer(0);

    // the Unicode replacement character U+FFFD
    private static final char REPLACEMENT_CHARACTER = '\uFFFD';

    /**
     * Constructs a CheaperCharBuffer with a default size
     * ({@link #INITIAL_CAPACITY}) and default growth ({@link #CAPACITY_GROWTH}).
     */
    public CheaperCharBuffer() {
        this.data_ = new char[INITIAL_CAPACITY];
        this.length_ = 0;
        this.growBy_ = CAPACITY_GROWTH;
    }

    /**
     * Constructs a CheaperCharBuffer with a desired size.
     *
     * @param startSize the size of the buffer to start with
     */
    public CheaperCharBuffer(final int startSize) {
        this(startSize, CAPACITY_GROWTH);
    }

    /**
     * Constructs a CheaperCharBuffer with a desired size.
     *
     * @param startSize the size of the buffer to start with
     * @param growBy by how much do we want to grow when needed, values
     *      smaller than 1 are raised to 1
     */
    public CheaperCharBuffer(final int startSize, final int growBy) {
        this.data_ = new char[startSize];
        this.length_ = 0;
        this.growBy_ = Math.max(1, growBy);
    }

    /**
     * Constructs a CheaperCharBuffer from another buffer. Copies the data
     * over. The new buffer capacity matches the length of the source.
     *
     * @param src the source buffer to copy from
     */
    public CheaperCharBuffer(final CheaperCharBuffer src) {
        this(src, 0);
    }

    /**
     * Constructs a CheaperCharBuffer from another buffer. Copies the data
     * over. You can add more capacity on top of the source length. If
     * you specify 0, the capacity will match the src length. The growth
     * value is not taken from the source, {@link #CAPACITY_GROWTH} is used.
     *
     * @param src the source buffer to copy from
     * @param addCapacity how much capacity to add to origin length,
     *      negative values are treated as 0
     */
    public CheaperCharBuffer(final CheaperCharBuffer src, final int addCapacity) {
        this.data_ = Arrays.copyOf(src.data_, src.length_ + Math.max(0, addCapacity));
        this.length_ = src.length();
        this.growBy_ = CAPACITY_GROWTH;
    }

    /**
     * Constructs a CheaperCharBuffer from a string. To avoid
     * too much allocation, we just take the string array as is and
     * don't allocate extra space in the first place.
     *
     * @param src the string to copy from
     */
    public CheaperCharBuffer(final String src) {
        this.data_ = src.toCharArray();
        this.length_ = src.length();
        this.growBy_ = CAPACITY_GROWTH;
    }

    /**
     * Constructs a CheaperCharBuffer preset with the specified values.
     * There will not be any room to grow, if you need that, construct an
     * empty one and append.
     *
     * <p>There are no range checks performed. Make sure your data is correct.
     *
     * @param ch     The character array, must not be null
     * @param offset The offset into the character array.
     * @param length The length of characters from the offset.
     */
    public CheaperCharBuffer(final char[] ch, final int offset, final int length) {
        // just as big as we need it
        this(length);
        append(ch, offset, length);
    }

    /**
     * Check capacity and grow if needed automatically. We grow to at least
     * the requested capacity plus the growth value, or to double the
     * current capacity plus 2, whichever is larger.
     *
     * @param minimumCapacity how much space do we need at least
     */
    private void ensureCapacity(final int minimumCapacity) {
        if (minimumCapacity > this.data_.length) {
            final int newSize = Math.max(minimumCapacity + this.growBy_, (this.data_.length << 1) + 2);
            this.data_ = Arrays.copyOf(this.data_, newSize);
        }
    }

    /**
     * Returns the current max capacity without growth. Does not
     * indicate how much capacity is already in use. Use {@link #length()}
     * for that.
     *
     * @return the current capacity, not taking any usage into account
     */
    public int capacity() {
        return this.data_.length;
    }

    /**
     * Appends a single character to the buffer.
     *
     * @param c the character to append
     * @return this instance
     */
    public CheaperCharBuffer append(final char c) {
        final int oldLength = this.length_++;

        // ensureCapacity is not inlined by the compiler, so put that here for the most
        // called method of all appends. Duplicate code, but for a reason.
        if (oldLength == this.data_.length) {
            final int newSize = Math.max(oldLength + this.growBy_, (this.data_.length << 1) + 2);
            this.data_ = Arrays.copyOf(this.data_, newSize);
        }

        this.data_[oldLength] = c;

        return this;
    }

    /**
     * Append a string to this buffer without copying the string first.
     *
     * @param src the string to append, must not be null
     * @return this instance
     */
    public CheaperCharBuffer append(final String src) {
        final int srcLength = src.length();
        final int start = this.length_;
        final int newLength = start + srcLength;

        ensureCapacity(newLength);

        // copies straight from the string into our array, no intermediate char[]
        src.getChars(0, srcLength, this.data_, start);

        // set the length last, so a failure above does not leave us with a wrong length
        this.length_ = newLength;

        return this;
    }

    /**
     * Add another buffer to this one. Appending a buffer to itself is
     * supported and doubles the content.
     *
     * @param src the buffer to append, must not be null
     * @return this instance
     */
    public CheaperCharBuffer append(final CheaperCharBuffer src) {
        // capture the source length first, src might be this instance
        // and we are going to change our own length below
        final int srcLength = src.length_;
        final int start = this.length_;
        final int newLength = start + srcLength;

        ensureCapacity(newLength);

        // src.data_ is read after growing, hence a self-append sees the new array
        System.arraycopy(src.data_, 0, this.data_, start, srcLength);
        this.length_ = newLength;

        return this;
    }

    /**
     * Add data from a char array to this buffer with the ability to specify
     * a range to copy from. No range checks are done upfront, incorrect
     * values will make the underlying array copy fail with an exception.
     *
     * @param src the source char array
     * @param offset the pos to start to copy from
     * @param length the length of the data to copy
     *
     * @return this instance
     */
    public CheaperCharBuffer append(final char[] src, final int offset, final int length) {
        final int start = this.length_;
        final int newLength = start + length;

        ensureCapacity(newLength);

        System.arraycopy(src, offset, this.data_, start, length);

        // set the length last, so a failed copy does not leave us with a wrong length
        this.length_ = newLength;

        return this;
    }

    /**
     * Returns the current length
     *
     * @return the length of the charbuffer data
     */
    @Override
    public int length() {
        return length_;
    }

    /**
     * Tell us how much the capacity grows if needed
     *
     * @return the value that determines how much we grow the backing
     *      array in case we have to
     */
    public int getGrowBy() {
        return this.growBy_;
    }

    /**
     * Resets the buffer to 0 length. It won't resize it to avoid memory
     * churn.
     *
     * @return this instance for fluid programming
     */
    public CheaperCharBuffer clear() {
        this.length_ = 0;

        return this;
    }

    /**
     * Resets the buffer to 0 length and sets the new data. This
     * is a little cheaper than clear().append(c) depending on
     * the where and the inlining decisions.
     *
     * @param c the char to set
     * @return this instance for fluid programming
     */
    public CheaperCharBuffer clearAndAppend(final char c) {
        this.length_ = 0;

        if (this.data_.length > 0) {
            this.data_[this.length_] = c;
            this.length_++;
        }
        else {
            // the rare case when we don't have any buffer at hand
            append(c);
        }

        return this;
    }

    /**
     * Does this buffer end with this string? If we check for
     * the empty string, we get true. If we would support JDK 11, we could
     * use Arrays.mismatch and be way faster.
     *
     * @param s the string to check the end against
     * @return true if the end matches the buffer, false otherwise
     */
    public boolean endsWith(final String s) {
        // length does not match, cannot be the end
        if (this.length_ < s.length()) {
            return false;
        }

        // check the string by each char, avoids a copy of the string
        final int start = this.length_ - s.length();

        // change this to Arrays.mismatch when going JDK 11 or higher
        for (int i = 0; i < s.length(); i++) {
            if (this.data_[i + start] != s.charAt(i)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Reduces the buffer to the content between start and end marker when
     * only whitespaces are found before the startMarker as well as after the end marker.
     * If both strings overlap due to identical characters such as "foo" and "oof"
     * and the buffer is " foof ", we don't do anything.
     *
     * <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side.
     *
     * @param startMarker the start string to find, must not be null
     * @param endMarker the end string to find, must not be null
     * @return this instance
     *
     * @deprecated Use the new method {@link #trimToContent(String, String)} instead.
     */
    @Deprecated
    public CheaperCharBuffer reduceToContent(final String startMarker, final String endMarker) {
        return trimToContent(startMarker, endMarker);
    }

    /**
     * Reduces the buffer to the content between start and end marker when
     * only whitespaces are found before the startMarker as well as after the end marker.
     * If both strings overlap due to identical characters such as "foo" and "oof"
     * and the buffer is " foof ", we don't do anything.
     *
     * <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side.
     *
     * @param startMarker the start string to find, must not be null
     * @param endMarker the end string to find, must not be null
     * @return this instance
     */
    public CheaperCharBuffer trimToContent(final String startMarker, final String endMarker) {
        // if both are longer or same length than content, don't do anything
        final int markerLength = startMarker.length() + endMarker.length();
        if (markerLength >= this.length_) {
            return this;
        }

        // run over starting whitespaces
        int sPos = 0;
        for (; sPos < this.length_ - markerLength; sPos++) {
            if (!Character.isWhitespace(this.data_[sPos])) {
                break;
            }
        }

        // run over ending whitespaces, never go below the start position,
        // otherwise we would read before the array start when the buffer
        // holds only whitespace
        int ePos = this.length_ - 1;
        for (; ePos >= sPos; ePos--) {
            if (!Character.isWhitespace(this.data_[ePos])) {
                break;
            }
        }

        // if we have less content than marker length, give up
        // this also helps when markers overlap such as
        // <!-- and --> and the string is " <!---> "
        if (ePos - sPos + 1 < markerLength) {
            return this;
        }

        // check the start
        for (int i = 0; i < startMarker.length(); i++) {
            if (startMarker.charAt(i) != this.data_[i + sPos]) {
                // no start match, stop and don't do anything
                return this;
            }
        }

        // check the end, ePos is where the last non-whitespace char
        // occurred
        final int endStartCheckPos = ePos - endMarker.length() + 1;
        for (int i = 0; i < endMarker.length(); i++) {
            if (endMarker.charAt(i) != this.data_[endStartCheckPos + i]) {
                // no end match, stop and don't do anything
                return this;
            }
        }

        // shift left and cut length
        final int newLength = ePos - sPos + 1 - markerLength;
        System.arraycopy(this.data_,
                sPos + startMarker.length(),
                this.data_,
                0, newLength);
        this.length_ = newLength;

        return this;
    }

    /**
     * Check if we have only whitespaces. An empty buffer reports true.
     *
     * @return true if we have only whitespace, false otherwise
     */
    public boolean isWhitespace() {
        for (int i = 0; i < this.length_; i++) {
            if (!Character.isWhitespace(this.data_[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Trims the buffer similar to {@link java.lang.String#trim()}, whitespace
     * is determined by {@link Character#isWhitespace(char)}. The buffer is
     * modified in place.
     *
     * @return this instance with removed whitespace at the beginning and the end
     */
    public CheaperCharBuffer trim() {
        // clean the end first, because it is cheap
        return trimTrailing().trimLeading();
    }

    /**
     * Removes all whitespace before the first non-whitespace char.
     * If all are whitespaces, we get an empty buffer
     *
     * @return this instance
     */
    public CheaperCharBuffer trimLeading() {
        // run over starting whitespace
        int sPos = 0;
        for (; sPos < this.length_; sPos++) {
            if (!Character.isWhitespace(this.data_[sPos])) {
                break;
            }
        }

        if (sPos == 0) {
            // nothing to do
            return this;
        }
        else if (sPos == this.length_) {
            // only whitespace
            this.length_ = 0;
            return this;
        }

        // shift left
        final int newLength = this.length_ - sPos;
        System.arraycopy(this.data_,
                sPos,
                this.data_,
                0, newLength);
        this.length_ = newLength;

        return this;
    }

    /**
     * Removes all whitespace at the end.
     * If all are whitespace, we get an empty buffer
     *
     * @return this instance
     *
     * @deprecated Use {@link #trimTrailing()} instead.
     */
    @Deprecated
    public CheaperCharBuffer trimWhitespaceAtEnd() {
        return trimTrailing();
    }

    /**
     * Removes all whitespace at the end.
     * If all are whitespace, we get an empty buffer
     *
     * @return this instance
     */
    public CheaperCharBuffer trimTrailing() {
        // run over ending whitespaces
        int ePos = this.length_ - 1;
        for (; ePos >= 0; ePos--) {
            if (!Character.isWhitespace(this.data_[ePos])) {
                break;
            }
        }

        this.length_ = ePos + 1;

        return this;
    }

    /**
     * Shortens the buffer by that many positions. If the count is
     * larger than the length, we get just an empty buffer. If you pass in negative
     * values, we are failing, likely often silently. It is all about performance and
     * not a general all-purpose API.
     *
     * @param count a positive number, no runtime checks, if count is larger than
     *      length, we get length = 0
     * @return this instance
     */
    public CheaperCharBuffer shortenBy(final int count) {
        final int newLength = this.length_ - count;
        this.length_ = newLength < 0 ? 0 : newLength;

        return this;
    }

    /**
     * Get the characters as char array, this will be a copy!
     *
     * @return a copy of the underlying char data
     */
    public char[] getChars() {
        return Arrays.copyOf(this.data_, this.length_);
    }

    /**
     * Returns a string representation of this buffer. This will be a copy
     * operation. If the buffer is empty, we get a constant empty String back
     * to avoid any overhead.
     *
     * @return a string of the content of this buffer
     */
    @Override
    public String toString() {
        if (this.length_ > 0) {
            return new String(this.data_, 0, this.length_);
        }
        else {
            return "";
        }
    }

    /**
     * Returns the char at the given position. Will complain if
     * we try to read outside the range. We do a range check here
     * because we might not notice when we are within the buffer
     * but outside the current length.
     *
     * @param index the position to read from
     * @return the char at the position
     * @throws IndexOutOfBoundsException
     *      in case one tries to read outside of valid buffer range
     */
    @Override
    public char charAt(final int index) {
        if (index > this.length_ - 1 || index < 0) {
            throw new IndexOutOfBoundsException(
                    "Tried to read outside of the valid buffer data");
        }

        return this.data_[index];
    }

    /**
     * Returns the char at the given position. No checks are
     * performed. It is up to the caller to make sure we
     * read correctly. Reading outside of the array will
     * cause an {@link IndexOutOfBoundsException} but using an
     * incorrect position in the array (such as beyond length)
     * might stay unnoticed! This is a performance method,
     * use at your own risk.
     *
     * @param index the position to read from
     * @return the char at the position
     */
    public char unsafeCharAt(final int index) {
        return this.data_[index];
    }

    /**
     * Returns a content copy of this buffer
     *
     * @return a copy of this buffer, the capacity might differ
     */
    @Override
    public CheaperCharBuffer clone() {
        return new CheaperCharBuffer(this);
    }

    /**
     * Returns a <code>CharSequence</code> that is a subsequence of this sequence.
     * The subsequence starts with the <code>char</code> value at the specified index and
     * ends with the <code>char</code> value at index {@code end - 1}.  The length
     * (in <code>char</code>s) of the
     * returned sequence is {@code end - start}, so if {@code start == end}
     * then an empty sequence is returned. The returned sequence is an
     * independent {@link String} copy and not a view on this buffer.
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     *
     * @return  the specified subsequence
     *
     * @throws  IndexOutOfBoundsException
     *          if {@code start} or {@code end} are negative,
     *          if {@code end} is greater than {@code length()},
     *          or if {@code start} is greater than {@code end}
     */
    @Override
    public CharSequence subSequence(final int start, final int end) {
        if (start < 0) {
            throw new StringIndexOutOfBoundsException(start);
        }
        if (end > this.length_) {
            throw new StringIndexOutOfBoundsException(end);
        }

        // a negative end always yields a negative length here because start is >= 0
        final int l = end - start;
        if (l < 0) {
            throw new StringIndexOutOfBoundsException(l);
        }

        return new String(this.data_, start, l);
    }

    /**
     * Two buffers are identical when the length and
     * the content of the backing array (only for the
     * data in view) are identical. The comparison accepts
     * any {@link CharSequence}, not only buffers of this type.
     *
     * @param o the object to compare with
     * @return true if length and array content match, false otherwise
     */
    @Override
    public boolean equals(final Object o) {
        if (o == this) {
            // same instance, no need to compare the content
            return true;
        }

        if (o instanceof CharSequence) {
            final CharSequence ob = (CharSequence) o;

            if (ob.length() != this.length_) {
                return false;
            }

            // ok, in JDK 11 or up, we could use an
            // Arrays.mismatch, but we cannot do that
            // due to JDK 8 compatibility
            for (int i = 0; i < this.length_; i++) {
                if (ob.charAt(i) != this.data_[i]) {
                    return false;
                }
            }

            // length and content match, be happy
            return true;
        }

        return false;
    }

    /**
     * We don't cache the hashcode because we mutate often. Don't use this in
     * hashmaps as key. But you can use that to look up in a hashmap against
     * a string using the CharSequence interface.
     *
     * @return the hashcode, similar to what a normal string would deliver
     */
    @Override
    public int hashCode() {
        int h = 0;

        // (h << 5) - h is h * 31
        for (int i = 0; i < this.length_; i++) {
            h = ((h << 5) - h) + this.data_[i];
        }

        return h;
    }

    /**
     * Append a character to this buffer. The character is an int value, and
     * can either be a single UTF-16 character or a supplementary character
     * represented by two UTF-16 code units.
     *
     * <p>If the value is not a valid code point, the replacement character
     * U+FFFD is appended before the exception is raised.
     *
     * @param value The character value.
     * @return this instance for fluid programming
     *
     * @throws IllegalArgumentException if the specified
     *          {@code value} is not a valid Unicode code point.
     */
    public CheaperCharBuffer appendCodePoint(final int value) {
        // negative values are not BMP code points, hence they take the
        // validating path below instead of being cast to a random char
        if (Character.isBmpCodePoint(value)) {
            return this.append((char) value);
        }

        try {
            final char[] chars = Character.toChars(value);
            return this.append(chars, 0, chars.length);
        }
        catch (final IllegalArgumentException e) {
            // when value is not a valid code point
            this.append(REPLACEMENT_CHARACTER);
            throw e;
        }
    }
}