/* * Copyright 2023 The original authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.rschwietzke; import java.util.Arrays; /** *

This class is meant to replaces the old {@link CheaperCharBuffer} in all areas * where performance and memory-efficency is key. XMLString compatibility * remains in place in case one has used that in their own code. * *

This buffer is mutable and when you use it, make sure you work with * it responsibly. In many cases, we will reuse the buffer to avoid fresh * memory allocations, hence you have to pay attention to its usage pattern. * It is not meant to be a general String replacement. * *

This class avoids many of the standard runtime checks that will result * in a runtime or array exception anyway. Why check twice and raise the * same exception? * * @author René Schwietzke * @since 3.10.0 */ public class CheaperCharBuffer implements CharSequence { // our data, can grow - that is not safe and has be altered from the original code // to allow speed public char[] data_; // the current size of the string data public int length_; // the current size of the string data private final int growBy_; // how much do we grow if needed, half a cache line public static final int CAPACITY_GROWTH = 64 / 2; // what is our start size? // a cache line is 64 byte mostly, the overhead is mostly 24 bytes // a char is two bytes, let's use one cache lines public static final int INITIAL_CAPACITY = (64 - 24) / 2; // static empty version; DON'T MODIFY IT public static final CheaperCharBuffer EMPTY = new CheaperCharBuffer(0); // the � character private static final char REPLACEMENT_CHARACTER = '\uFFFD'; /** * Constructs an XMLCharBuffer with a default size. */ public CheaperCharBuffer() { this.data_ = new char[INITIAL_CAPACITY]; this.length_ = 0; this.growBy_ = CAPACITY_GROWTH; } /** * Constructs an XMLCharBuffer with a desired size. * * @param startSize the size of the buffer to start with */ public CheaperCharBuffer(final int startSize) { this(startSize, CAPACITY_GROWTH); } /** * Constructs an XMLCharBuffer with a desired size. * * @param startSize the size of the buffer to start with * @param growBy by how much do we want to grow when needed */ public CheaperCharBuffer(final int startSize, final int growBy) { this.data_ = new char[startSize]; this.length_ = 0; this.growBy_ = Math.max(1, growBy); } /** * Constructs an XMLCharBuffer from another buffer. Copies the data * over. The new buffer capacity matches the length of the source. * * @param src the source buffer to copy from */ public CheaperCharBuffer(final CheaperCharBuffer src) { this(src, 0); } /** * Constructs an XMLCharBuffer from another buffer. Copies the data * over. You can add more capacity on top of the source length. If * you specify 0, the capacity will match the src length. * * @param src the source buffer to copy from * @param addCapacity how much capacity to add to origin length */ public CheaperCharBuffer(final CheaperCharBuffer src, final int addCapacity) { this.data_ = Arrays.copyOf(src.data_, src.length_ + Math.max(0, addCapacity)); this.length_ = src.length(); this.growBy_ = Math.max(1, CAPACITY_GROWTH); } /** * Constructs an XMLCharBuffer from a string. To avoid * too much allocation, we just take the string array as is and * don't allocate extra space in the first place. * * @param src the string to copy from */ public CheaperCharBuffer(final String src) { this.data_ = src.toCharArray(); this.length_ = src.length(); this.growBy_ = CAPACITY_GROWTH; } /** * Constructs an XMLString structure preset with the specified values. * There will not be any room to grow, if you need that, construct an * empty one and append. * *

There are not range checks performed. Make sure your data is correct. * * @param ch The character array, must not be null * @param offset The offset into the character array. * @param length The length of characters from the offset. */ public CheaperCharBuffer(final char[] ch, final int offset, final int length) { // just as big as we need it this(length); append(ch, offset, length); } /** * Check capacity and grow if needed automatically * * @param minimumCapacity how much space do we need at least */ private void ensureCapacity(final int minimumCapacity) { if (minimumCapacity > this.data_.length) { final int newSize = Math.max(minimumCapacity + this.growBy_, (this.data_.length << 1) + 2); this.data_ = Arrays.copyOf(this.data_, newSize); } } /** * Returns the current max capacity without growth. Does not * indicate how much capacity is already in use. Use {@link #length()} * for that. * * @return the current capacity, not taken any usage into account */ public int capacity() { return this.data_.length; } /** * Appends a single character to the buffer. * * @param c the character to append * @return this instance */ public CheaperCharBuffer append(final char c) { final int oldLength = this.length_++; // ensureCapacity is not inlined by the compiler, so put that here for the most // called method of all appends. Duplicate code, but for a reason. if (oldLength == this.data_.length) { final int newSize = Math.max(oldLength + this.growBy_, (this.data_.length << 1) + 2); this.data_ = Arrays.copyOf(this.data_, newSize); } this.data_[oldLength] = c; return this; } /** * Append a string to this buffer without copying the string first. * * @param src the string to append * @return this instance */ public CheaperCharBuffer append(final String src) { final int start = this.length_; this.length_ = this.length_ + src.length(); ensureCapacity(this.length_); // copy char by char because we don't get a copy for free // from a string yet, this might change when immutable arrays // make it into Java, but that will not be very soon for (int i = 0; i < src.length(); i++) { this.data_[start + i] = src.charAt(i); } return this; } /** * Add another buffer to this one. * * @param src the buffer to append * @return this instance */ public CheaperCharBuffer append(final CheaperCharBuffer src) { final int start = this.length_; this.length_ = this.length_ + src.length(); ensureCapacity(this.length_); System.arraycopy(src.data_, 0, this.data_, start, src.length_); return this; } /** * Add data from a char array to this buffer with the ability to specify * a range to copy from * * @param src the source char array * @param offset the pos to start to copy from * @param length the length of the data to copy * * @return this instance */ public CheaperCharBuffer append(final char[] src, final int offset, final int length) { final int start = this.length_; this.length_ = start + length; ensureCapacity(this.length_); System.arraycopy(src, offset, this.data_, start, length); return this; } /** * Returns the current length * * @return the length of the charbuffer data */ public int length() { return length_; } /** * Tell us how much the capacity grows if needed * * @return the value that determines how much we grow the backing * array in case we have to */ public int getGrowBy() { return this.growBy_; } /** * Resets the buffer to 0 length. It won't resize it to avoid memory * churn. * * @return this instance for fluid programming */ public CheaperCharBuffer clear() { this.length_ = 0; return this; } /** * Resets the buffer to 0 length and sets the new data. This * is a little cheaper than clear().append(c) depending on * the where and the inlining decisions. * * @param c the char to set * @return this instance for fluid programming */ public CheaperCharBuffer clearAndAppend(final char c) { this.length_ = 0; if (this.data_.length > 0) { this.data_[this.length_] = c; this.length_++; } else { // the rare case when we don't have any buffer at hand append(c); } return this; } /** * Does this buffer end with this string? If we check for * the empty string, we get true. If we would support JDK 11, we could * use Arrays.mismatch and be way faster. * * @param s the string to check the end against * @return true of the end matches the buffer, false otherwise */ public boolean endsWith(final String s) { // length does not match, cannot be the end if (this.length_ < s.length()) { return false; } // check the string by each char, avoids a copy of the string final int start = this.length_ - s.length(); // change this to Arrays.mismatch when going JDK 11 or higher for (int i = 0; i < s.length(); i++) { if (this.data_[i + start] != s.charAt(i)) { return false; } } return true; } /** * Reduces the buffer to the content between start and end marker when * only whitespaces are found before the startMarker as well as after the end marker. * If both strings overlap due to identical characters such as "foo" and "oof" * and the buffer is " foof ", we don't do anything. * *

If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. * * @param startMarker the start string to find, must not be null * @param endMarker the end string to find, must not be null * @return this instance * * @deprecated Use the new method {@link #trimToContent(String, String)} instead. */ public CheaperCharBuffer reduceToContent(final String startMarker, final String endMarker) { return trimToContent(startMarker, endMarker); } /** * Reduces the buffer to the content between start and end marker when * only whitespaces are found before the startMarker as well as after the end marker. * If both strings overlap due to identical characters such as "foo" and "oof" * and the buffer is " foof ", we don't do anything. * *

If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. * * @param startMarker the start string to find, must not be null * @param endMarker the end string to find, must not be null * @return this instance */ public CheaperCharBuffer trimToContent(final String startMarker, final String endMarker) { // if both are longer or same length than content, don't do anything final int markerLength = startMarker.length() + endMarker.length(); if (markerLength >= this.length_) { return this; } // run over starting whitespaces int sPos = 0; for (; sPos < this.length_ - markerLength; sPos++) { if (!Character.isWhitespace(this.data_[sPos])) { break; } } // run over ending whitespaces int ePos = this.length_ - 1; for (; ePos > sPos - markerLength; ePos--) { if (!Character.isWhitespace(this.data_[ePos])) { break; } } // if we have less content than marker length, give up // this also helps when markers overlap such as // and the string is " " if (ePos - sPos + 1 < markerLength) { return this; } // check the start for (int i = 0; i < startMarker.length(); i++) { if (startMarker.charAt(i) != this.data_[i + sPos]) { // no start match, stop and don't do anything return this; } } // check the end, ePos is when the first good char // occurred final int endStartCheckPos = ePos - endMarker.length() + 1; for (int i = 0; i < endMarker.length(); i++) { if (endMarker.charAt(i) != this.data_[endStartCheckPos + i]) { // no start match, stop and don't do anything return this; } } // shift left and cut length final int newLength = ePos - sPos + 1 - markerLength; System.arraycopy(this.data_, sPos + startMarker.length(), this.data_, 0, newLength); this.length_ = newLength; return this; } /** * Check if we have only whitespaces * * @return true if we have only whitespace, false otherwise */ public boolean isWhitespace() { for (int i = 0; i < this.length_; i++) { if (!Character.isWhitespace(this.data_[i])) { return false; } } return true; } /** * Trims the string similar to {@link java.lang.String#trim()} * * @return a string with removed whitespace at the beginning and the end */ public CheaperCharBuffer trim() { // clean the end first, because it is cheap return trimTrailing().trimLeading(); } /** * Removes all whitespace before the first non-whitespace char. * If all are whitespaces, we get an empty buffer * * @return this instance */ public CheaperCharBuffer trimLeading() { // run over starting whitespace int sPos = 0; for (; sPos < this.length_; sPos++) { if (!Character.isWhitespace(this.data_[sPos])) { break; } } if (sPos == 0) { // nothing to do return this; } else if (sPos == this.length_) { // only whitespace this.length_ = 0; return this; } // shift left final int newLength = this.length_ - sPos; System.arraycopy(this.data_, sPos, this.data_, 0, newLength); this.length_ = newLength; return this; } /** * Removes all whitespace at the end. * If all are whitespace, we get an empty buffer * * @return this instance * * @deprecated Use {@link #trimTrailing()} instead. */ public CheaperCharBuffer trimWhitespaceAtEnd() { return trimTrailing(); } /** * Removes all whitespace at the end. * If all are whitespace, we get an empty buffer * * @return this instance */ public CheaperCharBuffer trimTrailing() { // run over ending whitespaces int ePos = this.length_ - 1; for (; ePos >= 0; ePos--) { if (!Character.isWhitespace(this.data_[ePos])) { break; } } this.length_ = ePos + 1; return this; } /** * Shortens the buffer by that many positions. If the count is * larger than the length, we get just an empty buffer. If you pass in negative * values, we are failing, likely often silently. It is all about performance and * not a general all-purpose API. * * @param count a positive number, no runtime checks, if count is larger than * length, we get length = 0 * @return this instance */ public CheaperCharBuffer shortenBy(final int count) { final int newLength = this.length_ - count; this.length_ = newLength < 0 ? 0 : newLength; return this; } /** * Get the characters as char array, this will be a copy! * * @return a copy of the underlying char darta */ public char[] getChars() { return Arrays.copyOf(this.data_, this.length_); } /** * Returns a string representation of this buffer. This will be a copy * operation. If the buffer is emoty, we get a constant empty String back * to avoid any overhead. * * @return a string of the content of this buffer */ @Override public String toString() { if (this.length_ > 0) { return new String(this.data_, 0, this.length_); } else { return ""; } } /** * Returns the char a the given position. Will complain if * we try to read outside the range. We do a range check here * because we might not notice when we are within the buffer * but outside the current length. * * @param index the position to read from * @return the char at the position * @throws IndexOutOfBoundsException * in case one tries to read outside of valid buffer range */ @Override public char charAt(final int index) { if (index > this.length_ - 1 || index < 0) { throw new IndexOutOfBoundsException( "Tried to read outside of the valid buffer data"); } return this.data_[index]; } /** * Returns the char at the given position. No checks are * performed. It is up to the caller to make sure we * read correctly. Reading outside of the array will * cause an {@link IndexOutOfBoundsException} but using an * incorrect position in the array (such as beyond length) * might stay unnoticed! This is a performance method, * use at your own risk. * * @param index the position to read from * @return the char at the position */ public char unsafeCharAt(final int index) { return this.data_[index]; } /** * Returns a content copy of this buffer * * @return a copy of this buffer, the capacity might differ */ @Override public CheaperCharBuffer clone() { return new CheaperCharBuffer(this); } /** * Returns a CharSequence that is a subsequence of this sequence. * The subsequence starts with the char value at the specified index and * ends with the char value at index end - 1. The length * (in chars) of the * returned sequence is end - start, so if start == end * then an empty sequence is returned. * * @param start the start index, inclusive * @param end the end index, exclusive * * @return the specified subsequence * * @throws IndexOutOfBoundsException * if start or end are negative, * if end is greater than length(), * or if start is greater than end * * @return a charsequence of this buffer */ @Override public CharSequence subSequence(final int start, final int end) { if (start < 0) { throw new StringIndexOutOfBoundsException(start); } if (end > this.length_) { throw new StringIndexOutOfBoundsException(end); } final int l = end - start; if (l < 0) { throw new StringIndexOutOfBoundsException(l); } return new String(this.data_, start, l); } /** * Two buffers are identical when the length and * the content of the backing array (only for the * data in view) are identical. * * @param o the object to compare with * @return true if length and array content match, false otherwise */ @Override public boolean equals(final Object o) { if (o instanceof CharSequence) { final CharSequence ob = (CharSequence) o; if (ob.length() != this.length_) { return false; } // ok, in JDK 11 or up, we could use an // Arrays.mismatch, but we cannot do that // due to JDK 8 compatibility for (int i = 0; i < this.length_; i++) { if (ob.charAt(i) != this.data_[i]) { return false; } } // length and content match, be happy return true; } return false; } /** * We don't cache the hashcode because we mutate often. Don't use this in * hashmaps as key. But you can use that to look up in a hashmap against * a string using the CharSequence interface. * * @return the hashcode, similar to what a normal string would deliver */ @Override public int hashCode() { int h = 0; for (int i = 0; i < this.length_; i++) { h = ((h << 5) - h) + this.data_[i]; } return h; } /** * Append a character to an XMLCharBuffer. The character is an int value, and * can either be a single UTF-16 character or a supplementary character * represented by two UTF-16 code points. * * @param value The character value. * @return this instance for fluid programming * * @throws IllegalArgumentException if the specified * {@code codePoint} is not a valid Unicode code point. */ public CheaperCharBuffer appendCodePoint(final int value) { if (value <= Character.MAX_VALUE) { return this.append((char) value); } else { try { final char[] chars = Character.toChars(value); return this.append(chars, 0, chars.length); } catch (final IllegalArgumentException e) { // when value is not valid as UTF-16 this.append(REPLACEMENT_CHARACTER); throw e; } } } }