| | | 1 | | // Copyright (c) 2020-2024 dotBunny Inc. |
| | | 2 | | // dotBunny licenses this file to you under the BSL-1.0 license. |
| | | 3 | | // See the LICENSE file in the project root for more information. |
| | | 4 | | |
| | | 5 | | using System; |
| | | 6 | | using Unity.Mathematics; |
| | | 7 | | |
| | | 8 | | namespace GDX |
| | | 9 | | { |
/// <summary>
///     A segmented collection of <see cref="char" />: a lower-cased copy of a source string, plus the offset
///     and length (and optionally the hash) of every alphanumeric word found inside it.
/// </summary>
/// <remarks>
///     Instances are expected to be created through <see cref="SplitOnNonAlphaNumericToLower" /> or
///     <see cref="SplitOnNonAlphaNumericToLowerHashed" />; a default instance has no backing arrays.
/// </remarks>
public struct SegmentedString
{
    /// <summary>
    ///     The value both hash accumulators start from.
    /// </summary>
    const int k_HashSeed = 5381;

    /// <summary>
    ///     The factor applied to the second accumulator when the two accumulators are combined.
    /// </summary>
    const int k_HashMultiplier = 1566083941;

    /// <summary>
    ///     The lower-cased copy of the characters of the source string.
    /// </summary>
    char[] m_Characters;

    /// <summary>
    ///     Datastore of word segment information.
    /// </summary>
    /// <remarks>
    ///     The array is allocated with one slot per character; only the first <see cref="m_Count" /> slots
    ///     describe actual words.
    ///     <list type="table">
    ///         <listheader>
    ///             <term>Axis</term>
    ///             <description>Typical Usage</description>
    ///         </listheader>
    ///         <item>
    ///             <term>x</term>
    ///             <description>The start offset in <see cref="m_Characters" /> of a word.</description>
    ///         </item>
    ///         <item>
    ///             <term>y</term>
    ///             <description>The length of the word.</description>
    ///         </item>
    ///         <item>
    ///             <term>z</term>
    ///             <description>The calculated <see cref="StringExtensions.GetStableHashCode" /> for the word.</description>
    ///         </item>
    ///     </list>
    /// </remarks>
    int3[] m_Segments;

    /// <summary>
    ///     The number of words.
    /// </summary>
    int m_Count;

    /// <summary>
    ///     The calculated <see cref="StringExtensions.GetStableHashCode" /> for the entirety of
    ///     <see cref="m_Characters" />.
    /// </summary>
    int m_HashCode;

    /// <summary>
    ///     Get the <see cref="m_Characters" /> array.
    /// </summary>
    /// <remarks>
    ///     The backing array itself is returned, not a copy; writing to it changes this instance's content
    ///     without updating any stored hash.
    /// </remarks>
    /// <returns>A <see cref="char" /> array.</returns>
    public char[] AsCharArray()
    {
        return m_Characters;
    }

    /// <summary>
    ///     Get a copy of the characters that make up a single segment.
    /// </summary>
    /// <param name="segmentIndex">
    ///     The index of the segment. It is not validated against <see cref="GetCount" />.
    /// </param>
    /// <returns>A newly allocated <see cref="char" /> array holding only that segment's characters.</returns>
    public char[] AsCharArray(int segmentIndex)
    {
        int3 segment = m_Segments[segmentIndex];
        char[] returnArray = new char[segment.y];
        Array.Copy(m_Characters, segment.x, returnArray, 0, segment.y);
        return returnArray;
    }

    /// <summary>
    ///     Create a <see cref="string" /> from the entire (lower-cased) character array, separators included.
    /// </summary>
    /// <returns>A new <see cref="string" />.</returns>
    public string AsString()
    {
        return new string(m_Characters);
    }

    /// <summary>
    ///     Create a <see cref="string" /> from the characters of a single segment.
    /// </summary>
    /// <param name="segmentIndex">
    ///     The index of the segment. It is not validated against <see cref="GetCount" />.
    /// </param>
    /// <returns>A new <see cref="string" /> containing only that segment.</returns>
    public string AsString(int segmentIndex)
    {
        int3 segment = m_Segments[segmentIndex];
        return new string(m_Characters, segment.x, segment.y);
    }

    /// <summary>
    ///     Get the number of segments (words) that were found.
    /// </summary>
    /// <returns>The segment count.</returns>
    public int GetCount()
    {
        return m_Count;
    }

    /// <summary>
    ///     Get the hash stored for the whole character array.
    /// </summary>
    /// <remarks>
    ///     Only <see cref="SplitOnNonAlphaNumericToLowerHashed" /> stores a hash; instances built by
    ///     <see cref="SplitOnNonAlphaNumericToLower" /> return 0.
    /// </remarks>
    /// <returns>The stored hash code.</returns>
    public override int GetHashCode()
    {
        return m_HashCode;
    }

    /// <summary>
    ///     Get the hash stored for a single segment.
    /// </summary>
    /// <remarks>
    ///     Only <see cref="SplitOnNonAlphaNumericToLowerHashed" /> stores segment hashes; instances built by
    ///     <see cref="SplitOnNonAlphaNumericToLower" /> return 0.
    /// </remarks>
    /// <param name="segmentIndex">
    ///     The index of the segment. It is not validated against <see cref="GetCount" />.
    /// </param>
    /// <returns>The stored hash code of the segment.</returns>
    public int GetHashCode(int segmentIndex)
    {
        return m_Segments[segmentIndex].z;
    }

    /// <summary>
    ///     Get the offset into the character array at which a segment starts.
    /// </summary>
    /// <param name="segmentIndex">
    ///     The index of the segment. It is not validated against <see cref="GetCount" />.
    /// </param>
    /// <returns>The start offset of the segment.</returns>
    public int GetOffset(int segmentIndex)
    {
        return m_Segments[segmentIndex].x;
    }

    /// <summary>
    ///     Get the number of characters in a segment.
    /// </summary>
    /// <param name="segmentIndex">
    ///     The index of the segment. It is not validated against <see cref="GetCount" />.
    /// </param>
    /// <returns>The length of the segment.</returns>
    public int GetSegmentLength(int segmentIndex)
    {
        return m_Segments[segmentIndex].y;
    }

    /// <summary>
    ///     Copy <paramref name="targetString" />, lower-case the characters that fall in the upper-case range
    ///     described by the <see cref="StringExtensions" /> constants, and record every run of consecutive
    ///     lower-case / number characters as a segment. No hashes are calculated.
    /// </summary>
    /// <param name="targetString">The string to copy and split. Must not be null.</param>
    /// <returns>A <see cref="SegmentedString" /> with offsets and lengths populated and all hashes left at 0.</returns>
    public static SegmentedString SplitOnNonAlphaNumericToLower(string targetString)
    {
        SegmentedString returnValue = new SegmentedString
        {
            // Copy to a new character array that we will maintain
            m_Characters = targetString.ToCharArray()
        };

        char[] characters = returnValue.m_Characters;
        int charactersLength = characters.Length;

        // One slot per character, so the segment store can never overflow.
        int3[] segments = new int3[charactersLength];
        returnValue.m_Segments = segments;

        int count = 0;
        bool isInsideSegment = false;

        for (int i = 0; i < charactersLength; i++)
        {
            // Work with the numeric value (UTF-16 code unit) of the character
            int c = characters[i];

            // Upper-case characters get the case bit flipped and are written back in their lower-case form
            if (c >= StringExtensions.AsciiUpperCaseStart && c <= StringExtensions.AsciiUpperCaseEnd)
            {
                c ^= StringExtensions.AsciiCaseShift;
                characters[i] = (char)c;
            }

            // Only lower-case letters and numbers can be part of a segment
            bool isValid =
                (c >= StringExtensions.AsciiLowerCaseStart && c <= StringExtensions.AsciiLowerCaseEnd) ||
                (c >= StringExtensions.AsciiNumberStart && c <= StringExtensions.AsciiNumberEnd);

            if (isValid)
            {
                if (!isInsideSegment)
                {
                    // First character of a new segment, mark the start spot
                    segments[count].x = i;
                    isInsideSegment = true;
                }
            }
            else if (isInsideSegment)
            {
                // A separator directly after a segment closes that segment
                segments[count].y = i - segments[count].x;
                count++;
                isInsideSegment = false;
            }
        }

        // Finish a segment that runs up to the end of the characters
        if (isInsideSegment)
        {
            segments[count].y = charactersLength - segments[count].x;
            count++;
        }

        returnValue.m_Count = count;
        return returnValue;
    }

    /// <summary>
    ///     Copy <paramref name="targetString" />, lower-case the characters that fall in the upper-case range
    ///     described by the <see cref="StringExtensions" /> constants, and record every run of consecutive
    ///     lower-case / number characters as a segment, calculating a hash for each segment as well as for
    ///     the whole lower-cased character array (separators included).
    /// </summary>
    /// <remarks>
    ///     Each hash uses two accumulators that are fed alternating characters and combined at the end. The
    ///     arithmetic intentionally wraps around, so it is explicitly <c>unchecked</c> to behave the same in
    ///     builds that enable overflow checking.
    /// </remarks>
    /// <param name="targetString">The string to copy and split. Must not be null.</param>
    /// <returns>A <see cref="SegmentedString" /> with offsets, lengths and hashes populated.</returns>
    public static SegmentedString SplitOnNonAlphaNumericToLowerHashed(string targetString)
    {
        SegmentedString returnValue = new SegmentedString
        {
            // Copy to a new character array that we will maintain
            m_Characters = targetString.ToCharArray()
        };

        char[] characters = returnValue.m_Characters;
        int charactersLength = characters.Length;

        // One slot per character, so the segment store can never overflow.
        int3[] segments = new int3[charactersLength];
        returnValue.m_Segments = segments;

        int segmentHashA = k_HashSeed;
        int segmentHashB = k_HashSeed;
        int hashA = k_HashSeed;
        int hashB = k_HashSeed;
        bool useAlternateHash = false;
        bool useAlternateSegmentHash = false;
        bool isInsideSegment = false;
        int count = 0;

        for (int i = 0; i < charactersLength; i++)
        {
            // Work with the numeric value (UTF-16 code unit) of the character
            int c = characters[i];

            // Upper-case characters get the case bit flipped and are written back in their lower-case form
            if (c >= StringExtensions.AsciiUpperCaseStart && c <= StringExtensions.AsciiUpperCaseEnd)
            {
                c ^= StringExtensions.AsciiCaseShift;
                characters[i] = (char)c;
            }

            // Every character, separator or not, feeds the overall hash; accumulators alternate per character
            if (!useAlternateHash)
            {
                hashA = unchecked(((hashA << 5) + hashA) ^ c);
                useAlternateHash = true;
            }
            else
            {
                hashB = unchecked(((hashB << 5) + hashB) ^ c);
                useAlternateHash = false;
            }

            // Only lower-case letters and numbers can be part of a segment
            bool isValid =
                (c >= StringExtensions.AsciiLowerCaseStart && c <= StringExtensions.AsciiLowerCaseEnd) ||
                (c >= StringExtensions.AsciiNumberStart && c <= StringExtensions.AsciiNumberEnd);

            if (isValid)
            {
                if (!isInsideSegment)
                {
                    // First character of a new segment: restart the segment hash and mark the start spot
                    segmentHashA = k_HashSeed;
                    segmentHashB = k_HashSeed;
                    useAlternateSegmentHash = false;

                    segments[count].x = i;
                    isInsideSegment = true;
                }

                // Feed the segment hash, alternating accumulators per character of the segment
                if (!useAlternateSegmentHash)
                {
                    segmentHashA = unchecked(((segmentHashA << 5) + segmentHashA) ^ c);
                    useAlternateSegmentHash = true;
                }
                else
                {
                    segmentHashB = unchecked(((segmentHashB << 5) + segmentHashB) ^ c);
                    useAlternateSegmentHash = false;
                }
            }
            else if (isInsideSegment)
            {
                // A separator directly after a segment closes that segment
                segments[count].y = i - segments[count].x;
                segments[count].z = unchecked(segmentHashA + segmentHashB * k_HashMultiplier);
                count++;
                isInsideSegment = false;
            }
        }

        // Finish a segment that runs up to the end of the characters
        if (isInsideSegment)
        {
            segments[count].y = charactersLength - segments[count].x;
            segments[count].z = unchecked(segmentHashA + segmentHashB * k_HashMultiplier);
            count++;
        }

        returnValue.m_Count = count;

        // Save final hash
        returnValue.m_HashCode = unchecked(hashA + hashB * k_HashMultiplier);

        return returnValue;
    }
}
| | | 276 | | } |