| | 1 | | // Copyright (c) 2020-2024 dotBunny Inc. |
| | 2 | | // dotBunny licenses this file to you under the BSL-1.0 license. |
| | 3 | | // See the LICENSE file in the project root for more information. |
| | 4 | |
|
| | 5 | | using System; |
| | 6 | | using Unity.Mathematics; |
| | 7 | |
|
namespace GDX
{
    /// <summary>
    /// A segmented collection of <see cref="char" />.
    /// </summary>
    /// <remarks>
    /// Instances are built by <see cref="SplitOnNonAlphaNumericToLower" /> or
    /// <see cref="SplitOnNonAlphaNumericToLowerHashed" />. Both copy the source string into a private
    /// buffer, lower-case characters in the <c>StringExtensions.AsciiUpperCaseStart</c> to
    /// <c>StringExtensions.AsciiUpperCaseEnd</c> range in place, and record every run of lower-case
    /// letters / digits as a segment.
    /// </remarks>
    public struct SegmentedString
    {
        /// <summary>
        /// Starting value of both halves of the alternating hash.
        /// </summary>
        const int k_HashSeed = 5381;

        /// <summary>
        /// Multiplier applied to the second hash half when the two halves are combined.
        /// </summary>
        const int k_HashMultiplier = 1566083941;

        /// <summary>
        /// The initial array of characters.
        /// </summary>
        char[] m_Characters;

        /// <summary>
        /// Datastore of word segment information.
        /// </summary>
        /// <remarks>
        /// <list type="table">
        /// <listheader>
        /// <term>Axis</term>
        /// <description>Typical Usage</description>
        /// </listheader>
        /// <item>
        /// <term>x</term>
        /// <description>The start offset in <see cref="m_Characters" /> of a word.</description>
        /// </item>
        /// <item>
        /// <term>y</term>
        /// <description>The length of the word.</description>
        /// </item>
        /// <item>
        /// <term>z</term>
        /// <description>
        /// The calculated <see cref="StringExtensions.GetStableHashCode" /> for the word. Only written by
        /// <see cref="SplitOnNonAlphaNumericToLowerHashed" />; otherwise left at 0.
        /// </description>
        /// </item>
        /// </list>
        /// The array is allocated with one slot per character, so only the first <see cref="m_Count" />
        /// entries carry data; the remainder keep their default (zero) value.
        /// </remarks>
        int3[] m_Segments;

        /// <summary>
        /// The number of words.
        /// </summary>
        int m_Count;

        /// <summary>
        /// The calculated <see cref="StringExtensions.GetStableHashCode" /> for the entirety of
        /// <see cref="m_Characters" />. Only written by <see cref="SplitOnNonAlphaNumericToLowerHashed" />.
        /// </summary>
        int m_HashCode;

        /// <summary>
        /// Get the <see cref="m_Characters" /> array.
        /// </summary>
        /// <remarks>
        /// The internal buffer itself is returned (no copy is made), so writes through the returned
        /// array are visible to this <see cref="SegmentedString" />.
        /// </remarks>
        /// <returns>A <see cref="char" /> array.</returns>
        public char[] AsCharArray()
        {
            return m_Characters;
        }

        /// <summary>
        /// Get a copy of the characters that make up a single segment.
        /// </summary>
        /// <param name="segmentIndex">
        /// The index of the segment. It is not validated against <see cref="GetCount" />.
        /// </param>
        /// <returns>A newly allocated <see cref="char" /> array holding the segment's characters.</returns>
        public char[] AsCharArray(int segmentIndex)
        {
            char[] returnArray = new char[m_Segments[segmentIndex].y];
            Array.Copy(m_Characters, m_Segments[segmentIndex].x,
                returnArray, 0, m_Segments[segmentIndex].y);
            return returnArray;
        }

        /// <summary>
        /// Build a <see cref="string" /> from the entire character buffer.
        /// </summary>
        /// <returns>A new <see cref="string" /> containing every character held, separators included.</returns>
        public string AsString()
        {
            return new string(m_Characters);
        }

        /// <summary>
        /// Build a <see cref="string" /> from a single segment.
        /// </summary>
        /// <param name="segmentIndex">
        /// The index of the segment. It is not validated against <see cref="GetCount" />.
        /// </param>
        /// <returns>A new <see cref="string" /> containing the segment's characters.</returns>
        public string AsString(int segmentIndex)
        {
            return new string(m_Characters, m_Segments[segmentIndex].x, m_Segments[segmentIndex].y);
        }

        /// <summary>
        /// Get the number of segments that were found.
        /// </summary>
        /// <returns>The segment count.</returns>
        public int GetCount()
        {
            return m_Count;
        }

        /// <summary>
        /// Get the hash calculated across the entire character buffer.
        /// </summary>
        /// <returns>
        /// The stored hash; this is 0 unless the instance was created by
        /// <see cref="SplitOnNonAlphaNumericToLowerHashed" />.
        /// </returns>
        public override int GetHashCode()
        {
            return m_HashCode;
        }

        /// <summary>
        /// Get the hash calculated for a single segment.
        /// </summary>
        /// <param name="segmentIndex">
        /// The index of the segment. It is not validated against <see cref="GetCount" />.
        /// </param>
        /// <returns>
        /// The stored segment hash; this is 0 unless the instance was created by
        /// <see cref="SplitOnNonAlphaNumericToLowerHashed" />.
        /// </returns>
        public int GetHashCode(int segmentIndex)
        {
            return m_Segments[segmentIndex].z;
        }

        /// <summary>
        /// Get the offset into the character buffer at which a segment starts.
        /// </summary>
        /// <param name="segmentIndex">
        /// The index of the segment. It is not validated against <see cref="GetCount" />.
        /// </param>
        /// <returns>The zero-based start offset of the segment.</returns>
        public int GetOffset(int segmentIndex)
        {
            return m_Segments[segmentIndex].x;
        }

        /// <summary>
        /// Get the number of characters in a segment.
        /// </summary>
        /// <param name="segmentIndex">
        /// The index of the segment. It is not validated against <see cref="GetCount" />.
        /// </param>
        /// <returns>The length of the segment.</returns>
        public int GetSegmentLength(int segmentIndex)
        {
            return m_Segments[segmentIndex].y;
        }

        /// <summary>
        /// Lower-case a copy of <paramref name="targetString" /> and split it into segments wherever a
        /// character is neither a lower-case letter nor a digit (per the <c>StringExtensions.Ascii*</c>
        /// ranges).
        /// </summary>
        /// <remarks>
        /// No hashes are calculated by this method; <see cref="GetHashCode()" /> and
        /// <see cref="GetHashCode(int)" /> return 0 for the result.
        /// </remarks>
        /// <param name="targetString">The text to process; must not be <c>null</c>.</param>
        /// <returns>A populated <see cref="SegmentedString" />.</returns>
        /// <exception cref="NullReferenceException">
        /// Thrown when <paramref name="targetString" /> is <c>null</c>.
        /// </exception>
        public static SegmentedString SplitOnNonAlphaNumericToLower(string targetString)
        {
            SegmentedString returnValue = new SegmentedString
            {
                // Copy to a new character array that we will maintain
                m_Characters = targetString.ToCharArray()
            };

            int charactersLength = returnValue.m_Characters.Length;

            // One slot per character is always enough to hold every possible segment.
            returnValue.m_Segments = new int3[charactersLength];

            bool isInsideSegment = false;

            for (int i = 0; i < charactersLength; i++)
            {
                // Work with the UTF-16 code unit as an integer
                int c = returnValue.m_Characters[i];

                // Check character value and shift it if necessary (32)
                if (c >= StringExtensions.AsciiUpperCaseStart && c <= StringExtensions.AsciiUpperCaseEnd)
                {
                    c ^= StringExtensions.AsciiCaseShift;

                    // Update value
                    returnValue.m_Characters[i] = (char)c;
                }

                // Only lower-case letters and digits belong to a segment
                bool isValid =
                    (c >= StringExtensions.AsciiLowerCaseStart && c <= StringExtensions.AsciiLowerCaseEnd) ||
                    (c >= StringExtensions.AsciiNumberStart && c <= StringExtensions.AsciiNumberEnd);

                // If we are valid, but not in a segment
                if (isValid && !isInsideSegment)
                {
                    // Mark start spot
                    returnValue.m_Segments[returnValue.m_Count].x = i;

                    isInsideSegment = true;
                }

                if (!isValid && isInsideSegment)
                {
                    // Close out this iteration of a segment
                    isInsideSegment = false;
                    returnValue.m_Segments[returnValue.m_Count].y =
                        i - returnValue.m_Segments[returnValue.m_Count].x;
                    returnValue.m_Count++;
                }
            }

            // Finish the segment if the text ended while we were still inside of one
            if (isInsideSegment)
            {
                returnValue.m_Segments[returnValue.m_Count].y =
                    charactersLength - returnValue.m_Segments[returnValue.m_Count].x;
                returnValue.m_Count++;
            }

            return returnValue;
        }

        /// <summary>
        /// Lower-case a copy of <paramref name="targetString" /> and split it into segments wherever a
        /// character is neither a lower-case letter nor a digit (per the <c>StringExtensions.Ascii*</c>
        /// ranges), calculating a hash for each segment and for the lower-cased text as a whole.
        /// </summary>
        /// <remarks>
        /// The overall hash consumes every character (separators included) after lower-casing, while a
        /// segment hash consumes only that segment's characters. All hash arithmetic is performed in an
        /// <c>unchecked</c> context because it relies on 32-bit wrap-around; this keeps the method from
        /// throwing <see cref="OverflowException" /> when compiled with overflow checking enabled.
        /// </remarks>
        /// <param name="targetString">The text to process; must not be <c>null</c>.</param>
        /// <returns>A populated <see cref="SegmentedString" /> including hash information.</returns>
        /// <exception cref="NullReferenceException">
        /// Thrown when <paramref name="targetString" /> is <c>null</c>.
        /// </exception>
        public static SegmentedString SplitOnNonAlphaNumericToLowerHashed(string targetString)
        {
            SegmentedString returnValue = new SegmentedString
            {
                // Copy to a new character array that we will maintain
                m_Characters = targetString.ToCharArray()
            };

            int charactersLength = returnValue.m_Characters.Length;

            // One slot per character is always enough to hold every possible segment.
            returnValue.m_Segments = new int3[charactersLength];

            int segmentHashA = k_HashSeed;
            int segmentHashB = segmentHashA;
            int hashA = k_HashSeed;
            int hashB = hashA;
            bool useAlternateHash = false;
            bool useAlternateSegmentHash = false;
            bool isInsideSegment = false;

            // The hashes are expected to wrap around; never let a checked build turn that into an exception.
            unchecked
            {
                for (int i = 0; i < charactersLength; i++)
                {
                    // Work with the UTF-16 code unit as an integer
                    int c = returnValue.m_Characters[i];

                    // Check character value and shift it if necessary (32)
                    if (c >= StringExtensions.AsciiUpperCaseStart && c <= StringExtensions.AsciiUpperCaseEnd)
                    {
                        c ^= StringExtensions.AsciiCaseShift;

                        // Update value
                        returnValue.m_Characters[i] = (char)c;
                    }

                    // Hash character for overall hashing
                    // Flopping hash: characters alternate between the A and B halves
                    if (!useAlternateHash)
                    {
                        hashA = ((hashA << 5) + hashA) ^ c;
                        useAlternateHash = true;
                    }
                    else
                    {
                        hashB = ((hashB << 5) + hashB) ^ c;
                        useAlternateHash = false;
                    }

                    // Only lower-case letters and digits belong to a segment
                    bool isValid =
                        (c >= StringExtensions.AsciiLowerCaseStart && c <= StringExtensions.AsciiLowerCaseEnd) ||
                        (c >= StringExtensions.AsciiNumberStart && c <= StringExtensions.AsciiNumberEnd);

                    // If we are valid, but not in a segment
                    if (isValid && !isInsideSegment)
                    {
                        // Reset hashes so every segment is hashed independently
                        segmentHashA = k_HashSeed;
                        segmentHashB = segmentHashA;
                        useAlternateSegmentHash = false;

                        // Mark start spot
                        returnValue.m_Segments[returnValue.m_Count].x = i;

                        isInsideSegment = true;
                    }

                    if (isValid)
                    {
                        // Flopping hash
                        if (!useAlternateSegmentHash)
                        {
                            segmentHashA = ((segmentHashA << 5) + segmentHashA) ^ c;
                            useAlternateSegmentHash = true;
                        }
                        else
                        {
                            segmentHashB = ((segmentHashB << 5) + segmentHashB) ^ c;
                            useAlternateSegmentHash = false;
                        }
                    }

                    if (!isValid && isInsideSegment)
                    {
                        // Close out this iteration of a segment
                        isInsideSegment = false;
                        returnValue.m_Segments[returnValue.m_Count].y =
                            i - returnValue.m_Segments[returnValue.m_Count].x;
                        returnValue.m_Segments[returnValue.m_Count].z =
                            segmentHashA + segmentHashB * k_HashMultiplier;
                        returnValue.m_Count++;
                    }
                }

                // Finish the segment if the text ended while we were still inside of one
                if (isInsideSegment)
                {
                    returnValue.m_Segments[returnValue.m_Count].y =
                        charactersLength - returnValue.m_Segments[returnValue.m_Count].x;
                    returnValue.m_Segments[returnValue.m_Count].z =
                        segmentHashA + segmentHashB * k_HashMultiplier;
                    returnValue.m_Count++;
                }

                // Save final hash
                returnValue.m_HashCode = hashA + hashB * k_HashMultiplier;
            }

            return returnValue;
        }
    }
}