/**
    nogc strings

    Copyright:
        Copyright © 2023-2025, Kitsunebi Games
        Copyright © 2023-2025, Inochi2D Project

    License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
    Authors:   Luna Nielsen
*/
module nulib.string;
import numem.core.hooks;
import numem.core.traits;
import numem.core.memory;
import numem.core.meta;
import numem;
import nulib.collections.internal.marray;
import nulib.text.unicode :
    encode,
    decode;

//
//          STRING TRAITS
//

/// Gets whether the provided type is some type of string.
enum isSomeString(T) =
    isSomeSafeString!T ||
    isSomeCString!T;

/**
    Gets whether the provided type is some type of string
    which is length denoted and therefore "safe"
*/
enum isSomeSafeString(T) =
    isSomeNString!T ||
    isSomeDString!T;

/// Gets whether the provided type is some type of nstring.
enum isSomeNString(T) =
    is(inout(T) == inout(StringImpl!C), C) && isSomeChar!C;

/// Gets whether the provided type is some type of null terminated C string.
enum isSomeCString(T) =
    is(T == C*, C) && isSomeChar!C;

/// Gets whether the provided type is some type of D string slice.
enum isSomeDString(T) =
    (is(immutable(T) == immutable(C[]), C) && isSomeChar!C) ||
    (is(T : C2[], C2) && isSomeChar!C2);

/// Gets whether the provided type is a character
enum isSomeChar(T) =
    is(T : char) || is(T : wchar) || is(T : dchar);

/**
    Gets whether $(D T) is convertible to any form of $(D nstring),
    i.e. whether it has a $(D toString) member returning some string type.
*/
enum isStringable(T) =
    __traits(hasMember, T, "toString") &&
    isSomeString!(ReturnType!(T.toString));

/**
    Gets the size of the element in a string-ish type in bytes.
*/
enum StringCharSize(T) =
    StringCharType!T.sizeof;

/**
    Gets the type of the element in a string-ish type.

    Yields $(D void) when $(D T) is not a string type.
*/
template StringCharType(T) {
    static if (isSomeString!T) {
        static if(isSomeNString!T)
            alias StringCharType = Unqual!(T.CharType);
        else
            alias StringCharType = Unqual!(typeof(T.init[0].init));
    } else {
        alias StringCharType = void;
    }
}

//
//          NSTRING ALIASES
//

/**
    A @nogc UTF-8 string

    Note:
        $(D nstring) is passed $(B by value), this effectively means
        that if you do not pass it as $(D ref) you will end up copying
        the contents of the string.

    See_Also:
        $(D nwstring)
        $(D ndstring)
*/
alias nstring = StringImpl!(char);

/**
    A @nogc UTF-16 string

    Note:
        $(D nwstring) is passed $(B by value), this effectively means
        that if you do not pass it as $(D ref) you will end up copying
        the contents of the string.

    See_Also:
        $(D nstring)
        $(D ndstring)
*/
alias nwstring = StringImpl!(wchar);

/**
    A @nogc UTF-32 string

    Note:
        $(D ndstring) is passed $(B by value), this effectively means
        that if you do not pass it as $(D ref) you will end up copying
        the contents of the string.

    See_Also:
        $(D nstring)
        $(D nwstring)
*/
alias ndstring = StringImpl!(dchar);

//
//          STRING IMPLEMENTATION.
//

/**
    The underlying implementation of a numem string.

    The string keeps a slice ($(D memory)) whose length excludes the
    null terminator that the mutating operations write directly after
    the last character.
*/
struct StringImpl(T) if (isSomeChar!T) {
@nogc:
private:
    alias SelfType = typeof(this);
    alias MemoryT = immutable(T)[];

    // Backing slice of the string.
    immutable(T)[] memory = null;
    size_t flags;

    /**
        Flag which indicates that the string is read-only.

        A read-only string refers to memory it does not own; it must be
        duplicated (see takeOwnershipImpl) before being written to or freed.
    */
    enum size_t STRFLAG_READONLY = 0x01;

    // Resizing algorithm
    pragma(inline, true)
    void resizeImpl(size_t newLength) @trusted {
        if (newLength == 0) {

            // Borrowed memory is not ours to free; dropping the
            // reference is enough, no need to duplicate it first.
            if (flags & STRFLAG_READONLY) {
                this.resetImpl();
                return;
            }

            if (memory.ptr !is null)
                memory.nu_resize(0);

            nogc_zeroinit(memory);
            return;
        }

        // We're about to write to the buffer, so it must be owned.
        if (flags & STRFLAG_READONLY)
            this.takeOwnershipImpl();

        // NOTE:    nu_terminate re-allocates the slice twice,
        //          As such we put a smaller implementation here.
        memory = memory.nu_resize(newLength+1);
        (cast(T*)memory.ptr)[newLength] = '\0';
        memory = memory[0..$-1];
    }

    // Range set algorithm
    pragma(inline, true)
    void setRangeImpl(inout(T)[] dst, inout(T)[] src) {
        if (memory)
            nu_memmove(cast(void*)dst.ptr, cast(void*)src.ptr, src.length*T.sizeof);
    }

    // Char set algorithm
    pragma(inline, true)
    void setCharImpl(void* at, T c) {
        if (memory)
            *(cast(T*)at) = c;
    }

    // Takes ownership of string.
    // Read-only memory is duplicated and terminated the same way
    // the constructors do it; owned strings are left untouched.
    pragma(inline, true)
    void takeOwnershipImpl() {
        if (flags & STRFLAG_READONLY) {
            this.memory = memory.nu_idup();
            nu_terminate(this.memory);
            this.flags &= ~STRFLAG_READONLY;
        }
    }

    // Resets the string without freeing anything.
    pragma(inline, true)
    void resetImpl() {
        this.memory = null;
        this.flags = 0;
    }

    // Makes a copy of the given string transformed
    // to fit the encoding of this string.
    // The caller owns the returned slice.
    pragma(inline, true)
    MemoryT otherToSelf(U)(auto ref U in_)
    if(isSomeString!U) {
        static if (is(StringCharType!SelfType == StringCharType!U)) {

            // Same code unit type, a plain copy suffices.
            auto val = in_.sliceof.nu_dup();
            return cast(MemoryT)val.nu_terminate();
        } else {

            // Otherwise we need to do unicode conversion.
            auto dec = decode(in_.sliceof, false);
            auto enc = encode!SelfType(dec, false);
            return enc.take();
        }
    }

public:
    alias value this;

    /**
        The type of character the string contains.
    */
    alias CharType = T;

    /**
        The length of the string, in code units.
    */
    @property size_t length() @safe nothrow { return memory.length; }

    /**
        Gets the length of the string, with the null terminator.

        Returns 0 if the string has no backing memory.
    */
    @property size_t realLength() @safe nothrow { return memory.ptr ? memory.length+1 : 0; }

    /**
        The length of the string, in bytes.
    */
    @property size_t usage() @safe nothrow { return memory.length*T.sizeof; }

    /**
        Whether the string is empty.
    */
    @property bool empty() @safe nothrow { return memory.length == 0; }

    /**
        Gets a C string pointer to the nstring.
    */
    @property const(T)* ptr() @system nothrow { return cast(const(T)*)memory.ptr; }

    /**
        Gets the internal memory slice.
    */
    @property immutable(T)[] value() inout @trusted nothrow pure { return memory; }

    // Aliases for legacy purposes.
    alias toDString = value;
    alias toCString = ptr;

    ~this() {
        this.resizeImpl(0);
    }

    /**
        Creates a string from a string from any other UTF encoding.

        During CTFE the string refers to $(D rhs) directly and is
        tagged read-only; at runtime the contents are copied
        (and converted if the encodings differ).
    */
    this(U)(auto ref U rhs) @system
    if (isSomeString!U) {
        if (__ctfe) {
            static if (is(StringCharType!U == StringCharType!SelfType)) {
                this.flags |= STRFLAG_READONLY;
                this.memory = cast(MemoryT)rhs.sliceof;
            }
        } else {
            if (rhs) {
                static if (is(StringCharType!U == StringCharType!SelfType)) {
                    this.memory = cast(MemoryT)rhs.sliceof.nu_dup();
                    nu_terminate(memory);
                } else {
                    auto val = otherToSelf(rhs.sliceof);
                    this.memory = val;
                }
            } else {
                nogc_zeroinit(this.memory);
            }
        }
    }

    /**
        Creates a string by concatenating all of the given strings,
        converting each of them to the encoding of this string.
    */
    this(Args...)(auto ref Args args) @system
    if(allSatisfy!(.isSomeString, Args)) {
        if (__ctfe) {
            this.flags |= STRFLAG_READONLY;
            foreach(arg; args)
                this.memory ~= cast(MemoryT)arg.sliceof;

            // Keep a terminator behind the contents, but do not
            // count it towards the length of the string.
            this.memory ~= "\0";
            this.memory = this.memory[0..$-1];

        } else {

            // Convert every argument first; the length after conversion
            // may differ from the source length, so the combined length
            // can only be known afterwards.
            MemoryT[Args.length] parts;
            size_t total = 0;
            static foreach(i; 0 .. Args.length) {
                parts[i] = otherToSelf(args[i].sliceof);
                total += parts[i].length;
            }

            // Allocate new string
            T[] buffer = nu_malloca!T(total);
            size_t offset = 0; // Write offset
            foreach(ref part; parts) {
                buffer[offset..offset+part.length] = part[0..$];
                offset += part.length;
                nu_freea(part);
            }

            // Terminate result.
            this.memory = cast(MemoryT)buffer;
            nu_terminate(memory);
        }
    }

    /**
        Copy-constructor
    */
    this(ref return scope inout(SelfType) rhs) inout @trusted {
        if (__ctfe) {
            this.flags |= STRFLAG_READONLY;
            this.memory = rhs.memory;
        } else if (rhs) {

            // Duplicate and terminate, like the other constructors,
            // so that ptr stays usable as a C string on the copy.
            MemoryT copied = cast(MemoryT)rhs.memory.nu_idup;
            nu_terminate(copied);
            this.memory = copied;
        } else {
            nogc_zeroinit(cast(T[])this.memory);
        }
    }

    /**
        Constructs a string with the given size.
        The contents of the string will be zero-initialized.
    */
    this(uint size) {
        if (__ctfe) { } else {
            this.resize(size);
            nogc_zeroinit(this.memory);
        }
    }

    /**
        Move "constructor"
    */
    void opPostMove(ref typeof(this) other) {
        this.memory = other.memory;

        // NOTE(review): assumes nogc_zeroinit resets the slice of the
        // moved-from instance rather than its contents; confirm
        // against numem.
        nogc_zeroinit(other.memory);
    }

    /**
        Clears the string, equivalent to resizing it to 0.
    */
    void clear() {
        this.resizeImpl(0);
    }

    /**
        Flips the endianness of the string's contents.

        Note:
            This is no-op for UTF-8 strings.

        Returns:
            The string instance.
    */
    auto ref flipEndian() {
        static if (CharType.sizeof > 1) {

            import nulib.memory.endian : nu_etoh, ALT_ENDIAN;

            // The contents are modified in place,
            // never write into memory we merely borrow.
            this.takeOwnershipImpl();
            cast(void)nu_etoh!(CharType, ALT_ENDIAN)(cast(CharType[])memory[0..$]);
        }

        return this;
    }

    /**
        Reverses the contents of the string

        Note:
            The reversal happens per code unit, not per code point.

        Returns:
            The string instance.
    */
    auto ref reverse() {

        // The contents are modified in place,
        // never write into memory we merely borrow.
        this.takeOwnershipImpl();

        auto mmemory = cast(CharType[])memory;
        foreach(i; 0..mmemory.length/2) {
            auto front = mmemory[i];
            auto back = mmemory[$-(i+1)];

            mmemory[i] = back;
            mmemory[$-(i+1)] = front;
        }

        return this;
    }

    /**
        Take ownership of the memory owned by the string.

        If the string is tagged as read-only a copy of the string
        is returned.

        Returns:
            The memory which was owned by the nulib string,
            the nulib string is reset in the process.
    */
    immutable(T)[] take() {
        this.takeOwnershipImpl();

        auto mem = this.memory;
        this.resetImpl();
        return mem;
    }

    /**
        Resizes the string.

        Params:
            newLength = The amount of characters the string should be.

        Note:
            The contents when increasing the size of a string with this
            function is undefined. A null terminator will be appended
            automatically.
    */
    void resize(size_t newLength) {
        this.resizeImpl(newLength);
    }

    /**
        Sets the value of the string.

        Params:
            other = The string to set this string to.

        Notes:
            The contents of $(D other) are copied (and converted if the
            encodings differ). Memory previously owned by this string is
            freed; read-only memory is only dropped.
    */
    void opAssign(U)(U other) @trusted
    if (isSomeString!U) {

        // Build the new contents BEFORE releasing the old buffer,
        // $(D other) may be a slice of our own memory.
        static if (is(StringCharType!U == StringCharType!SelfType)) {
            MemoryT fresh = cast(MemoryT)other.sliceof.nu_dup();
            nu_terminate(fresh);
        } else {
            MemoryT fresh = otherToSelf(other.sliceof);
        }

        // Only free memory which we own.
        if (!(flags & STRFLAG_READONLY) && memory.ptr)
            nu_free(cast(void*)this.memory.ptr);

        // We own our new memory.
        this.memory = fresh;
        this.flags &= ~STRFLAG_READONLY;
    }

    /**
        Appends a character to this string.
    */
    void opOpAssign(string op, U)(auto ref inout(U) value) @trusted
    if (op == "~" && isSomeChar!U) {

        // Don't insert null terminators.
        if (value == cast(U)0)
            return;

        this.resizeImpl(length+1);
        this.setCharImpl(cast(void*)(&memory[$-1]), value);
    }

    /**
        Appends a string to this string.
    */
    void opOpAssign(string op, U)(auto ref U other) @trusted
    if (op == "~" && isSomeString!U) {

        // Skip appending empty strings.
        if (other.sliceof.length == 0)
            return;

        size_t start = memory.length;
        static if (!is(StringCharType!U == StringCharType!SelfType)) {

            // Different encoding; append a converted temporary copy.
            auto otherSlice = otherToSelf(other);

            this.resizeImpl(memory.length+otherSlice.length);
            this.setRangeImpl(memory[start..$], otherSlice[0..$]);

            nu_freea(otherSlice);
        } else {
            auto otherSlice = other.sliceof;
            if (isOverlapping(memory, otherSlice)) {

                // Resizing may move our buffer, which would invalidate
                // a source that lies within it; copy the source first.
                auto tmp = otherSlice.nu_dup();

                this.resizeImpl(memory.length+tmp.length);
                this.setRangeImpl(memory[start..$], tmp[0..$]);
                tmp = tmp.nu_resize(0);
                return;
            }

            this.resizeImpl(memory.length+otherSlice.length);
            this.setRangeImpl(memory[start..$], otherSlice[0..$]);
        }
    }

    /**
        Makes a nstring appended to this string.
    */
    auto opBinary(string op, R)(auto ref inout R rhs) inout
    if (op == "~") {

        // We can't be sure that the given strings won't be freed if we
        // passed them in to opOpAssign, as such we copy them.
        auto lhsv = this.sliceof.nu_dup();
        auto rhsv = rhs.sliceof.nu_dup();

        SelfType result;
        result ~= lhsv;
        result ~= rhsv;

        nu_freea(lhsv);
        nu_freea(rhsv);
        return result;
    }

    /**
        Makes a nstring prepended to this string.
    */
    auto opBinaryRight(string op, R)(auto ref inout R lhs) inout
    if (op == "~") {

        // We can't be sure that the given strings won't be freed if we
        // passed them in to opOpAssign, as such we copy them.
        auto lhsv = lhs.sliceof.nu_dup();
        auto rhsv = this.sliceof.nu_dup();

        SelfType result;
        result ~= lhsv;
        result ~= rhsv;

        nu_freea(lhsv);
        nu_freea(rhsv);
        return result;
    }
}

@("nstring: char append")
unittest {
    // appending a char
    nstring s;
    nwstring ws;
    ndstring ds;
    s ~= 'c';
    ws ~= '\u4567';
    ds ~= '\U0000ABCD';
    assert(s == "c"
        && ws == "\u4567"w
        && ds == "\U0000ABCD"d);

    // Appending a string to itself.
    s ~= s;
    assert(s == "cc");
}

@("nstring: append")
unittest {
    const(char)* cstr1 = "a zero-terminated string";
    const(wchar)* cstr2 = "hey";
    const(dchar)* cstr3 = "ho";

    nstring s;
    s ~= cast(string)null;
    s ~= "";
    s ~= cstr1;
    assert(s == "a zero-terminated string");

    nwstring ws;
    ws ~= cstr2;
    assert(ws.length == 3);

    ndstring wd;
    wd ~= cstr3;
    assert(wd == "ho"d);
}

@("nstring: concat")
unittest {
    auto str1 = nstring("Hello, ") ~ "world!";
    assert(str1 == "Hello, world!", str1.sliceof);

    auto str2 = nstring("Hello, ", "world", " 2!"w);
    assert(str2 == "Hello, world 2!");
}

@("nstring: concat convert")
unittest {
    import std.utf : toUTF8;

    auto str1 = nstring("Hello, ") ~ nwstring("world!"w);
    assert(str1 == "Hello, world!", str1.sliceof);

    auto str2 = ndstring("Hello, ") ~ nstring("world!");
    assert(str2 == "Hello, world!"d, str2.sliceof.toUTF8);

    auto str3 = nstring("Hello, ") ~ "world!"w;
    assert(str3 == "Hello, world!", str3.sliceof.toUTF8);
}

@("nstring: reverse")
unittest {
    nstring str = "Test";
    assert(str.reverse() == "tseT");
}

@("nstring: flipEndian")
unittest {
    nwstring str = "Test"w;
    assert(str.flipEndian() == "\u5400\u6500\u7300\u7400"); // "Test" UTF-16 code points, but endian flipped.
}

@("nstring: string in map")
unittest {
    import nulib.collections.map : map;
    map!(nstring, int) kv;
    kv[nstring("uwu")] = 42;

    assert(kv[nstring("uwu")] == 42);
}

@("nstring: length")
unittest {
    nstring str = "Test string";
    assert(str.usage() == 11);
    assert(str.length() == 11);
    assert(str.realLength() == 12);
}

@("nstring: emptiness")
unittest {
    nstring str;

    assert(str.empty());

    // Clearing an empty string should leave it without backing memory.
    str.clear();
    assert(str.empty);
    assert(str.ptr is null);
}

//
//      C and D string handling utilities
//

/**
    Gets the slice equivalent of the input string.

    Params:
        str = The string to get a slice of.

    Returns:
        For C strings, a slice up to (not including) the null
        terminator, or $(D null) if $(D str) is a null pointer.
        For every other string type, a slice of the entire string.
*/
auto sliceof(T)(auto ref T str) @nogc nothrow
if(isSomeString!T) {
    static if (isSomeCString!T) {

        // The length of a C string must be measured;
        // a null pointer has nothing to slice.
        return str ? str[0..nu_strlen(str)] : null;
    } else {
        return str[0..$];
    }
}

@("sliceof")
unittest {
    const(char)* str1 = "Hello, world!";
    const(char)[] str2 = "Hello, world!";
    nstring str3 = "Hello, world!";

    assert(str1.sliceof == str1.sliceof);
    assert(str3.sliceof == str2.sliceof, str3.sliceof);
}

/**
    Gets a slice from a null-terminated string.

    Params:
        str = the null terminated string to slice.

    Returns:
        A new slice over the string, stopping before the null terminator.
        $(D null) if $(D str) is a null pointer.
        If $(D str) is not null terminated the return value
        is undefined and likely corrupted.
*/
inout(T)[] fromStringz(T)(inout(T)* str) @system @nogc pure nothrow
if (isSomeChar!T) {
    return str ? str[0 .. nu_strlen!T(str)] : null;
}

/**
    Gets the length of a null-terminated string.

    Params:
        str = the string to check the length of.

    Returns:
        The length of the string in code units,
        $(D 0) if $(D str) is a null pointer.
        If $(D str) is not null terminated the return value
        is undefined.
*/
size_t nu_strlen(T)(inout(T)* str) @system @nogc pure nothrow
if (isSomeChar!T) {

    // A null pointer has no characters to count.
    if (str is null)
        return 0;

    const(T)* p = str;
    while (*p)
        ++p;

    return p - str;
}