1 /**
2     nogc strings
3 
4     Copyright:
5         Copyright © 2023-2025, Kitsunebi Games
6         Copyright © 2023-2025, Inochi2D Project
7     
8     License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9     Authors:   Luna Nielsen
10 */
module nulib.string;
12 import numem.core.hooks;
13 import numem.core.traits;
14 import numem.core.memory;
15 import numem.core.meta;
16 import numem;
17 import nulib.collections.internal.marray;
18 import nulib.text.unicode : 
19     encode, 
20     decode;
21 
22 //
23 //              STRING TRAITS
24 //
25 
26 
27 
28 
/// Evaluates to $(D true) when $(D T) is either a null-terminated
/// C string or one of the length-denoted ("safe") string types.
enum isSomeString(T) =
    isSomeCString!T ||
    isSomeSafeString!T;
33 
/**
    Evaluates to $(D true) when $(D T) is a string type that carries
    its own length (a D slice or an nstring) and is therefore "safe",
    as opposed to a bare null-terminated pointer.
*/
enum isSomeSafeString(T) =
    isSomeDString!T ||
    isSomeNString!T;
41 
42 
/// Evaluates to $(D true) when $(D T), ignoring qualifiers, is an
/// instance of $(D StringImpl) over some character type.
template isSomeNString(T) {
    static if (is(inout(T) == inout(StringImpl!C), C))
        enum isSomeNString = isSomeChar!C;
    else
        enum isSomeNString = false;
}
46 
/// Evaluates to $(D true) when $(D T) is a pointer to some character
/// type, i.e. a null terminated C string.
template isSomeCString(T) {
    static if (is(T == C*, C))
        enum isSomeCString = isSomeChar!C;
    else
        enum isSomeCString = false;
}
50 
/// Evaluates to $(D true) when $(D T) is a slice of some character
/// type (ignoring qualifiers), or implicitly converts to one.
template isSomeDString(T) {
    static if (is(immutable(T) == immutable(C[]), C) && isSomeChar!C)
        enum isSomeDString = true;
    else static if (is(T : C2[], C2))
        enum isSomeDString = isSomeChar!C2;
    else
        enum isSomeDString = false;
}
55 
/// Evaluates to $(D true) when $(D T) implicitly converts to one of
/// the built-in character types ($(D char), $(D wchar), $(D dchar)).
enum isSomeChar(T) =
    is(T : dchar) || is(T : wchar) || is(T : char);
59 
/**
    Evaluates to $(D true) when $(D T) has a $(D toString) member
    whose return type satisfies $(D isSomeString).
*/
template isStringable(T) {
    static if (__traits(hasMember, T, "toString"))
        enum isStringable = isSomeString!(ReturnType!(T.toString));
    else
        enum isStringable = false;
}
66 
/**
    Size, in bytes, of a single element of the string-ish type $(D T),
    as reported by $(D StringCharType).
*/
template StringCharSize(T) {
    enum StringCharSize = StringCharType!T.sizeof;
}
72 
/**
    Resolves to the unqualified element type of the string-ish type
    $(D T).

    For nstring types this is the string's $(D CharType); for slices
    and C strings it is the type obtained by indexing the value.
    Types which are not strings resolve to $(D void).
*/
template StringCharType(T) {
    static if (!isSomeString!T)
        alias StringCharType = void;
    else static if (isSomeNString!T)
        alias StringCharType = Unqual!(T.CharType);
    else
        alias StringCharType = Unqual!(typeof(T.init[0].init));
}
86 
87 
88 //
89 //              NSTRING ALIASES
90 //
91 
/**
    A @nogc string of UTF-8 code units ($(D char)).

    Note:
        $(D nstring) has value semantics: passing it without $(D ref)
        invokes the copy constructor, which duplicates the contents
        of the string.

    See_Also:
        $(D nwstring)
        $(D ndstring)
*/
alias nstring = StringImpl!char;
105 
/**
    A @nogc string of UTF-16 code units ($(D wchar)).

    Note:
        $(D nwstring) has value semantics: passing it without $(D ref)
        invokes the copy constructor, which duplicates the contents
        of the string.

    See_Also:
        $(D nstring)
        $(D ndstring)
*/
alias nwstring = StringImpl!wchar;
119 
/**
    A @nogc string of UTF-32 code units ($(D dchar)).

    Note:
        $(D ndstring) has value semantics: passing it without $(D ref)
        invokes the copy constructor, which duplicates the contents
        of the string.

    See_Also:
        $(D nstring)
        $(D nwstring)
*/
alias ndstring = StringImpl!dchar;
133 
134 
135 //
136 //          STRING IMPLEMENTATION.
137 //
138 
139 
140 
/**
    The underlying implementation of a numem string.

    The string keeps a slice of code units of type $(D T). Whenever the
    string allocates or resizes its own storage a null terminator is
    written directly after the slice, so that $(D ptr) can be handed to
    C APIs; the terminator is never counted in $(D length).

    Strings created during CTFE only borrow their memory and are tagged
    read-only; they are duplicated lazily before the first mutation.
*/
struct StringImpl(T) if (isSomeChar!T) {
@nogc:
private:
    alias SelfType = typeof(this);
    alias MemoryT = immutable(T)[];
    
    // Backing slice of the string, excluding the null terminator.
    immutable(T)[] memory = null;

    // Bit set of STRFLAG_* values.
    size_t flags;

    /**
        Flag which indicates that the string is read-only.

        Read-only memory is borrowed; it is never written to nor freed
        by the string.
    */
    enum size_t STRFLAG_READONLY = 0x01;

    // Resizing algorithm
    pragma(inline, true)
    void resizeImpl(size_t newLength) @trusted {
        if (newLength == 0) {

            // Borrowed memory is just dropped, there is no reason
            // to duplicate it only to free the duplicate again.
            if (!(flags & STRFLAG_READONLY) && memory.ptr !is null)
                memory.nu_resize(0);
            
            this.resetImpl();
            return;
        }

        // We are about to write to the memory, make sure we own it.
        this.takeOwnershipImpl();

        // NOTE: nu_terminate re-allocates the slice twice,
        // As such we put a smaller implementation here.
        memory = memory.nu_resize(newLength+1);
        (cast(T*)memory.ptr)[newLength] = '\0';
        memory = memory[0..$-1];
    }

    // Range set algorithm, copies src over dst.
    // memmove is used, so the two ranges may overlap.
    pragma(inline, true)
    void setRangeImpl(inout(T)[] dst, inout(T)[] src) {
        if (memory)
            nu_memmove(cast(void*)dst.ptr, cast(void*)src.ptr, src.length*T.sizeof);
    }

    // Char set algorithm
    pragma(inline, true)
    void setCharImpl(void* at, T c) {
        if (memory)
            *(cast(T*)at) = c;
    }

    // Takes ownership of string.
    // Borrowed (read-only) memory is replaced by an owned,
    // null terminated duplicate.
    pragma(inline, true)
    void takeOwnershipImpl() {
        if (flags & STRFLAG_READONLY) {
            this.memory = memory.nu_idup();

            // NOTE(review): assumes nu_idup only copies `length`
            // code units; re-terminate so `ptr` stays a C string.
            nu_terminate(this.memory);
            this.flags &= ~STRFLAG_READONLY;
        }
    }

    // Resets the string, without freeing anything.
    pragma(inline, true)
    void resetImpl() {
        this.memory = null;
        this.flags = 0;
    }

    // Makes a copy of the given string transformed
    // to fit the encoding of this string.
    // The caller owns the returned memory.
    pragma(inline, true)
    MemoryT otherToSelf(U)(auto ref U in_) 
    if(isSomeString!U) {
        static if (is(StringCharType!SelfType == StringCharType!U)) {
            auto val = in_.sliceof.nu_dup();
            return val.nu_terminate();
        } else {

            // Otherwise we need to do unicode conversion.
            auto dec = decode(in_.sliceof, false);
            auto enc = encode!SelfType(dec, false);
            return enc.take();
        }
    }

public:
    alias value this;

    /**
        The type of character the string contains.
    */
    alias CharType = T;

    /**
        The length of the string, in code units.
    */
    @property size_t length() const @safe nothrow { return memory.length; }

    /**
        Gets the length of the string, with the null terminator.

        Returns 0 for a string which has no backing memory.
    */
    @property size_t realLength() const @safe nothrow { return memory !is null ? memory.length+1 : 0; }
    
    /**
        The length of the string, in bytes.
    */
    @property size_t usage() const @safe nothrow { return memory.length*T.sizeof; }

    /**
        Whether the string is empty.
    */
    @property bool empty() const @safe nothrow { return memory.length == 0; }

    /**
        Gets a C string pointer to the nstring.

        The pointer is $(D null) if the string has no backing memory.
    */
    @property const(T)* ptr() const @system nothrow { return cast(const(T)*)memory.ptr; }

    /**
        Gets the internal memory slice.
    */
    @property immutable(T)[] value() inout @trusted nothrow pure { return memory; }

    // Aliases for legacy purposes.
    alias toDString = value;
    alias toCString = ptr;

    /// Releases the memory owned by the string, if any.
    ~this() {
        this.resizeImpl(0);
    }

    /**
        Creates a string from a string from any other UTF encoding.

        During CTFE the memory of $(D rhs) is borrowed and the string
        is tagged read-only; this is only done when the character
        types match. At runtime the contents are always copied
        (and re-encoded if need be).
    */
    this(U)(auto ref U rhs) @system
    if (isSomeString!U) {
        if (__ctfe) {
            static if (is(StringCharType!U == StringCharType!SelfType)) {
                this.flags |= STRFLAG_READONLY;
                this.memory = cast(MemoryT)rhs.sliceof;
            }
        } else {
            if (rhs) {
                static if (is(StringCharType!U == StringCharType!SelfType)) {
                    this.memory = cast(MemoryT)rhs.sliceof.nu_dup();
                    nu_terminate(memory);
                } else {
                    auto val = otherToSelf(rhs.sliceof);
                    this.memory = val;
                }
            } else {
                this.memory = null;
            }
        }
    }

    /**
        Creates a string by concatenating any number of strings,
        which may be of differing UTF encodings.
    */
    this(Args...)(auto ref Args args) @system
    if(allSatisfy!(.isSomeString, Args)) {
        if (__ctfe) {
            this.flags |= STRFLAG_READONLY;
            foreach(arg; args)
                this.memory ~= cast(MemoryT)arg.sliceof;

            // Append the terminator, then hide it again so that
            // it does not count towards the length of the string.
            this.memory ~= "\0";
            this.memory = this.memory[0..$-1];
            
        } else {

            // Convert every argument first; the combined length must
            // be measured in code units of *this* encoding, which may
            // differ from the code unit count of the source strings.
            MemoryT[Args.length] parts;
            size_t total = 0;
            foreach(i, ref arg; args) {
                parts[i] = otherToSelf(arg.sliceof);
                total += parts[i].length;
            }

            // Allocate new string
            T[] buffer = nu_malloca!T(total);
            size_t offset = 0; // Write offset
            foreach(ref part; parts) {
                buffer[offset..offset+part.length] = part[];
                offset += part.length;
                nu_freea(part);
            }
            
            // Terminate result.
            this.memory = cast(MemoryT)buffer;
            nu_terminate(memory);
        }
    }

    /**
        Copy-constructor

        At runtime the contents of $(D rhs) are duplicated, the
        copy owns its memory. During CTFE the memory is borrowed.
    */
    this(ref return scope inout(SelfType) rhs) inout @trusted {
        if (__ctfe) {
            this.flags |= STRFLAG_READONLY;
            this.memory = rhs.memory;
        } else if (rhs) {

            // NOTE(review): assumes nu_idup only copies `length`
            // code units; re-terminate so `ptr` stays a C string.
            MemoryT copy = rhs.memory.nu_idup;
            nu_terminate(copy);
            this.memory = copy;
        }

        // Otherwise memory stays at its initial value, null.
    }

    /**
        Constructs a string with the given size.
        The contents of the string will be zero-initialized.
    */
    this(uint size) {
        if (__ctfe) { } else {
            this.resize(size);
            nogc_zeroinit(this.memory);
        }
    }

    /**
        Move "constructor"

        Takes over the memory and flags of $(D other) and
        leaves $(D other) empty, without freeing anything.
    */
    void opPostMove(ref typeof(this) other) {
        this.memory = other.memory;
        this.flags = other.flags;
        other.resetImpl();
    }

    /**
        Clears the string, equivalent to resizing it to 0.
    */
    void clear() {
        this.resizeImpl(0);
    }

    /**
        Flips the endianness of the string's contents.

        Note:
            This is no-op for UTF-8 strings.

        Returns:
            The string instance.
    */
    auto ref flipEndian() {
        static if (CharType.sizeof > 1) {

            import nulib.memory.endian : nu_etoh, ALT_ENDIAN;

            // The flip happens in place, never write to borrowed memory.
            this.takeOwnershipImpl();
            cast(void)nu_etoh!(CharType, ALT_ENDIAN)(cast(CharType[])memory[0..$]);
        }

        return this;
    }

    /**
        Reverses the contents of the string

        Note:
            The string is reversed code unit by code unit, multi-unit
            sequences are not kept intact.

        Returns:
            The string instance.
    */
    auto ref reverse() {

        // The reversal happens in place, never write to borrowed memory.
        this.takeOwnershipImpl();

        auto mmemory = cast(CharType[])memory;
        foreach(i; 0..memory.length/2) {
            auto a = memory[i];
            auto b = memory[$-(i+1)];

            mmemory[i] = b;
            mmemory[$-(i+1)] = a;
        }

        return this;
    }

    /**
        Take ownership of the memory owned by the string.

        If the string is tagged as read-only a copy of the string
        is returned.

        Returns:
            The memory which was owned by the nulib string,
            the nulib string is reset in the process.
    */
    immutable(T)[] take() {
        this.takeOwnershipImpl();
        
        auto mem = this.memory;
        this.resetImpl();
        return mem;
    }

    /**
        Resizes the string.

        Params:
            newLength = The amount of characters the string should be.

        Note:
            The contents when increasing the size of a string with this
            function is undefined. A null terminator will be appended 
            automatically.
    */
    void resize(size_t newLength) {
        this.resizeImpl(newLength);
    }

    /**
        Sets the value of the string.

        Params:
            other = The string to set this string to.

        Notes:
            The contents of $(D other) are copied (and re-encoded if
            need be). Memory previously owned by the string is freed;
            borrowed read-only memory is left untouched.
    */
    void opAssign(U)(U other) @trusted
    if (isSomeString!U) {

        // Build the replacement before releasing anything, $(D other)
        // may be a slice into the memory we currently own.
        static if (is(StringCharType!U == StringCharType!SelfType)) {
            MemoryT replacement = cast(MemoryT)other.sliceof.nu_dup();
            nu_terminate(replacement);
        } else {
            MemoryT replacement = otherToSelf(other.sliceof);
        }

        if (!(flags & STRFLAG_READONLY) && memory.ptr) 
            nu_free(cast(void*)this.memory.ptr);

        // Take ownership of our new memory.
        this.memory = replacement;
        this.flags &= ~STRFLAG_READONLY;
    }

    /**
        Appends a character to this string.

        Null characters are ignored.
    */
    void opOpAssign(string op, U)(auto ref inout(U) value) @trusted
    if (op == "~" && isSomeChar!U) {
        
        // Don't insert null terminators.
        if (value == cast(U)0)
            return;
        
        this.resizeImpl(length+1);
        this.setCharImpl(cast(void*)(&memory[$-1]), value);
    }

    /**
        Appends a string to this string.

        Strings of a different encoding are converted first;
        appending a string to (a slice of) itself is supported.
    */
    void opOpAssign(string op, U)(auto ref U other) @trusted
    if (op == "~" && isSomeString!U) {
        
        // Skip appending empty strings.
        if (other.sliceof.length == 0)
            return;

        size_t start = memory.length;
        static if (!is(StringCharType!U == StringCharType!SelfType)) {

            // Re-encode into a temporary which we own,
            // and which is freed once it has been copied.
            auto otherSlice = otherToSelf(other);
            
            this.resizeImpl(memory.length+otherSlice.length);
            this.setRangeImpl(memory[start..$], otherSlice[0..$]);

            nu_freea(otherSlice);
        } else {
            auto otherSlice = other.sliceof;
            if (isOverlapping(memory, otherSlice)) {

                // Resizing may move our memory, which would invalidate
                // the source slice; append from a temporary copy.
                auto tmp = otherSlice.nu_dup();

                this.resizeImpl(memory.length+tmp.length);
                this.setRangeImpl(memory[start..$], tmp[0..$]);
                tmp = tmp.nu_resize(0);
                return;
            }

            this.resizeImpl(memory.length+otherSlice.length);
            this.setRangeImpl(memory[start..$], otherSlice[0..$]);
        }
    }

    /**
        Makes a nstring appended to this string.

        Returns:
            A new string containing this string followed by $(D rhs).
    */
    auto opBinary(string op, R)(auto ref inout R rhs) inout
    if (op == "~") {

        // We can't be sure that the given strings won't be freed if we
        // passed them in to opOpAssign, as such we copy them.
        auto lhsv = this.sliceof.nu_dup();
        auto rhsv = rhs.sliceof.nu_dup();

        SelfType result;
        result ~= lhsv;
        result ~= rhsv;

        nu_freea(lhsv);
        nu_freea(rhsv);
        return result;
    }

    /**
        Makes a nstring prepended to this string.

        Returns:
            A new string containing $(D lhs) followed by this string.
    */
    auto opBinaryRight(string op, R)(auto ref inout R lhs) inout
    if (op == "~") {

        // We can't be sure that the given strings won't be freed if we
        // passed them in to opOpAssign, as such we copy them.
        auto lhsv = lhs.sliceof.nu_dup();
        auto rhsv = this.sliceof.nu_dup();

        SelfType result;
        result ~= lhsv;
        result ~= rhsv;

        nu_freea(lhsv);
        nu_freea(rhsv);
        return result;
    }
}
562 
@("nstring: char append")
unittest {
    // Append a single code unit to a string of each width.
    nstring narrow;
    narrow ~= 'c';
    assert(narrow == "c");

    nwstring wide;
    wide ~= '\u4567';
    assert(wide == "\u4567"w);

    ndstring dwide;
    dwide ~= '\U0000ABCD';
    assert(dwide == "\U0000ABCD"d);

    // Appending a string to itself doubles its contents.
    narrow ~= narrow;
    assert(narrow == "cc");
}
580 
@("nstring: append")
unittest {
    // Null and empty strings are skipped, C strings are appended
    // up to their terminator.
    const(char)* utf8z = "a zero-terminated string";
    nstring narrow;
    narrow ~= cast(string)null;
    narrow ~= "";
    narrow ~= utf8z;
    assert(narrow == "a zero-terminated string");

    const(wchar)* utf16z = "hey";
    nwstring wide;
    wide ~= utf16z;
    assert(wide.length == 3);

    const(dchar)* utf32z = "ho";
    ndstring dwide;
    dwide ~= utf32z;
    assert(dwide == "ho"d);
}
601 
@("nstring: concat")
unittest {
    // Binary concatenation with a D string.
    auto joined = nstring("Hello, ") ~ "world!";
    assert(joined == "Hello, world!", joined.sliceof);

    // Variadic construction from strings of mixed encodings.
    auto built = nstring("Hello, ", "world", " 2!"w);
    assert(built == "Hello, world 2!");
}
610 
@("nstring: concat convert")
unittest {
    import std.utf : toUTF8;

    // UTF-8 ~ UTF-16 nstrings.
    auto utf8WithUtf16 = nstring("Hello, ") ~ nwstring("world!"w);
    assert(utf8WithUtf16 == "Hello, world!", utf8WithUtf16.sliceof);

    // UTF-32 ~ UTF-8 nstrings.
    auto utf32WithUtf8 = ndstring("Hello, ") ~ nstring("world!");
    assert(utf32WithUtf8 == "Hello, world!"d, utf32WithUtf8.sliceof.toUTF8);

    // UTF-8 nstring ~ UTF-16 D string.
    auto utf8WithWSlice = nstring("Hello, ") ~ "world!"w;
    assert(utf8WithWSlice == "Hello, world!", utf8WithWSlice.sliceof.toUTF8);
}
624 
@("nstring: reverse")
unittest {
    // reverse() works in place and returns the string itself.
    nstring text = "Test";
    assert(text.reverse() == "tseT");
}
630 
@("nstring: flipEndian")
unittest {
    nwstring text = "Test"w;

    // The UTF-16 code units of "Test" with their two bytes swapped.
    assert(text.flipEndian() == "\u5400\u6500\u7300\u7400");
}
636 
@("nstring: string in map")
unittest {
    import nulib.collections.map : map;

    // An nstring must be usable as a map key; a separately
    // constructed but equal key finds the stored value.
    map!(nstring, int) table;
    table[nstring("uwu")] = 42;

    assert(table[nstring("uwu")] == 42);
}
645 
@("nstring: length")
unittest {
    nstring text = "Test string";

    // 11 code units of 1 byte each, plus 1 for the terminator.
    assert(text.length() == 11);
    assert(text.usage() == 11);
    assert(text.realLength() == 12);
}
653 
@("nstring: emptiness")
unittest {
    // A default-initialized string is empty.
    nstring text;
    assert(text.empty());

    // Clearing an empty string keeps it empty and does not
    // allocate anything; the pointer stays null.
    text.clear();
    assert(text.empty);
    assert(text.ptr is null);
}
665 
666 //
667 //      C and D string handling utilities
668 //
669 
/**
    Gets the slice equivalent of the input string.

    Params:
        str = The string to get a slice of; a D slice, a nstring
              or a null-terminated C string.

    Returns:
        A slice over all of the code units of $(D str), not including
        the null terminator. For C strings the length is found by
        scanning for the terminator; a $(D null) C string yields
        a $(D null) slice.
*/
auto sliceof(T)(auto ref T str) @nogc nothrow
if(isSomeString!T) {
    static if (isSomeCString!T) {

        // A null pointer has no terminator to look for.
        return str ? str[0..nu_strlen(str)] : null;
    } else {
        return str[0..$];
    }
}
685 
@("sliceof")
unittest {
    const(char)* str1 = "Hello, world!";
    const(char)[] str2 = "Hello, world!";
    nstring str3 = "Hello, world!";
    
    // The C string, D slice and nstring must all yield the same slice contents.
    assert(str1.sliceof == str2.sliceof, str1.sliceof);
    assert(str3.sliceof == str2.sliceof, str3.sliceof);
}
695 
/**
    Creates a slice over a null-terminated string.

    Params:
        str = the null terminated string to slice.

    Returns:
        A slice over the code units of $(D str), ending just before
        the null terminator; $(D null) if $(D str) is $(D null).
        If $(D str) is not null terminated the return value
        is undefined and likely corrupted.
*/
inout(T)[] fromStringz(T)(inout(T)* str) @system @nogc pure nothrow
if (isSomeChar!T) {
    if (!str)
        return null;

    return str[0 .. nu_strlen!T(str)];
}
711 
/**
    Gets the length of a null-terminated string.

    Params:
        str = the string to check the length of.
    
    Returns:
        The length of the string in code units, not counting the
        null terminator; $(D 0) if $(D str) is $(D null).
        If $(D str) is not null terminated the return value
        is undefined.
*/
size_t nu_strlen(T)(inout(T)* str) @system @nogc pure nothrow
if (isSomeChar!T) {

    // There is nothing to scan in a null string.
    if (str is null)
        return 0;

    const(T)* p = str;
    while (*p)
        ++p;
    
    return p - str;
}