1 /**
2     Text Encoding Handling
3     
4     Copyright:
5         Copyright © 2023-2025, Kitsunebi Games
6         Copyright © 2023-2025, Inochi2D Project
7     
8     License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9     Authors:   Luna Nielsen
10 */
11 module nulib.text.encoding;
12 import nulib.string;
13 import nulib.text.ascii;
14 import nulib.text.unicode;
15 import nulib.text.unicode.utf8;
16 import nulib.text.unicode.utf16;
17 
/**
    Text encodings that $(D getEncoding) can report.
*/
enum Encoding {

    /**
        Unknown encoding; the text did not validate as any
        of the supported encodings.
    */
    unknown,

    /**
        ASCII
    */
    ascii,
    
    /**
        UTF-8
    */
    utf8,

    /**
        UTF-16 without a byte order mark.
    */
    utf16,

    /**
        UTF-16 with a byte order mark, little-endian.
    */
    utf16LE,

    /**
        UTF-16 with a byte order mark, big-endian.
    */
    utf16BE,

    /**
        UTF-32
    */
    utf32
}
58 
/**
    Gets the encoding of a run of text.

    The check that is performed depends on the size of the
    string's code units:
    $(UL
        $(LI 1 byte: $(D Encoding.ascii) if every unit passes `isASCII`
             (this includes the empty string); otherwise
             $(D Encoding.utf8) if the text validates, else
             $(D Encoding.unknown).)
        $(LI 2 bytes: if `getBOM` reports a BOM, $(D Encoding.utf16BE)
             when it equals `0xFEFF` and $(D Encoding.utf16LE) otherwise;
             the text is $(B not) validated in that case. Without a BOM,
             $(D Encoding.utf16) if the text validates, else
             $(D Encoding.unknown).)
        $(LI 4 bytes: $(D Encoding.utf32) if the text validates, else
             $(D Encoding.unknown).)
    )

    Params:
        str = The text to inspect.

    Returns:
        The detected $(D Encoding), or $(D Encoding.unknown) if the
        text could not be classified.
*/
Encoding getEncoding(T)(auto ref T str) @nogc if (isSomeString!T) {
    static if (StringCharSize!T == 1) {

        foreach(char c; str[]) {
            if (!isASCII(c)) {

                // The first non-ASCII unit decides the result: the text
                // is either valid UTF-8 or unknown. The nstring is only
                // built here so that pure ASCII input never constructs it
                // (presumably nstring owns a copy of the text - confirm).
                nstring nstr = str;
                return validate(nstr) ? 
                    Encoding.utf8 : 
                    Encoding.unknown;
            }
        }
        return Encoding.ascii;

    } else static if (StringCharSize!T == 2) {
        
        nwstring nstr = str;
        auto bom = getBOM(nstr);
        if (bom != 0) {

            // NOTE(review): a BOM is trusted as-is; the remaining text is
            // not validated. Whether 0xFEFF really means big-endian here
            // depends on how getBOM reads the mark and on the host byte
            // order - confirm against getBOM.
            return bom == 0x0000FEFF ? 
                Encoding.utf16BE : 
                Encoding.utf16LE;
        } else if (validate(nstr)) {

            return Encoding.utf16;
        }
        return Encoding.unknown;

    } else static if (StringCharSize!T == 4) {

        return validate(str) ? 
            Encoding.utf32 : 
            Encoding.unknown;
    } else {

        // Code unit size that matches none of the branches above.
        return Encoding.unknown;
    }
} 
100 
@("Get encoding")
unittest {

    // 8-bit text: pure ASCII versus multi-byte UTF-8.
    assert("Hello, world!".getEncoding() == Encoding.ascii);
    assert("あえおう".getEncoding() == Encoding.utf8);

    // 16-bit text: no BOM, then each of the two BOM values.
    assert("Hello, world!"w.getEncoding() == Encoding.utf16);
    assert("\uFEFFHello, world!"w.getEncoding() == Encoding.utf16BE);
    assert("\uFFFEHello, world!"w.getEncoding() == Encoding.utf16LE);
}