1 module keyvalues.parser;
2 
3 import std.algorithm;
4 import std.array;
5 import std.string;
6 import std.uni;
7 
8 import keyvalues.keyvalue;
9 import keyvalues.stack;
10 
/++
    Turn KeyValues source text into a KeyValue tree.

    The text is first tokenised by `lex`; the resulting tokens are then
    assembled into a tree by `parse`.

    Params:
        text = the KeyValues document to read

    Returns: the KeyValue produced by `parse` for the lexed tokens

    Throws: Exception if either the lexer or the parser rejects the input
+/
KeyValue parseKeyValues(string text)
{
    auto tokens = lex(text);

    return parse(tokens);
}
18 
19 private:
20 
/// Kinds of token produced by `lex`.
enum TokenType
{
    str,         /// a bare or quoted string
    objectStart, /// an opening brace, `{`
    objectEnd,   /// a closing brace, `}`
}
27 
/++
    A location in the source text, used in error messages.

    Both fields start at 1.
+/
struct Position
{
    uint line = 1;
    uint column = 1;
    
    /++
        Render the position as `line L col C`.

        Marked `const` so that const and immutable positions can be
        formatted as well.
    +/
    string toString() const
    {
        return "line %d col %d".format(line, column);
    }
}
38 
/++
    A single lexical token: its kind, its text (for string tokens) and the
    position recorded for it by the lexer.
+/
struct Token
{
    TokenType type;
    string value;
    Position position;
    
    /++
        Compare two tokens by type and value only.

        The position is deliberately ignored so that tests can describe the
        expected tokens without spelling out where each one was found.
        Const-correct so that const tokens can be compared too.
    +/
    bool opEquals(const typeof(this) other) const
    {
        return other.type == type && other.value == value;
    }
}
51 
/++
    A thin wrapper around the input string that keeps track of the line and
    column of the current front character while the text is consumed.

    Every range primitive other than `popFront` is forwarded to the wrapped
    string through `alias this`.
+/
struct PositionTracker
{
    private string data;
    private Position _position;
    
    alias data this; //forward all other range primitives
    
    /++
        Drop the front character and move the tracked position past it.

        The character being consumed decides how the position moves:
        consuming a newline starts the next line at column 1, anything else
        advances the column by one. Deciding on the consumed character
        (rather than on the character that becomes the new front) keeps the
        first character of every line at column 1 and counts a newline at
        the very start of the text.
    +/
    void popFront()
    {
        immutable consumed = data.front;
        
        data.popFront;
        
        if(consumed == '\n')
        {
            _position.line++;
            _position.column = 1;
        }
        else
            _position.column++;
    }
    
    /// Position of the current front character.
    @property Position position()
    {
        return _position;
    }
}
80 
/++
    Abort lexing or parsing by throwing an Exception with a formatted message.

    Params:
        fmt = format string for the message
        args = values substituted into `fmt`; any number may be given

    Throws: Exception, always
+/
void error(Args...)(string fmt, Args args)
{
    throw new Exception(fmt.format(args));
}
85 
/++
    Split KeyValues text into a flat list of tokens.

    Braces become `objectStart`/`objectEnd` tokens, quoted and bare strings
    become `str` tokens, and runs of whitespace are skipped.

    Params:
        text = the KeyValues document to tokenise

    Returns: the tokens in the order they appear in `text`

    Throws: Exception if a string is malformed (see `lexQuotedString` and
        `lexBareString`)
+/
Token[] lex(string text)
{
    auto source = PositionTracker(text);
    Appender!(Token[]) tokens;
    
    //The payload is lazy on purpose: the position is read first, and only
    //then is the string lexer (which advances the source) allowed to run
    void emit(TokenType kind, lazy string payload = null)
    {
        auto startedAt = source.position;
        
        tokens.put(Token(kind, payload, startedAt));
    }
    
    while(!source.empty)
    {
        auto current = source.front;
        
        if(current == '"')
        {
            emit(TokenType.str, source.lexQuotedString);
        }
        else if(current == '{')
        {
            emit(TokenType.objectStart);
            source.popFront;
        }
        else if(current == '}')
        {
            emit(TokenType.objectEnd);
            source.popFront;
        }
        else if(current.isWhite)
        {
            //Swallow the whole run of whitespace in one go
            do
            {
                source.popFront;
            }
            while(!source.empty && source.front.isWhite);
        }
        else
        {
            emit(TokenType.str, source.lexBareString);
        }
    }
    
    return tokens.data;
}
131 
/++
    Read an unquoted string from the front of the text.

    Reading stops, without consuming the stopping character, at whitespace,
    at either brace, or at the end of the text.

    Params:
        keyvaluesText = the text to read from; advanced past the string

    Returns: the characters that made up the bare string

    Throws: Exception if a double quote appears inside the bare string
+/
string lexBareString(ref PositionTracker keyvaluesText)
{
    Appender!string collected;
    
    while(!keyvaluesText.empty)
    {
        dchar ch = keyvaluesText.front;
        
        if(ch == '"')
            error("Unexpected start of string at %s", keyvaluesText.position);
        else if(ch == '{' || ch == '}' || ch.isWhite)
            break;
        
        collected.put(ch);
        keyvaluesText.popFront;
    }
    
    return collected.data;
}
159 
/++
    Read a double-quoted string from the front of the text.

    The front character is taken to be the opening quote and is skipped.
    Inside the string a backslash escapes the next character: `\n` yields a
    newline, `\t` yields a tab, and any other escaped character yields
    itself.

    Params:
        keyvaluesText = the text to read from; advanced past the closing quote

    Returns: the contents of the string with escapes resolved

    Throws: Exception if the text ends right after a backslash or before the
        closing quote
+/
string lexQuotedString(ref PositionTracker keyvaluesText)
{
    auto openedAt = keyvaluesText.position;
    Appender!string contents;
    
    keyvaluesText.popFront; //skip the opening quote
    
    while(!keyvaluesText.empty && keyvaluesText.front != '"')
    {
        dchar ch = keyvaluesText.front;
        
        keyvaluesText.popFront;
        
        if(ch == '\\')
        {
            //The backslash is already consumed; it must be followed by something
            if(keyvaluesText.empty)
                error("Unterminated escape sequence at %s", keyvaluesText.position);
            
            dchar escaped = keyvaluesText.front;
            
            keyvaluesText.popFront;
            
            switch(escaped)
            {
                case 'n':
                    ch = '\n';
                    break;
                case 't':
                    ch = '\t';
                    break;
                default:
                    ch = escaped;
            }
        }
        
        contents.put(ch);
    }
    
    //Leaving the loop without a quote at the front means the text ran out
    if(keyvaluesText.empty)
        error("Quoted string at %s has no closing quote", openedAt);
    
    keyvaluesText.popFront; //skip the closing quote
    
    return contents.data;
}
210 
//Lexer tests; token positions are ignored by Token.opEquals
unittest
{
    //Shorthand for an expected string token
    static Token word(string value)
    {
        return Token(TokenType.str, value);
    }
    
    auto open = Token(TokenType.objectStart);
    auto close = Token(TokenType.objectEnd);
    
    //Bare strings are split on whitespace
    assert(lex(`abc def`) == [word("abc"), word("def")]);
    
    //Quotes keep embedded whitespace together
    assert(lex(`"abc def" ghi`) == [word("abc def"), word("ghi")]);
    
    //An escaped quote does not end the string
    assert(lex(`"abc def\"" ghi`) == [word(`abc def"`), word("ghi")]);
    
    //Braces are tokens of their own
    assert(lex(`abc { def ghi }`) == [word("abc"), open, word("def"), word("ghi"), close]);
    
    //Quoted braces are ordinary strings
    assert(lex(`abc "{" def "}"`) == [word("abc"), word("{"), word("def"), word("}")]);
}
252 
/++
    Build a KeyValue tree from a token list.

    Every key/value pair and every object found at the top level is added as
    a subkey of an implicit object with the key "root", which is what gets
    returned.

    Params:
        tokens = tokens as produced by `lex`

    Returns: the implicit root object holding everything that was parsed

    Throws: Exception if a key has no value, if a brace appears where it is
        not allowed, if a closing brace has no matching opening brace, or if
        an object is still open when the tokens run out
+/
KeyValue parse(Token[] tokens)
{
    //Stack of objects currently being filled; the implicit root sits at the bottom
    auto objects = Stack!KeyValue(2);
    
    objects.push(KeyValue("root", true));
    
    while(!tokens.empty)
        final switch(tokens.front.type) with(TokenType)
        {
            case str:
                //A string in this position is a key; what follows decides its kind
                auto keyPosition = tokens.front.position;
                auto nextValue = KeyValue(tokens.front.value);
                
                tokens.popFront;
                
                if(tokens.empty)
                    error("Key at %s does not have an associated value", keyPosition);
                
                final switch(tokens.front.type)
                {
                    case str:
                        //key followed by string: a plain pair, finished right away
                        nextValue.value = tokens.front.value;
                        objects.top.subkeys ~= nextValue;
                        
                        tokens.popFront;
                        
                        break;
                    case objectStart:
                        //key followed by '{': the object stays on the stack until its '}'
                        nextValue.hasSubkeys = true;
                        
                        objects.push(nextValue);
                        tokens.popFront;
                        
                        break;
                    case objectEnd:
                        error("Unexpected object close at %s", tokens.front.position);
                }
                
                break;
            case objectStart:
                error("Unexpected object open at %s", tokens.front.position);
                
                break;
            case objectEnd:
                auto obj = objects.pop;
                
                //If that emptied the stack, the brace tried to close the implicit root
                if(objects.empty)
                    error("Unmatched object close at %s", tokens.front.position);
                
                objects.top.subkeys ~= obj;
                
                tokens.popFront;
        }
    
    auto result = objects.pop;
    
    //Only the root may be left at the end. Anything below result means result
    //is an object whose closing brace never arrived; returning it would
    //silently drop the root and everything parsed before it.
    if(!objects.empty)
        error("Object \"%s\" is missing its closing brace", result.key);
    
    return result;
}
309 
//End-to-end tests: text in, KeyValue tree out
unittest
{
    //A single bare pair ends up as the only subkey of the implicit root
    {
        auto root = parseKeyValues(`abc def`);
        
        assert(root.key == "root");
        assert(root.hasSubkeys);
        assert(root.subkeys.length == 1);
        
        auto pair = root.subkeys[0];
        
        assert(pair.key == "abc");
        assert(!pair.hasSubkeys);
        assert(pair.value == "def");
    }
    
    //Quoted key containing whitespace
    {
        auto pair = parseKeyValues(`"abc def" ghi`).subkeys[0];
        
        assert(pair.key == "abc def");
        assert(!pair.hasSubkeys);
        assert(pair.value == "ghi");
    }
    
    //Quoted key containing an escaped quote
    {
        auto pair = parseKeyValues(`"abc def\"" ghi`).subkeys[0];
        
        assert(pair.key == `abc def"`);
        assert(!pair.hasSubkeys);
        assert(pair.value == "ghi");
    }
    
    //Nested object
    {
        auto obj = parseKeyValues(`abc { def ghi }`).subkeys[0];
        
        assert(obj.key == "abc");
        assert(obj.hasSubkeys);
        assert(obj.subkeys[0].key == "def");
        assert(!obj.subkeys[0].hasSubkeys);
        assert(obj.subkeys[0].value == "ghi");
    }
    
    //Quoted braces are values, not structure
    {
        auto root = parseKeyValues(`abc "{" def "}"`);
        
        assert(root.key == "root");
        assert(root.hasSubkeys);
        assert(root.subkeys.length == 2);
        assert(root.subkeys[0].key == "abc");
        assert(!root.subkeys[0].hasSubkeys);
        assert(root.subkeys[0].value == "{");
        assert(root.subkeys[1].key == "def");
        assert(!root.subkeys[1].hasSubkeys);
        assert(root.subkeys[1].value == "}");
    }
    
    //Repeated keys are all kept
    {
        auto root = parseKeyValues(`abc def abc ghi abc jkl`);
        
        assert(root.subkeys.length == 3);
        assert(root["abc"].length == 3);
    }
}