module keyvalues.parser;

import std.algorithm;
import std.array;
import std.string;
import std.uni;

import keyvalues.keyvalue;
import keyvalues.stack;

/++
    Parse a string of KeyValues text into a KeyValue object.

    The text is tokenized by `lex` and the tokens are assembled by `parse`.
    The returned object is a synthetic node with the key "root" whose
    subkeys are the top-level entries of `text`.

    Throws: Exception on malformed input (unterminated quoted string,
            key without a value, unbalanced braces, ...).
+/
KeyValue parseKeyValues(string text)
{
    return text.lex.parse;
}

private:

/// Kinds of token produced by `lex`.
enum TokenType
{
    str,         /// quoted or bare string
    objectStart, /// '{'
    objectEnd    /// '}'
}

/// 1-based line/column location inside the source text, used in error messages.
struct Position
{
    uint line = 1;
    uint column = 1;

    /// Renders as "line L col C".
    string toString()
    {
        return "line %d col %d".format(line, column);
    }
}

/// A single lexical token together with the position at which it starts.
struct Token
{
    TokenType type;
    string value;
    Position position;

    //For testing, ignores Position
    bool opEquals(typeof(this) other)
    {
        return other.type == type && other.value == value;
    }
}

/++
    Wraps the input string and keeps track of the line/column of the
    character currently at the front.

    Only `popFront` is overridden; `front`, `empty` and everything else are
    forwarded to the underlying string through `alias this`.
+/
struct PositionTracker
{
    private string data;
    private Position _position;

    alias data this; //forward all other range primitives

    /++
        Consume the front character and advance the tracked position.

        The position is updated according to the character being consumed:
        consuming a newline moves to column 1 of the next line, anything
        else moves one column to the right. (Deciding based on the *new*
        front instead would miss a leading newline and would place the
        first character of every following line at column 2.)
    +/
    void popFront()
    {
        immutable consumedNewline = data.front == '\n';

        data.popFront;

        if(consumedNewline)
        {
            _position.line++;
            _position.column = 1;
        }
        else
        {
            _position.column++;
        }
    }

    /// Position of the character currently at the front.
    @property Position position()
    {
        return _position;
    }
}

/// Throw an Exception whose message is `fmt` formatted with `args`.
void error(Args)(string fmt, Args args)
{
    throw new Exception(fmt.format(args));
}

/++
    Split KeyValues text into tokens.

    Quoted strings and bare (unquoted) strings both become `TokenType.str`
    tokens; '{' and '}' become `objectStart` / `objectEnd`. Whitespace
    separates tokens and is otherwise discarded.

    Throws: Exception if a string is malformed (see `lexQuotedString` and
            `lexBareString`).
+/
Token[] lex(string text)
{
    Appender!(Token[]) result;
    auto keyvaluesText = PositionTracker(text);

    // `value` is lazy so that the position is captured *before* the
    // sub-lexer passed as `value` runs and consumes the token's characters.
    void put(TokenType type, lazy string value = null)
    {
        auto pos = keyvaluesText.position;

        result.put(Token(type, value, pos));
    }

    while(!keyvaluesText.empty)
    {
        switch(keyvaluesText.front)
        {
            case '"':
                put(TokenType.str, keyvaluesText.lexQuotedString);

                break;
            case '{':
                put(TokenType.objectStart);
                keyvaluesText.popFront;

                break;
            case '}':
                put(TokenType.objectEnd);
                keyvaluesText.popFront;

                break;
            default:
                if(keyvaluesText.front.isWhite)
                {
                    // skip the whole run of whitespace
                    while(!keyvaluesText.empty && keyvaluesText.front.isWhite)
                        keyvaluesText.popFront;

                    break;
                }

                put(TokenType.str, keyvaluesText.lexBareString);
        }
    }

    return result.data;
}

/++
    Lex an unquoted string starting at the front of `keyvaluesText`.

    Consumes characters up to, but not including, the next whitespace,
    '{' or '}' (or the end of input) and returns them.

    Throws: Exception if a '"' is encountered inside the bare string.
+/
string lexBareString(ref PositionTracker keyvaluesText)
{
    Appender!string result;

    loop:
    while(!keyvaluesText.empty)
    {
        switch(keyvaluesText.front)
        {
            case '"':
                error("Unexpected start of string at %s", keyvaluesText.position);

                break;
            case '{':
            case '}':
                break loop;
            default:
                if(keyvaluesText.front.isWhite)
                    break loop;

                result.put(keyvaluesText.front);
                keyvaluesText.popFront;
        }
    }

    return result.data;
}

/++
    Lex a double-quoted string; the front of `keyvaluesText` must be the
    opening quote.

    Consumes through the closing quote and returns the contents with escape
    sequences resolved: `\n` becomes a newline, `\t` a tab, and a backslash
    followed by any other character yields that character unchanged (so
    `\"` and `\\` work as expected).

    Throws: Exception if the input ends directly after a backslash or
            before the closing quote.
+/
string lexQuotedString(ref PositionTracker keyvaluesText)
{
    Appender!string result;
    auto stringPosition = keyvaluesText.position; // reported if the string is never closed

    keyvaluesText.popFront; //opening quote

    loop:
    while(!keyvaluesText.empty)
    {
        switch(keyvaluesText.front)
        {
            case '"':
                break loop;
            case '\\':
                keyvaluesText.popFront;

                if(keyvaluesText.empty)
                    error("Unterminated escape sequence at %s", keyvaluesText.position);

                switch(keyvaluesText.front)
                {
                    case 'n':
                        result.put("\n");

                        break;
                    case 't':
                        result.put("\t");

                        break;
                    default:
                        result.put(keyvaluesText.front);
                }

                keyvaluesText.popFront;

                break;
            default:
                result.put(keyvaluesText.front);
                keyvaluesText.popFront;
        }
    }

    if(keyvaluesText.empty)
        error("Quoted string at %s has no closing quote", stringPosition);

    keyvaluesText.popFront; //closing quote

    return result.data;
}

unittest
{
    with(TokenType)
    {
        assert(
            `abc def`.lex == [
                Token(str, "abc"),
                Token(str, "def"),
            ]
        );
        assert(
            `"abc def" ghi`.lex == [
                Token(str, "abc def"),
                Token(str, "ghi"),
            ]
        );
        assert(
            `"abc def\"" ghi`.lex == [
                Token(str, `abc def"`),
                Token(str, "ghi"),
            ]
        );
        assert(
            `abc { def ghi }`.lex == [
                Token(str, "abc"),
                Token(objectStart),
                Token(str, "def"),
                Token(str, "ghi"),
                Token(objectEnd),
            ]
        );
        assert(
            `abc "{" def "}"`.lex == [
                Token(str, "abc"),
                Token(str, "{"),
                Token(str, "def"),
                Token(str, "}"),
            ]
        );
    }

    // Token positions (Token.opEquals ignores them, so check explicitly).
    auto tokens = "a\n b".lex;

    assert(tokens.length == 2);
    assert(tokens[0].position == Position(1, 1));
    assert(tokens[1].position == Position(2, 2));

    // A leading newline must advance the line counter as well.
    tokens = "\nabc".lex;

    assert(tokens.length == 1);
    assert(tokens[0].position == Position(2, 1));
}

/++
    Build a KeyValue tree from a token stream.

    Every entry is a `str` key followed either by a `str` value or by an
    `objectStart` ... `objectEnd` group containing nested entries. All
    top-level entries become subkeys of a synthetic "root" object, which is
    returned.

    Throws: Exception if a key has no value, a brace appears where a key or
            value is expected, a '}' has no matching '{', or a '{' is never
            closed.
+/
KeyValue parse(Token[] tokens)
{
    auto objects = Stack!KeyValue(2); // NOTE(review): 2 is presumably an initial capacity - confirm against keyvalues.stack
    Position[] openBraces; // position of the '{' of every object still open, innermost last

    objects.push(KeyValue("root", true));

    while(!tokens.empty)
        final switch(tokens.front.type) with(TokenType)
        {
            case str:
                auto keyPosition = tokens.front.position;
                auto nextValue = KeyValue(tokens.front.value);

                tokens.popFront;

                if(tokens.empty)
                    error("Key at %s does not have an associated value", keyPosition);

                final switch(tokens.front.type)
                {
                    case str:
                        // plain "key value" pair: attach to the innermost open object
                        nextValue.value = tokens.front.value;
                        objects.top.subkeys ~= nextValue;

                        tokens.popFront;

                        break;
                    case objectStart:
                        // "key {": the object is attached to its parent once it is closed
                        nextValue.hasSubkeys = true;

                        objects.push(nextValue);
                        openBraces ~= tokens.front.position;
                        tokens.popFront;

                        break;
                    case objectEnd:
                        error("Unexpected object close at %s", tokens.front.position);
                }

                break;
            case objectStart:
                error("Unexpected object open at %s", tokens.front.position);

                break;
            case objectEnd:
                auto obj = objects.pop;

                // popping the root means this '}' had no matching '{'
                if(objects.empty)
                    error("Unmatched object close at %s", tokens.front.position);

                objects.top.subkeys ~= obj;
                openBraces.popBack;

                tokens.popFront;
        }

    // Anything still open here would otherwise be returned in place of the
    // root, silently discarding the rest of the tree.
    if(!openBraces.empty)
        error("Object opened at %s has no closing brace", openBraces.back);

    return objects.pop;
}

unittest
{
    import std.exception : assertThrown;

    auto kv = `abc def`.parseKeyValues;

    assert(kv.key == "root");
    assert(kv.hasSubkeys);
    assert(kv.subkeys.length == 1);

    kv = kv.subkeys[0];

    assert(kv.key == "abc");
    assert(!kv.hasSubkeys);
    assert(kv.value == "def");

    kv = `"abc def" ghi`.parseKeyValues.subkeys[0];

    assert(kv.key == "abc def");
    assert(!kv.hasSubkeys);
    assert(kv.value == "ghi");

    kv = `"abc def\"" ghi`.parseKeyValues.subkeys[0];

    assert(kv.key == `abc def"`);
    assert(!kv.hasSubkeys);
    assert(kv.value == "ghi");

    kv = `abc { def ghi }`.parseKeyValues.subkeys[0];

    assert(kv.key == "abc");
    assert(kv.hasSubkeys);
    assert(kv.subkeys[0].key == "def");
    assert(!kv.subkeys[0].hasSubkeys);
    assert(kv.subkeys[0].value == "ghi");

    kv = `abc "{" def "}"`.parseKeyValues;

    assert(kv.key == "root");
    assert(kv.hasSubkeys);
    assert(kv.subkeys.length == 2);
    assert(kv.subkeys[0].key == "abc");
    assert(!kv.subkeys[0].hasSubkeys);
    assert(kv.subkeys[0].value == "{");
    assert(kv.subkeys[1].key == "def");
    assert(!kv.subkeys[1].hasSubkeys);
    assert(kv.subkeys[1].value == "}");

    kv = `abc def abc ghi abc jkl`.parseKeyValues;

    assert(kv.subkeys.length == 3);
    assert(kv["abc"].length == 3);

    // Malformed input must be rejected rather than yielding a partial tree.
    assertThrown(`abc { def ghi`.parseKeyValues); // '{' never closed
    assertThrown(`abc def }`.parseKeyValues);     // '}' without '{'
    assertThrown(`abc`.parseKeyValues);           // key without value
}