| /* |
| * plist - An open source library to parse and generate property lists |
| * Copyright (C) 2014 Daniel Dreibrodt |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| package com.dd.plist; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.UnsupportedEncodingException; |
| import java.nio.CharBuffer; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.text.ParseException; |
| import java.text.StringCharacterIterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| |
| /** |
| * Parser for ASCII property lists. Supports Apple OS X/iOS and GnuStep/NeXTSTEP format. |
| * This parser is based on the recursive descent paradigm, but the underlying grammar |
| * is not explicitely defined. |
| * <p/> |
| * Resources on ASCII property list format: |
| * <ul> |
| * <li><a href="https://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html> |
| * Property List Programming Guide - Old-Style ASCII Property Lists |
| * </a></li> |
| * <li><a href="http://www.gnustep.org/resources/documentation/Developer/Base/Reference/NSPropertyList.html"> |
| * GnuStep - NSPropertyListSerialization class documentation |
| * </a></li> |
| * </ul> |
| * |
| * @author Daniel Dreibrodt |
| */ |
| public class ASCIIPropertyListParser { |
| |
| /** |
| * Parses an ASCII property list file. |
| * |
| * @param f The ASCII property list file. |
| * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. |
| * @throws Exception When an error occurs during parsing. |
| */ |
| public static NSObject parse(File f) throws IOException, ParseException { |
| return parse(new FileInputStream(f)); |
| } |
| |
| /** |
| * Parses an ASCII property list from an input stream. |
| * |
| * @param in The input stream that points to the property list's data. |
| * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. |
| * @throws Exception When an error occurs during parsing. |
| */ |
| public static NSObject parse(InputStream in) throws ParseException, IOException { |
| byte[] buf = PropertyListParser.readAll(in); |
| in.close(); |
| return parse(buf); |
| } |
| |
| /** |
| * Parses an ASCII property list from a byte array. |
| * |
| * @param bytes The ASCII property list data. |
| * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. |
| * @throws Exception When an error occurs during parsing. |
| */ |
| public static NSObject parse(byte[] bytes) throws ParseException { |
| ASCIIPropertyListParser parser = new ASCIIPropertyListParser(bytes); |
| return parser.parse(); |
| } |
| |
| public static final char WHITESPACE_SPACE = ' '; |
| public static final char WHITESPACE_TAB = '\t'; |
| public static final char WHITESPACE_NEWLINE = '\n'; |
| public static final char WHITESPACE_CARRIAGE_RETURN = '\r'; |
| |
| public static final char ARRAY_BEGIN_TOKEN = '('; |
| public static final char ARRAY_END_TOKEN = ')'; |
| public static final char ARRAY_ITEM_DELIMITER_TOKEN = ','; |
| |
| public static final char DICTIONARY_BEGIN_TOKEN = '{'; |
| public static final char DICTIONARY_END_TOKEN = '}'; |
| public static final char DICTIONARY_ASSIGN_TOKEN = '='; |
| public static final char DICTIONARY_ITEM_DELIMITER_TOKEN = ';'; |
| |
| public static final char QUOTEDSTRING_BEGIN_TOKEN = '"'; |
| public static final char QUOTEDSTRING_END_TOKEN = '"'; |
| public static final char QUOTEDSTRING_ESCAPE_TOKEN = '\\'; |
| |
| public static final char DATA_BEGIN_TOKEN = '<'; |
| public static final char DATA_END_TOKEN = '>'; |
| |
| public static final char DATA_GSOBJECT_BEGIN_TOKEN = '*'; |
| public static final char DATA_GSDATE_BEGIN_TOKEN = 'D'; |
| public static final char DATA_GSBOOL_BEGIN_TOKEN = 'B'; |
| public static final char DATA_GSBOOL_TRUE_TOKEN = 'Y'; |
| public static final char DATA_GSBOOL_FALSE_TOKEN = 'N'; |
| public static final char DATA_GSINT_BEGIN_TOKEN = 'I'; |
| public static final char DATA_GSREAL_BEGIN_TOKEN = 'R'; |
| |
| public static final char DATE_DATE_FIELD_DELIMITER = '-'; |
| public static final char DATE_TIME_FIELD_DELIMITER = ':'; |
| public static final char DATE_GS_DATE_TIME_DELIMITER = ' '; |
| public static final char DATE_APPLE_DATE_TIME_DELIMITER = 'T'; |
| public static final char DATE_APPLE_END_TOKEN = 'Z'; |
| |
| public static final char COMMENT_BEGIN_TOKEN = '/'; |
| public static final char MULTILINE_COMMENT_SECOND_TOKEN = '*'; |
| public static final char SINGLELINE_COMMENT_SECOND_TOKEN = '/'; |
| public static final char MULTILINE_COMMENT_END_TOKEN = '/'; |
| |
| /** |
| * Property list source data |
| */ |
| private byte[] data; |
| /** |
| * Current parsing index |
| */ |
| private int index; |
| |
| /** |
| * Only allow subclasses to change instantiation. |
| */ |
| protected ASCIIPropertyListParser() { |
| |
| } |
| |
| /** |
| * Creates a new parser for the given property list content. |
| * |
| * @param propertyListContent The content of the property list that is to be parsed. |
| */ |
| private ASCIIPropertyListParser(byte[] propertyListContent) { |
| data = propertyListContent; |
| } |
| |
| /** |
| * Checks whether the given sequence of symbols can be accepted. |
| * |
| * @param sequence The sequence of tokens to look for. |
| * @return Whether the given tokens occur at the current parsing position. |
| */ |
| private boolean acceptSequence(char... sequence) { |
| for (int i = 0; i < sequence.length; i++) { |
| if (data[index + i] != sequence[i]) |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Checks whether the given symbols can be accepted, that is, if one |
| * of the given symbols is found at the current parsing position. |
| * |
| * @param acceptableSymbols The symbols to check. |
| * @return Whether one of the symbols can be accepted or not. |
| */ |
| private boolean accept(char... acceptableSymbols) { |
| boolean symbolPresent = false; |
| for (char c : acceptableSymbols) { |
| if (data[index] == c) |
| symbolPresent = true; |
| } |
| return symbolPresent; |
| } |
| |
| /** |
| * Checks whether the given symbol can be accepted, that is, if |
| * the given symbols is found at the current parsing position. |
| * |
| * @param acceptableSymbol The symbol to check. |
| * @return Whether the symbol can be accepted or not. |
| */ |
| private boolean accept(char acceptableSymbol) { |
| return data[index] == acceptableSymbol; |
| } |
| |
| /** |
| * Expects the input to have one of the given symbols at the current parsing position. |
| * |
| * @param expectedSymbols The expected symbols. |
| * @throws ParseException If none of the expected symbols could be found. |
| */ |
| private void expect(char... expectedSymbols) throws ParseException { |
| if (!accept(expectedSymbols)) { |
| String excString = "Expected '" + expectedSymbols[0] + "'"; |
| for (int i = 1; i < expectedSymbols.length; i++) { |
| excString += " or '" + expectedSymbols[i] + "'"; |
| } |
| excString += " but found '" + (char) data[index] + "'"; |
| throw new ParseException(excString, index); |
| } |
| } |
| |
| /** |
| * Expects the input to have the given symbol at the current parsing position. |
| * |
| * @param expectedSymbol The expected symbol. |
| * @throws ParseException If the expected symbol could be found. |
| */ |
| private void expect(char expectedSymbol) throws ParseException { |
| if (!accept(expectedSymbol)) |
| throw new ParseException("Expected '" + expectedSymbol + "' but found '" + (char) data[index] + "'", index); |
| } |
| |
| /** |
| * Reads an expected symbol. |
| * |
| * @param symbol The symbol to read. |
| * @throws ParseException If the expected symbol could not be read. |
| */ |
| private void read(char symbol) throws ParseException { |
| expect(symbol); |
| index++; |
| } |
| |
| /** |
| * Skips the current symbol. |
| */ |
| private void skip() { |
| index++; |
| } |
| |
| /** |
| * Skips several symbols |
| * |
| * @param numSymbols The amount of symbols to skip. |
| */ |
| private void skip(int numSymbols) { |
| index += numSymbols; |
| } |
| |
| /** |
| * Skips all whitespaces and comments from the current parsing position onward. |
| */ |
| private void skipWhitespacesAndComments() { |
| boolean commentSkipped; |
| do { |
| commentSkipped = false; |
| |
| //Skip whitespaces |
| while (accept(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE, WHITESPACE_SPACE, WHITESPACE_TAB)) { |
| skip(); |
| } |
| |
| //Skip single line comments "//..." |
| if (acceptSequence(COMMENT_BEGIN_TOKEN, SINGLELINE_COMMENT_SECOND_TOKEN)) { |
| skip(2); |
| readInputUntil(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE); |
| commentSkipped = true; |
| } |
| //Skip multi line comments "/* ... */" |
| else if (acceptSequence(COMMENT_BEGIN_TOKEN, MULTILINE_COMMENT_SECOND_TOKEN)) { |
| skip(2); |
| while (true) { |
| if (acceptSequence(MULTILINE_COMMENT_SECOND_TOKEN, MULTILINE_COMMENT_END_TOKEN)) { |
| skip(2); |
| break; |
| } |
| skip(); |
| } |
| commentSkipped = true; |
| } |
| } |
| while (commentSkipped); //if a comment was skipped more whitespace or another comment can follow, so skip again |
| } |
| |
| private String toUtf8String(ByteArrayOutputStream stream) { |
| try { |
| return stream.toString("UTF-8"); |
| } catch (UnsupportedEncodingException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| /** |
| * Reads input until one of the given symbols is found. |
| * |
| * @param symbols The symbols that can occur after the string to read. |
| * @return The input until one the given symbols. |
| */ |
| private String readInputUntil(char... symbols) { |
| ByteArrayOutputStream stringBytes = new ByteArrayOutputStream(); |
| while (!accept(symbols)) { |
| stringBytes.write(data[index]); |
| skip(); |
| } |
| return toUtf8String(stringBytes); |
| } |
| |
| /** |
| * Reads input until the given symbol is found. |
| * |
| * @param symbol The symbol that can occur after the string to read. |
| * @return The input until the given symbol. |
| */ |
| private String readInputUntil(char symbol) { |
| ByteArrayOutputStream stringBytes = new ByteArrayOutputStream(); |
| while (!accept(symbol)) { |
| stringBytes.write(data[index]); |
| skip(); |
| } |
| return toUtf8String(stringBytes); |
| } |
| |
| /** |
| * Parses the property list from the beginning and returns the root object |
| * of the property list. |
| * |
| * @return The root object of the property list. This can either be a NSDictionary or a NSArray. |
| * @throws ParseException When an error occured during parsing |
| */ |
| public NSObject parse() throws ParseException { |
| index = 0; |
| skipWhitespacesAndComments(); |
| expect(DICTIONARY_BEGIN_TOKEN, ARRAY_BEGIN_TOKEN, COMMENT_BEGIN_TOKEN); |
| try { |
| return parseObject(); |
| } catch (ArrayIndexOutOfBoundsException ex) { |
| throw new ParseException("Reached end of input unexpectedly.", index); |
| } |
| } |
| |
| /** |
| * Parses the NSObject found at the current position in the property list |
| * data stream. |
| * |
| * @return The parsed NSObject. |
| * @see ASCIIPropertyListParser#index |
| */ |
| private NSObject parseObject() throws ParseException { |
| switch (data[index]) { |
| case ARRAY_BEGIN_TOKEN: { |
| return parseArray(); |
| } |
| case DICTIONARY_BEGIN_TOKEN: { |
| return parseDictionary(); |
| } |
| case DATA_BEGIN_TOKEN: { |
| return parseData(); |
| } |
| case QUOTEDSTRING_BEGIN_TOKEN: { |
| String quotedString = parseQuotedString(); |
| //apple dates are quoted strings of length 20 and after the 4 year digits a dash is found |
| if (quotedString.length() == 20 && quotedString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { |
| try { |
| return new NSDate(quotedString); |
| } catch (Exception ex) { |
| //not a date? --> return string |
| return new NSString(quotedString); |
| } |
| } else { |
| return new NSString(quotedString); |
| } |
| } |
| default: { |
| //0-9 |
| if (data[index] > 0x2F && data[index] < 0x3A) { |
| //could be a date or just a string |
| return parseDateString(); |
| } else { |
| //non-numerical -> string or boolean |
| String parsedString = parseString(); |
| return new NSString(parsedString); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Parses an array from the current parsing position. |
| * The prerequisite for calling this method is, that an array begin token has been read. |
| * |
| * @return The array found at the parsing position. |
| */ |
| private NSArray parseArray() throws ParseException { |
| //Skip begin token |
| skip(); |
| skipWhitespacesAndComments(); |
| List<NSObject> objects = new LinkedList<NSObject>(); |
| while (!accept(ARRAY_END_TOKEN)) { |
| objects.add(parseObject()); |
| skipWhitespacesAndComments(); |
| if (accept(ARRAY_ITEM_DELIMITER_TOKEN)) { |
| skip(); |
| } else { |
| break; //must have reached end of array |
| } |
| skipWhitespacesAndComments(); |
| } |
| //parse end token |
| read(ARRAY_END_TOKEN); |
| return new NSArray(objects.toArray(new NSObject[objects.size()])); |
| } |
| |
| /** |
| * Parses a dictionary from the current parsing position. |
| * The prerequisite for calling this method is, that a dictionary begin token has been read. |
| * |
| * @return The dictionary found at the parsing position. |
| */ |
| private NSDictionary parseDictionary() throws ParseException { |
| //Skip begin token |
| skip(); |
| skipWhitespacesAndComments(); |
| NSDictionary dict = new NSDictionary(); |
| while (!accept(DICTIONARY_END_TOKEN)) { |
| //Parse key |
| String keyString; |
| if (accept(QUOTEDSTRING_BEGIN_TOKEN)) { |
| keyString = parseQuotedString(); |
| } else { |
| keyString = parseString(); |
| } |
| skipWhitespacesAndComments(); |
| |
| //Parse assign token |
| read(DICTIONARY_ASSIGN_TOKEN); |
| skipWhitespacesAndComments(); |
| |
| NSObject object = parseObject(); |
| dict.put(keyString, object); |
| skipWhitespacesAndComments(); |
| read(DICTIONARY_ITEM_DELIMITER_TOKEN); |
| skipWhitespacesAndComments(); |
| } |
| //skip end token |
| skip(); |
| return dict; |
| } |
| |
| /** |
| * Parses a data object from the current parsing position. |
| * This can either be a NSData object or a GnuStep NSNumber or NSDate. |
| * The prerequisite for calling this method is, that a data begin token has been read. |
| * |
| * @return The data object found at the parsing position. |
| */ |
| private NSObject parseData() throws ParseException { |
| NSObject obj = null; |
| //Skip begin token |
| skip(); |
| if (accept(DATA_GSOBJECT_BEGIN_TOKEN)) { |
| skip(); |
| expect(DATA_GSBOOL_BEGIN_TOKEN, DATA_GSDATE_BEGIN_TOKEN, DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN); |
| if (accept(DATA_GSBOOL_BEGIN_TOKEN)) { |
| //Boolean |
| skip(); |
| expect(DATA_GSBOOL_TRUE_TOKEN, DATA_GSBOOL_FALSE_TOKEN); |
| if (accept(DATA_GSBOOL_TRUE_TOKEN)) { |
| obj = new NSNumber(true); |
| } else { |
| obj = new NSNumber(false); |
| } |
| //Skip the parsed boolean token |
| skip(); |
| } else if (accept(DATA_GSDATE_BEGIN_TOKEN)) { |
| //Date |
| skip(); |
| String dateString = readInputUntil(DATA_END_TOKEN); |
| obj = new NSDate(dateString); |
| } else if (accept(DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN)) { |
| //Number |
| skip(); |
| String numberString = readInputUntil(DATA_END_TOKEN); |
| obj = new NSNumber(numberString); |
| } |
| //parse data end token |
| read(DATA_END_TOKEN); |
| } else { |
| String dataString = readInputUntil(DATA_END_TOKEN); |
| dataString = dataString.replaceAll("\\s+", ""); |
| |
| int numBytes = dataString.length() / 2; |
| byte[] bytes = new byte[numBytes]; |
| for (int i = 0; i < bytes.length; i++) { |
| String byteString = dataString.substring(i * 2, i * 2 + 2); |
| int byteValue = Integer.parseInt(byteString, 16); |
| bytes[i] = (byte) byteValue; |
| } |
| obj = new NSData(bytes); |
| |
| //skip end token |
| skip(); |
| } |
| |
| return obj; |
| } |
| |
| /** |
| * Attempts to parse a plain string as a date if possible. |
| * |
| * @return A NSDate if the string represents such an object. Otherwise a NSString is returned. |
| */ |
| private NSObject parseDateString() { |
| String numericalString = parseString(); |
| if (numericalString.length() > 4 && numericalString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { |
| try { |
| return new NSDate(numericalString); |
| } catch(Exception ex) { |
| //An exception occurs if the string is not a date but just a string |
| } |
| } |
| return new NSString(numericalString); |
| } |
| |
| /** |
| * Parses a plain string from the current parsing position. |
| * The string is made up of all characters to the next whitespace, delimiter token or assignment token. |
| * |
| * @return The string found at the current parsing position. |
| */ |
| private String parseString() { |
| return readInputUntil(WHITESPACE_SPACE, WHITESPACE_TAB, WHITESPACE_NEWLINE, WHITESPACE_CARRIAGE_RETURN, |
| ARRAY_ITEM_DELIMITER_TOKEN, DICTIONARY_ITEM_DELIMITER_TOKEN, DICTIONARY_ASSIGN_TOKEN, ARRAY_END_TOKEN); |
| } |
| |
| /** |
| * Parses a quoted string from the current parsing position. |
| * The prerequisite for calling this method is, that a quoted string begin token has been read. |
| * |
| * @return The quoted string found at the parsing method with all special characters unescaped. |
| * @throws ParseException If an error occured during parsing. |
| */ |
| private String parseQuotedString() throws ParseException { |
| //Skip begin token |
| skip(); |
| ByteArrayOutputStream quotedString = new ByteArrayOutputStream(); |
| boolean unescapedBackslash = true; |
| //Read from opening quotation marks to closing quotation marks and skip escaped quotation marks |
| while (data[index] != QUOTEDSTRING_END_TOKEN || (data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash)) { |
| quotedString.write(data[index]); |
| if (accept(QUOTEDSTRING_ESCAPE_TOKEN)) { |
| unescapedBackslash = !(data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash); |
| } |
| skip(); |
| } |
| String unescapedString; |
| try { |
| unescapedString = parseQuotedString(toUtf8String(quotedString)); |
| } catch (Exception ex) { |
| throw new ParseException("The quoted string could not be parsed.", index); |
| } |
| //skip end token |
| skip(); |
| return unescapedString; |
| } |
| |
| /** |
| * Used to encode the parsed strings |
| */ |
| private static CharsetEncoder asciiEncoder; |
| |
| /** |
| * Parses a string according to the format specified for ASCII property lists. |
| * Such strings can contain escape sequences which are unescaped in this method. |
| * |
| * @param s The escaped string according to the ASCII property list format, without leading and trailing quotation marks. |
| * @return The unescaped string in UTF-8 or ASCII format, depending on the contained characters. |
| * @throws Exception If the string could not be properly parsed. |
| */ |
| public static synchronized String parseQuotedString(String s) throws UnsupportedEncodingException, CharacterCodingException { |
| StringBuilder parsed = new StringBuilder(); |
| StringCharacterIterator iterator = new StringCharacterIterator(s); |
| char c = iterator.current(); |
| |
| while (iterator.getIndex() < iterator.getEndIndex()) { |
| switch (c) { |
| case '\\': { //An escaped sequence is following |
| parsed.append(parseEscapedSequence(iterator)); |
| break; |
| } |
| default: { |
| parsed.append(c); |
| break; |
| } |
| } |
| c = iterator.next(); |
| } |
| return parsed.toString(); |
| } |
| |
| /** |
| * Unescapes an escaped character sequence, e.g. \\u00FC. |
| * |
| * @param iterator The string character iterator pointing to the first character after the backslash |
| * @return The unescaped character |
| */ |
| private static char parseEscapedSequence(StringCharacterIterator iterator) { |
| char c = iterator.next(); |
| if (c == 'b') { |
| return '\b'; |
| } else if (c == 'n') { |
| return '\n'; |
| } else if (c == 'r') { |
| return '\r'; |
| } else if (c == 't') { |
| return '\t'; |
| } else if (c == 'U' || c == 'u') { |
| //4 digit hex Unicode value |
| String byte1 = ""; |
| byte1 += iterator.next(); |
| byte1 += iterator.next(); |
| String byte2 = ""; |
| byte2 += iterator.next(); |
| byte2 += iterator.next(); |
| return (char) ((Integer.parseInt(byte1, 16) << 8) + Integer.parseInt(byte2, 16)); |
| } else if ((c >= '0') && (c <= '7')) { |
| //3 digit octal ASCII value |
| String num = ""; |
| num += c; |
| num += iterator.next(); |
| num += iterator.next(); |
| return (char) Integer.parseInt(num, 8); |
| } else { |
| // Possibly something that needn't be escaped, but we should accept it |
| // it anyway for consistency with Apple tools. |
| return c; |
| } |
| } |
| |
| } |