ELTN Pull Parser

Frank Mitchell

Posted: 2023-04-12
Word Count: 1436
Tags: java programming

Table of Contents

This is the proposed interface for an ELTN Pull Parser.

See also the JSON Pull Parser.

API

Event

package com.frank_mitchell.eltnpp;

/**
 * Events representing the major semantic elements of an ELTN file. See the 
 * <a href="https://frank-mitchell.com/projects/eltn/">ELTN Specification</a>.
 *
 * Each constant identifies what kind of element the parser is currently
 * positioned on; the parser interface reports it through its
 * {@code getEvent()} method.
 * 
 * @author Frank Mitchell
 */
public enum EltnEvent {
    /**
     * Invalid ELTN syntax.
     * The parser's {@code getError()} method carries the specific error code.
     */
    SYNTAX_ERROR,

    /**
     * Before first ELTN element.
     * The text associated with this event is the empty string.
     */
    START_STREAM,

    /**
     * Variable name assignment outside a table (...`=`)
     */
    VAR_NAME,

    /**
     * Start of ELTN table (`{`)
     */
    START_TABLE,

    /**
     * End of ELTN table (`}`)
     */
    END_TABLE,

    /**
     * String key in an ELTN table (...`=` or `[`...`]=`)
     */
    TABLE_KEY_STRING,

    /**
     * Number key in an ELTN table (`[`...`]=`)
     */
    TABLE_KEY_NUMBER,

    /**
     * Boolean key in an ELTN table (`[`...`]=`)
     */
    TABLE_KEY_BOOLEAN,

    /**
     * ELTN nil (`nil`)
     */
    VALUE_NIL,

    /**
     * ELTN Boolean (`true` or `false`)
     */
    VALUE_BOOLEAN,

    /**
     * ELTN number (decimal or hexadecimal)
     */
    VALUE_NUMBER,

    /**
     * ELTN string (`"`...`"`)
     */
    VALUE_STRING,

    /**
     * After last ELTN element.
     * The text associated with this event is the empty string.
     */
    END_STREAM
}

Pull Parser

package com.frank_mitchell.eltnpp;

import java.io.IOException;

/**
 * A pull parser for an ELTN (Extended Lua Table Notation) document.
 *
 * <p>The caller drives the parse: {@link #hasNext()} reports whether more
 * elements remain, {@link #next()} advances to the next element, and the
 * remaining accessors describe the element the parser is positioned on.
 *
 * <p>See the
 * <a href="https://frank-mitchell.com/projects/eltn/">ELTN Specification</a>
 * for more information.
 *
 * @author Frank Mitchell
 */
public interface EltnPullParser {
    /**
     * Checks whether the underlying stream has more ELTN elements.
     *
     * @return whether the stream has more ELTN elements.
     * @throws IOException if the character source could not be read.
     */
    boolean hasNext() throws IOException;

    /**
     * Advances to the next significant ELTN element in the
     * underlying stream.
     *
     * @throws IOException if the character source could not be read.
     */
    void next() throws IOException;

    /**
     * Gets the event parsed by the most recent call to {@link #next()}.
     *
     * @return most recently parsed event.
     */
    EltnEvent getEvent();

    /**
     * Gets the code for the current error.
     * If {@link #getEvent()} is not {@link EltnEvent#SYNTAX_ERROR},
     * this method will return {@link EltnError#NO_ERROR}.
     *
     * @return the current error code, or {@link EltnError#NO_ERROR} if the
     *         current event is not an error.
     */
    EltnError getError();

    /**
     * Gets the raw text associated with the current event,
     * minus any surrounding whitespace.
     * Every event has associated text, although
     * {@link EltnEvent#START_TABLE} will only return "{",
     * {@link EltnEvent#END_TABLE} will only return "}",
     * and {@link EltnEvent#START_STREAM} and {@link EltnEvent#END_STREAM}
     * will only return "".
     * This can be especially useful on errors.
     *
     * @return text associated with this event.
     */
    CharSequence getText();

    /**
     * Gets the current text's offset in the character stream, if available.
     * This will be a number greater than or equal to 0 indicating the number
     * of characters processed before the beginning of the text shown in
     * {@link #getText()}.  Thus the first character is at offset 0.
     *
     * @return an offset &ge; 0, or -1 if not available.
     * @see #getText()
     */
    int getTextOffset();

    /**
     * Gets the current text's line number, if available.
     * This will be a number greater than 0 indicating the number of
     * newline sequences processed before the text shown in
     * {@link #getText()}, plus 1.  Thus the first character is at line 1.
     *
     * @return a line number &gt; 0, or -1 if not available.
     * @see #getText()
     * @see #getTextOffset()
     */
    int getTextLineNumber();

    /**
     * Gets the current text's column number, if available.
     * This will be a number greater than 0 indicating the number of
     * characters processed since the last newline sequence, up to and
     * including the first character of {@link #getText()}.
     * Thus the first character in any line is in column 1.
     *
     * @return a column number &gt; 0, or -1 if not available.
     * @see #getText()
     * @see #getTextLineNumber()
     */
    int getTextColumnNumber();

    /**
     * Indicates if the enclosing value is an ELTN Table.
     *
     * <p>If this object is currently processing the contents of an ELTN
     * Table, this method will return {@code true}.
     *
     * @return {@code true} if the enclosing value is an ELTN Table.
     */
    boolean isInTable();

    /**
     * Gets the string value associated with the current event.
     *
     * <p>On {@link EltnEvent#VAR_NAME},
     * the result is the ELTN string value for the name.
     *
     * <p>On {@link EltnEvent#TABLE_KEY_STRING} or
     * {@link EltnEvent#VALUE_STRING},
     * the result is the ELTN string value with all escape sequences
     * converted to their character values.
     *
     * <p>On {@link EltnEvent#TABLE_KEY_NUMBER} or
     * {@link EltnEvent#VALUE_NUMBER},
     * the result is the string value
     * of the number in its original form (decimal or hexadecimal).
     *
     * <p>On {@link EltnEvent#TABLE_KEY_BOOLEAN} or
     * {@link EltnEvent#VALUE_BOOLEAN},
     * the result is "true" or "false".
     *
     * <p>On {@link EltnEvent#VALUE_NIL} the result is "nil".
     *
     * <p>Otherwise the method throws an exception.
     *
     * @return the string for the current value.
     *
     * @throws IllegalStateException if the current event has no string value.
     */
    String getString();

    /**
     * Gets the numeric value associated with the current event.
     *
     * <p>If {@link #getEvent()} is
     * {@link EltnEvent#TABLE_KEY_NUMBER} or {@link EltnEvent#VALUE_NUMBER},
     * this method returns an unspecified subclass of {@link Number}.
     * Otherwise this method throws an exception.
     *
     * @return the value of the current ELTN Number.
     *
     * @throws IllegalStateException if the current event is not a number.
     */
    Number getNumber();

    /**
     * Gets a {@code boolean} value for the current event.
     *
     * <p>If {@link #getEvent()} is {@link EltnEvent#VALUE_BOOLEAN} or
     * {@link EltnEvent#TABLE_KEY_BOOLEAN}, this method returns the value.
     * If {@link #getEvent()} is {@link EltnEvent#VALUE_NIL},
     * this method returns {@code false}.
     * Otherwise this method returns {@code true}.
     * This method emulates the convention in Lua that in a Boolean test
     * statement, a value of <em>nil</em> or <em>false</em> counts as false
     * and every other value counts as true.
     *
     * @return the Boolean value of the current ELTN object.
     */
    boolean getBoolean();
}

Pull Parser Factory

package com.frank_mitchell.eltnpp;

import com.frank_mitchell.codepoint.CodePointSource;
import com.frank_mitchell.codepoint.CodePoint;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Factory for {@link EltnPullParser} instances.
 * Every parser produced here reads one stream of ELTN text, which may be
 * supplied as ASCII, UTF-8, UTF-16, or bytes in an arbitrary encoding.
 *
 * <p>Implementations only need to provide
 * {@link #createParser(CodePointSource)}; the remaining methods are
 * conveniences that adapt their input and delegate to it.
 *
 * @author Frank Mitchell
 */
public interface EltnPullParserFactory {
    /**
     * Creates a parser to process a stream of Unicode code points.
     *
     * @param source a stream of code points.
     * @return a parser for the source.
     * @throws IOException if the source throws an exception.
     */
    EltnPullParser createParser(CodePointSource source) throws IOException;

    /**
     * Creates a parser to process bytes in the specified encoding.
     *
     * @param stream a stream of bytes.
     * @param cs the character encoding of the bytes.
     * @return a parser for the stream.
     * @throws IOException if the stream throws an exception.
     */
    default EltnPullParser createParser(InputStream stream, Charset cs)
            throws IOException {
        final CodePointSource decoded = CodePoint.getSource(stream, cs);
        return createParser(decoded);
    }

    /**
     * Creates a parser to process UTF-8 characters.
     * Delegates to {@link #createParser(InputStream, Charset)}.
     *
     * @param stream a stream of UTF-8 bytes.
     * @return a parser for the stream.
     * @throws IOException if the stream throws an exception.
     */
    default EltnPullParser createUtf8Parser(InputStream stream) throws IOException {
        final Charset utf8 = StandardCharsets.UTF_8;
        return createParser(stream, utf8);
    }

    /**
     * Creates a parser to process UTF-16 characters,
     * that is, a stream of Java {@code char}s.
     *
     * @param reader a stream of UTF-16 chars.
     * @return a parser for the reader.
     * @throws IOException if the reader throws an exception.
     */
    default EltnPullParser createParser(Reader reader) throws IOException {
        final CodePointSource chars =
                CodePoint.getSource(reader, StandardCharsets.UTF_16);
        return createParser(chars);
    }
}

Error

Note: The implementation will add more codes as it progresses, ideally one per specific parse error.

package com.frank_mitchell.eltnpp;

/**
 * Codes for common parse errors.
 *
 * The parser reports one of these codes through its {@code getError()}
 * method; the code is {@link #NO_ERROR} whenever the current event is not
 * a syntax error.
 *
 * @author Frank Mitchell
 */
public enum EltnError {
    /**
     * No error condition at present.
     */
    NO_ERROR,

    // TODO: other Errors here, ideally one per specific parse error

    /**
     * Unknown error condition otherwise not enumerated.
     */
    UNKNOWN
}

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.