add JsonTokenizer

This commit is contained in:
jbb01 2025-04-12 17:10:13 +02:00
parent 3ee456bd8f
commit 7e44eda639
No known key found for this signature in database
GPG Key ID: 83C72CB6D5442CF1
16 changed files with 616 additions and 3 deletions

View File

@ -17,6 +17,9 @@ dependencies {
compileOnly(libs.lombok) compileOnly(libs.lombok)
annotationProcessor(libs.lombok) annotationProcessor(libs.lombok)
testImplementation(libs.junit.jupiter)
testRuntimeOnly(libs.junit.platform.launcher)
} }
tasks { tasks {
@ -24,4 +27,9 @@ tasks {
options.encoding = "UTF-8" options.encoding = "UTF-8"
options.compilerArgs.add("--enable-preview") options.compilerArgs.add("--enable-preview")
} }
withType<Test> {
useJUnitPlatform()
jvmArgs("--enable-preview")
}
} }

View File

@ -1,5 +1,6 @@
package eu.jonahbauer.json; package eu.jonahbauer.json;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import lombok.Getter; import lombok.Getter;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.experimental.Accessors; import lombok.experimental.Accessors;
@ -13,7 +14,7 @@ import org.jetbrains.annotations.Nullable;
@Getter @Getter
@Accessors(fluent = true) @Accessors(fluent = true)
@RequiredArgsConstructor @RequiredArgsConstructor
public enum JsonBoolean implements JsonValue { public enum JsonBoolean implements JsonValue, JsonToken {
TRUE(true), TRUE(true),
FALSE(false), FALSE(false),
; ;

View File

@ -1,5 +1,6 @@
package eu.jonahbauer.json; package eu.jonahbauer.json;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import org.jetbrains.annotations.Contract; import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
@ -8,7 +9,7 @@ import org.jetbrains.annotations.Nullable;
* Java representation of a JSON number. Note that JSON does not distinguish between integers and floating point * Java representation of a JSON number. Note that JSON does not distinguish between integers and floating point
* numbers and therefore all numbers are stored as {@code double}. * numbers and therefore all numbers are stored as {@code double}.
*/ */
public record JsonNumber(double value) implements JsonValue { public record JsonNumber(double value) implements JsonValue, JsonToken {
public JsonNumber { public JsonNumber {
if (!Double.isFinite(value)) throw new IllegalArgumentException("value must be finite"); if (!Double.isFinite(value)) throw new IllegalArgumentException("value must be finite");
} }

View File

@ -1,5 +1,6 @@
package eu.jonahbauer.json; package eu.jonahbauer.json;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import org.jetbrains.annotations.Contract; import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
@ -8,7 +9,7 @@ import java.util.Objects;
import java.util.stream.IntStream; import java.util.stream.IntStream;
@SuppressWarnings("unused") @SuppressWarnings("unused")
public record JsonString(@NotNull String value) implements JsonValue, CharSequence { public record JsonString(@NotNull String value) implements JsonValue, JsonToken, CharSequence {
public static final @NotNull JsonString EMPTY = new JsonString(""); public static final @NotNull JsonString EMPTY = new JsonString("");
public JsonString { public JsonString {

View File

@ -0,0 +1,27 @@
package eu.jonahbauer.json.exceptions;
import lombok.Getter;
import org.jetbrains.annotations.NotNull;
/**
 * Base class for exceptions that carry a position (line and column) in addition to their message.
 * The position passed to the constructor is appended to the text returned by {@link #getMessage()}.
 */
@Getter
abstract class JsonReaderException extends JsonProcessingException {
    /** Line number handed to the constructor. */
    private final int lineNumber;
    /** Column number handed to the constructor. */
    private final int columnNumber;

    /**
     * @param line the line number to report
     * @param column the column number to report
     * @param message the detail message (without position information)
     */
    public JsonReaderException(int line, int column, @NotNull String message) {
        super(message);
        lineNumber = line;
        columnNumber = column;
    }

    /**
     * @param line the line number to report
     * @param column the column number to report
     * @param message the detail message (without position information)
     * @param cause the underlying cause
     */
    public JsonReaderException(int line, int column, @NotNull String message, @NotNull Throwable cause) {
        super(message, cause);
        lineNumber = line;
        columnNumber = column;
    }

    /**
     * {@return the detail message followed by the line and column of this exception}
     */
    @Override
    public @NotNull String getMessage() {
        String detail = super.getMessage();
        String position = " at line " + getLineNumber() + ", column " + getColumnNumber();
        return detail + position;
    }
}

View File

@ -0,0 +1,14 @@
package eu.jonahbauer.json.exceptions;
import org.jetbrains.annotations.NotNull;
/**
 * Exception carrying a line and column number; used by the tokenizer to report invalid input.
 */
public class JsonTokenizerException extends JsonReaderException {

    /**
     * @param line the line number to report
     * @param column the column number to report
     * @param message the detail message
     */
    public JsonTokenizerException(int line, int column, @NotNull String message) {
        super(line, column, message);
    }

    /**
     * @param line the line number to report
     * @param column the column number to report
     * @param message the detail message
     * @param cause the underlying cause
     */
    public JsonTokenizerException(
            int line, int column, @NotNull String message, @NotNull Throwable cause
    ) {
        super(line, column, message, cause);
    }
}

View File

@ -0,0 +1,83 @@
package eu.jonahbauer.json.tokenizer;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * A source of {@link JsonToken}s that can be consumed token by token via {@link #next()}, as an
 * {@link Iterator} or as a {@link Stream}.
 */
public interface JsonTokenizer extends Iterable<JsonToken> {

    /**
     * {@return the next token or <code>null</code> if the end of the input has been reached}
     * @throws IOException if an I/O error occurs
     */
    @Nullable JsonToken next() throws IOException;

    /**
     * {@return the line at which the previously read token appeared}
     */
    int getLineNumber();

    /**
     * {@return the column at which the previously read token appeared}
     */
    int getColumnNumber();

    /**
     * {@return an iterator over the tokens} The iterator fetches one token ahead on demand; both
     * {@link Iterator#hasNext()} and {@link Iterator#next()} may therefore throw an
     * {@link UncheckedIOException} if an I/O error occurs.
     */
    @Override
    default @NotNull Iterator<@NotNull JsonToken> iterator() {
        return new Iterator<@NotNull JsonToken>() {
            /** The token fetched ahead of time; only meaningful while {@code fetched} is set. */
            private @Nullable JsonToken lookahead;
            /** Whether {@code lookahead} currently holds the result of a call to the tokenizer. */
            private boolean fetched;

            /** Fetches the look-ahead token if necessary and returns it ({@code null} at end of input). */
            private @Nullable JsonToken peek() {
                if (!fetched) {
                    try {
                        lookahead = JsonTokenizer.this.next();
                    } catch (IOException ex) {
                        throw new UncheckedIOException(ex);
                    }
                    // only mark as fetched once the tokenizer call succeeded
                    fetched = true;
                }
                return lookahead;
            }

            @Override
            public boolean hasNext() {
                return peek() != null;
            }

            @Override
            public @NotNull JsonToken next() {
                var token = peek();
                if (token == null) {
                    throw new NoSuchElementException();
                }
                // consume the look-ahead so that the following call fetches a fresh token
                lookahead = null;
                fetched = false;
                return token;
            }
        };
    }

    /**
     * {@return a stream of tokens} When an I/O error occurs, the corresponding {@link IOException} is wrapped in an
     * {@link UncheckedIOException} exception.
     */
    default @NotNull Stream<@NotNull JsonToken> stream() {
        var tokens = this.spliterator();
        return StreamSupport.stream(tokens, false);
    }
}

View File

@ -0,0 +1,188 @@
package eu.jonahbauer.json.tokenizer;
import eu.jonahbauer.json.exceptions.JsonTokenizerException;
import eu.jonahbauer.json.tokenizer.reader.PushbackReader;
import eu.jonahbauer.json.tokenizer.reader.PushbackReaderImpl;
import eu.jonahbauer.json.tokenizer.token.JsonNull;
import eu.jonahbauer.json.tokenizer.token.JsonPunctuation;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import lombok.Getter;
import eu.jonahbauer.json.JsonBoolean;
import eu.jonahbauer.json.JsonNumber;
import eu.jonahbauer.json.JsonString;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.*;
import java.util.regex.Pattern;
/**
 * {@link JsonTokenizer} implementation that reads characters from a {@link Reader} and groups them into
 * punctuation, string, number, boolean and null tokens. Whitespace (space, tab, carriage return, line feed)
 * between tokens is skipped. Invalid input is reported with a {@link JsonTokenizerException} that carries
 * the position of the first character of the offending token.
 */
public final class JsonTokenizerImpl implements JsonTokenizer {
    /** Number literals accepted by {@link #next()}; anything else that is not a keyword is an invalid token. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("-?(?:0|[1-9]\\d*)(?:\\.\\d+)?([eE][+-]?\\d+)?");

    /** Lookup table from ASCII code to punctuation token; {@code null} for all other characters. */
    private static final JsonPunctuation[] PUNCTUATION;

    static {
        PUNCTUATION = new JsonPunctuation[128];
        for (var punctuation : JsonPunctuation.values()) {
            PUNCTUATION[punctuation.value()] = punctuation;
        }
    }

    private final @NotNull PushbackReader reader;
    /** Scratch space for the four characters of a Unicode escape sequence. */
    private final int[] buffer = new int[4];

    /** Line of the first character of the most recently read token (or of the end of input). */
    @Getter
    private int lineNumber;
    /** Column of the first character of the most recently read token (or of the end of input). */
    @Getter
    private int columnNumber;

    /**
     * Creates a tokenizer over the given JSON text.
     * @param json the text to tokenize
     */
    public JsonTokenizerImpl(@NotNull String json) {
        this(new StringReader(json));
    }

    /**
     * Creates a tokenizer that reads from the given reader.
     * @param reader the character source
     */
    public JsonTokenizerImpl(@NotNull Reader reader) {
        this.reader = new PushbackReaderImpl(reader);
    }

    /**
     * {@return the next token or <code>null</code> if the end of the input has been reached}
     * @throws IOException if an I/O error occurs
     * @throws JsonTokenizerException if the input does not form a valid token
     */
    @Override
    public @Nullable JsonToken next() throws IOException {
        int chr;
        do {
            chr = reader.read();
        } while (isWhitespace(chr));

        // remember where the token starts; error() reports this position
        lineNumber = reader.getLineNumber();
        columnNumber = reader.getColumnNumber();

        if (chr == PushbackReader.EOF) return null;
        if (chr < 128 && PUNCTUATION[chr] != null) return PUNCTUATION[chr];
        if (chr == '"') return nextString();

        // keyword or number: re-read the first character as part of the token text
        reader.pushback();
        String token = nextToken();
        return switch (token) {
            case "true" -> JsonBoolean.TRUE;
            case "false" -> JsonBoolean.FALSE;
            case "null" -> JsonNull.NULL;
            case String number when isNumberLiteral(number) -> {
                try {
                    yield new JsonNumber(Double.parseDouble(number));
                } catch (IllegalArgumentException ex) {
                    throw error("invalid number literal: " + number, ex);
                }
            }
            default -> throw error("invalid token: " + token);
        };
    }

    /** {@return whether the character is one of the whitespace characters skipped between tokens} */
    private boolean isWhitespace(int chr) {
        return chr == ' ' || chr == '\t' || chr == '\r' || chr == '\n';
    }

    /** {@return whether the character terminates a keyword or number token} */
    private boolean isPunctuation(int chr) {
        return chr == '{' || chr == '}' || chr == '[' || chr == ']' || chr == ',' || chr == ':' || chr == '"';
    }

    /** {@return whether the token text matches the number pattern as a whole} */
    private boolean isNumberLiteral(@NotNull String token) {
        return NUMBER_PATTERN.matcher(token).matches();
    }

    /**
     * Reads characters up to (but not including) the next whitespace or punctuation character, or up to the
     * end of the input.
     * @return the characters read
     * @throws IOException if an I/O error occurs
     */
    private @NotNull String nextToken() throws IOException {
        StringBuilder out = new StringBuilder();
        while (true) {
            int chr = reader.read();
            if (chr == PushbackReader.EOF) {
                return out.toString();
            } else if (isWhitespace(chr) || isPunctuation(chr)) {
                // the delimiter belongs to the next token
                reader.pushback();
                return out.toString();
            } else {
                out.append((char) chr);
            }
        }
    }

    /**
     * Reads the remainder of a string literal whose opening quote has already been consumed and resolves
     * its escape sequences.
     * @return the decoded string
     * @throws IOException if an I/O error occurs
     * @throws JsonTokenizerException if the literal is unclosed, contains an unescaped character below 32 or
     *                                an invalid or incomplete escape sequence
     */
    private @NotNull JsonString nextString() throws IOException {
        var current = new StringBuilder();
        while (true) {
            int chr = reader.read();
            if (chr == '"') {
                return new JsonString(current.toString());
            } else if (chr == '\\') {
                chr = reader.read();
                switch (chr) {
                    case PushbackReader.EOF -> throw error("incomplete escape sequence in string literal");
                    case '"' -> current.append('"');
                    case '\\' -> current.append('\\');
                    case '/' -> current.append('/');
                    case 'b' -> current.append('\b');
                    case 'f' -> current.append('\f');
                    case 'n' -> current.append('\n');
                    case 'r' -> current.append('\r');
                    case 't' -> current.append('\t');
                    case 'u' -> current.append(nextUnicodeEscape());
                    default -> throw error("invalid character " + toString(chr) + " in escape sequence");
                }
            } else if (chr == PushbackReader.EOF) {
                throw error("unclosed string literal");
            } else if (chr < 32) {
                throw error("unescaped control character in string literal");
            } else {
                current.append((char) chr);
            }
        }
    }

    /**
     * Reads the four hexadecimal digits of a Unicode escape sequence and combines them into one character.
     * @return the character denoted by the four digits
     * @throws IOException if an I/O error occurs
     * @throws JsonTokenizerException if the input ends early or contains a non-hexadecimal character
     */
    private char nextUnicodeEscape() throws IOException {
        buffer[0] = reader.read();
        buffer[1] = reader.read();
        buffer[2] = reader.read();
        buffer[3] = reader.read();
        if (buffer[0] < 0 || buffer[1] < 0 || buffer[2] < 0 || buffer[3] < 0) {
            throw error("incomplete escape sequence in string literal");
        }

        int code = 0;
        for (int i = 0; i < 4; i++) {
            int digit = hexDigit(buffer[i]);
            if (digit < 0) {
                throw error("invalid character " + toString(buffer[i]) + " in escape sequence");
            }
            code = code * 16 + digit;
        }
        return (char) code;
    }

    /**
     * {@return the value (0 to 15) of an ASCII hexadecimal digit, or -1 if the character is not one}
     * The letters {@code a}/{@code A} through {@code f}/{@code F} stand for the values 10 through 15.
     */
    private static int hexDigit(int chr) {
        if ('0' <= chr && chr <= '9') return chr - '0';
        if ('a' <= chr && chr <= 'f') return 10 + (chr - 'a');
        if ('A' <= chr && chr <= 'F') return 10 + (chr - 'A');
        return -1;
    }

    /** Creates an exception located at the start of the current token. */
    private @NotNull JsonTokenizerException error(@NotNull String message) {
        return new JsonTokenizerException(getLineNumber(), getColumnNumber(), message);
    }

    /** Creates an exception with a cause, located at the start of the current token. */
    private @NotNull JsonTokenizerException error(@NotNull String message, @NotNull Throwable cause) {
        return new JsonTokenizerException(getLineNumber(), getColumnNumber(), message, cause);
    }

    /** {@return a printable description of the character for use in error messages} */
    private static @NotNull String toString(int chr) {
        return switch (chr) {
            case PushbackReader.EOF -> "EOF";
            case '\t' -> "'\\t'";
            case '\r' -> "'\\r'";
            case '\n' -> "'\\n'";
            case '\b' -> "'\\b'";
            case '\f' -> "'\\f'";
            default -> {
                if (Character.isISOControl((char) chr)) {
                    yield "0x" + Integer.toHexString(chr);
                } else {
                    yield "'" + (char) chr + "'";
                }
            }
        };
    }
}

View File

@ -0,0 +1,37 @@
package eu.jonahbauer.json.tokenizer.reader;
import org.jetbrains.annotations.Range;
import java.io.IOException;
/**
 * A character source that supports un-reading the most recently read character and reports the line and
 * column of the current character.
 */
public interface PushbackReader extends AutoCloseable {
    /** Value returned by {@link #read()} once the end of the stream has been reached. */
    int EOF = -1;

    /**
     * Reads a single character.
     * @return the character read, or {@link #EOF} at the end of the stream
     * @throws IOException if an I/O error occurs
     */
    @Range(from = -1, to = Character.MAX_VALUE) int read() throws IOException;

    /**
     * Un-reads the current character so that the following {@link #read()} returns it again.
     * @throws IllegalStateException when called before the first read or more than once after each read
     */
    void pushback();

    /**
     * {@return the line number of the current character}
     * @throws IllegalStateException when called before the first read
     */
    @Range(from = 1, to = Integer.MAX_VALUE) int getLineNumber();

    /**
     * {@return the column number of the current character}
     * @throws IllegalStateException when called before the first read
     */
    @Range(from = 1, to = Integer.MAX_VALUE) int getColumnNumber();

    /**
     * Closes this reader.
     * @throws IOException if an I/O error occurs
     */
    @Override
    void close() throws IOException;
}

View File

@ -0,0 +1,72 @@
package eu.jonahbauer.json.tokenizer.reader;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.io.Reader;
import java.util.Objects;
/**
 * {@link PushbackReader} backed by a {@link Reader}. Supports pushing back a single character and tracks
 * the line and column of the most recently read character. A line ends after a line feed, after a carriage
 * return, or after a carriage return immediately followed by a line feed (counted as one line break).
 */
public class PushbackReaderImpl implements PushbackReader {
    /** Marker stored in {@link #current} until the first character has been read. */
    private static final int NOT_READ = Integer.MIN_VALUE;

    private final @NotNull Reader reader;

    /** The most recently read character, {@link #EOF}, or {@link #NOT_READ}. */
    private int current = NOT_READ;
    private int lineNumber = 1;
    private int columnNumber = 0; // column number will be incremented when a character is read
    /** Whether the next {@link #read()} should return {@link #current} again. */
    private boolean pushback;

    /**
     * @param reader the underlying character source
     * @throws NullPointerException if {@code reader} is {@code null}
     */
    public PushbackReaderImpl(@NotNull Reader reader) {
        this.reader = Objects.requireNonNull(reader, "reader");
    }

    @Override
    public int getLineNumber() {
        ensureStarted();
        return lineNumber;
    }

    @Override
    public int getColumnNumber() {
        ensureStarted();
        return columnNumber;
    }

    @Override
    public void pushback() {
        ensureStarted();
        if (pushback) {
            throw new IllegalStateException("Cannot push back more than one character at a time.");
        }
        pushback = true;
    }

    @Override
    public int read() throws IOException {
        if (pushback) {
            // replay the current character; its position has already been accounted for
            pushback = false;
            return current;
        }

        int result = reader.read();

        // The character just read starts a new line if the previous one was a line feed, or a carriage
        // return that is not the first half of a CR LF pair. Consecutive line breaks (blank lines) must
        // each advance the line counter.
        boolean startsNewLine = current == '\n' || (current == '\r' && result != '\n');
        if (startsNewLine) {
            lineNumber++;
            columnNumber = 1;
        } else {
            columnNumber++;
        }

        current = result;
        return result;
    }

    @Override
    public void close() throws IOException {
        reader.close();
    }

    /** Throws if no character has been read yet, in which case there is no current position. */
    private void ensureStarted() {
        if (current == NOT_READ) {
            throw new IllegalStateException("No character has been read so far.");
        }
    }
}

View File

@ -0,0 +1,16 @@
package eu.jonahbauer.json.tokenizer.token;
import org.jetbrains.annotations.NotNull;
/**
 * Token type for the JSON literal {@code null}; its single constant {@link #NULL} prints as {@code null}.
 */
@SuppressWarnings("java:S6548")
public enum JsonNull implements JsonToken {
    NULL;

    /**
     * {@return the JSON spelling of this token}
     */
    @Override
    public @NotNull String toString() {
        final String literal = "null";
        return literal;
    }
}

View File

@ -0,0 +1,26 @@
package eu.jonahbauer.json.tokenizer.token;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.Accessors;
import org.jetbrains.annotations.NotNull;
/**
 * The structural single-character JSON tokens, each associated with the character that denotes it.
 */
@Getter
@Accessors(fluent = true)
@RequiredArgsConstructor
public enum JsonPunctuation implements JsonToken {
    BEGIN_OBJECT('{'),
    END_OBJECT('}'),
    BEGIN_ARRAY('['),
    END_ARRAY(']'),
    VALUE_SEPARATOR(','),
    NAME_SEPARATOR(':'),
    ;

    /** The character denoting this token. */
    private final char value;

    /**
     * {@return the constant name followed by the token character in parentheses}
     */
    @Override
    public @NotNull String toString() {
        return new StringBuilder(name())
                .append('(')
                .append(value)
                .append(')')
                .toString();
    }
}

View File

@ -0,0 +1,10 @@
package eu.jonahbauer.json.tokenizer.token;
import eu.jonahbauer.json.JsonBoolean;
import eu.jonahbauer.json.JsonNumber;
import eu.jonahbauer.json.JsonString;
/**
 * Represents a JSON token. The set of token types is closed: a token is either a boolean, a null, a number,
 * a string or a punctuation token.
 */
public sealed interface JsonToken
        permits JsonBoolean, JsonNull, JsonNumber, JsonString, JsonPunctuation {
}

View File

@ -4,4 +4,6 @@ module eu.jonahbauer.json {
exports eu.jonahbauer.json; exports eu.jonahbauer.json;
exports eu.jonahbauer.json.exceptions; exports eu.jonahbauer.json.exceptions;
exports eu.jonahbauer.json.tokenizer;
exports eu.jonahbauer.json.tokenizer.token;
} }

View File

@ -0,0 +1,123 @@
package eu.jonahbauer.json.tokenizer;
import eu.jonahbauer.json.JsonBoolean;
import eu.jonahbauer.json.JsonNumber;
import eu.jonahbauer.json.JsonString;
import eu.jonahbauer.json.exceptions.JsonTokenizerException;
import eu.jonahbauer.json.tokenizer.token.JsonNull;
import eu.jonahbauer.json.tokenizer.token.JsonPunctuation;
import eu.jonahbauer.json.tokenizer.token.JsonToken;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for {@link JsonTokenizerImpl}: each case tokenizes a JSON text completely and either compares the
 * resulting token list or expects a {@link JsonTokenizerException}.
 */
class JsonTokenizerImplTest {

    @Test
    void simpleTokens() {
        test(
                "{[]},:",
                JsonPunctuation.BEGIN_OBJECT,
                JsonPunctuation.BEGIN_ARRAY,
                JsonPunctuation.END_ARRAY,
                JsonPunctuation.END_OBJECT,
                JsonPunctuation.VALUE_SEPARATOR,
                JsonPunctuation.NAME_SEPARATOR
        );
    }

    @Test
    void literals() {
        test("true", JsonBoolean.TRUE);
        test("false", JsonBoolean.FALSE);
        test("null", JsonNull.NULL);
        test("null true false", JsonNull.NULL, JsonBoolean.TRUE, JsonBoolean.FALSE);
    }

    @Test
    void misspelledLiterals() {
        assertRejected("tru");
        assertRejected("fal");
        assertRejected("nu");
        assertRejected("TRUE");
        assertRejected("FALSE");
        assertRejected("NULL");
    }

    @Test
    void stringWithoutEscapes() {
        test("\"foobar\"\"baz\"", new JsonString("foobar"), new JsonString("baz"));
    }

    @Test
    void stringWithSimpleEscapes() {
        test("\"\\b\\t\\f\\r\\n\\/\\\\\\\"\"", new JsonString("\b\t\f\r\n/\\\""));
    }

    @Test
    void stringWithUnicodeEscapes() {
        test("\"\\u0041\\u0042\\u0043\"", new JsonString("ABC"));
    }

    @Test
    void stringWithInvalidEscapeSequence() {
        assertRejected("\"\\x\"");
        assertRejected("\"\\uxxxx\"");
        assertRejected("\"\\uaa\"");
    }

    @Test
    void stringWithControlCharacters() {
        assertRejected("\"\u0010\"");
    }

    @Test
    void stringWithoutTrailingQuotes() {
        assertRejected("\"hello world");
    }

    @Test
    void number() {
        test("-0", new JsonNumber(-0.0));
        test("0", new JsonNumber(0.0));
        test("123", new JsonNumber(123.0));
        test("123.456", new JsonNumber(123.456));
        test("-123", new JsonNumber(-123.0));
        test("-123.456", new JsonNumber(-123.456));
        test("1E10", new JsonNumber(1E10));
        test("1E+10", new JsonNumber(1E+10));
        test("1E-10", new JsonNumber(1E-10));
        test("123.456E10", new JsonNumber(123.456E10));
        test("123.456E+10", new JsonNumber(123.456E+10));
        test("123.456E-10", new JsonNumber(123.456E-10));
    }

    @Test
    void numberWithLeadingZero() {
        assertRejected("00");
        assertRejected("-00");
    }

    @Test
    void numberWithEmptyIntegralPart() {
        assertRejected(".0");
    }

    @Test
    void numberWithEmptyFractionPart() {
        assertRejected("0.");
    }

    @Test
    void numberWithEmptyExponentPart() {
        assertRejected("0.0E");
    }

    @Test
    void numberWithGrouping() {
        assertRejected("1_234");
        test("1,234", new JsonNumber(1), JsonPunctuation.VALUE_SEPARATOR, new JsonNumber(234));
    }

    /** Expects tokenizing the given text to fail with a {@link JsonTokenizerException}. */
    private void assertRejected(@NotNull String json) {
        assertThrows(JsonTokenizerException.class, () -> test(json));
    }

    /** Tokenizes the given text completely and compares the tokens with the expected ones. */
    private void test(@NotNull String json, @NotNull JsonToken @NotNull... expected) {
        var tokens = new JsonTokenizerImpl(json).stream().toList();
        assertEquals(List.of(expected), tokens);
    }
}

View File

@ -1,7 +1,11 @@
[versions] [versions]
annotations = "24.1.0" annotations = "24.1.0"
junit = "5.12.1"
junit-launcher = "1.12.1"
lombok = "1.18.32" lombok = "1.18.32"
[libraries] [libraries]
annotations = { module = "org.jetbrains:annotations", version.ref = "annotations" } annotations = { module = "org.jetbrains:annotations", version.ref = "annotations" }
junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit" }
junit-platform-launcher = { module = "org.junit.platform:junit-platform-launcher", version.ref = "junit-launcher" }
lombok = { module = "org.projectlombok:lombok", version.ref = "lombok" } lombok = { module = "org.projectlombok:lombok", version.ref = "lombok" }