Compare commits

...

12 Commits

Author SHA1 Message Date
wenshao
f6cf89ca1f Merge branch 'main' into swar_20250120 2025-01-22 20:23:34 +08:00
wenshao
1d2b7e0b50 bug fix 2025-01-22 19:46:55 +08:00
wenshao
0051da7ce4 add benchmark 2025-01-22 19:45:33 +08:00
wenshao
9f9a01760e remove unused code 2025-01-22 19:30:36 +08:00
wenshao
0451d5c1c0 optimize isASCII 2025-01-22 17:35:58 +08:00
wenshao
e4454ef5c6 add IOUtilsBench 2025-01-22 14:36:20 +08:00
wenshao
036a3681c4 fix build error 2025-01-22 01:56:55 +08:00
wenshao
bd3e4e633e fix build error 2025-01-22 01:23:50 +08:00
wenshao
686e67e6c6 codestyle 2025-01-22 01:06:40 +08:00
wenshao
4a988dbdd6 isASCIIChar 2025-01-21 17:59:28 +08:00
wenshao
8bdd208d1e swar 2025-01-21 00:57:39 +08:00
wenshao
72f2ad09bb isASCII 2025-01-21 00:12:55 +08:00
10 changed files with 232 additions and 86 deletions

View File

@ -22,11 +22,6 @@
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-codegen</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-extension</artifactId>

View File

@ -1,5 +1,6 @@
package com.alibaba.fastjson2.benchmark;
import com.alibaba.fastjson2.JSONException;
import com.alibaba.fastjson2.benchmark.eishay.EishayParseBinaryArrayMapping;
import com.alibaba.fastjson2.util.JDKUtils;
import org.apache.commons.io.IOUtils;
@ -11,46 +12,105 @@ import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.io.InputStream;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.concurrent.TimeUnit;
import static com.alibaba.fastjson2.util.JDKUtils.ARRAY_BYTE_BASE_OFFSET;
import static com.alibaba.fastjson2.util.JDKUtils.UNSAFE;
public class BytesAsciiCheck {
static byte[] bytes;
static char[] chars;
static String str;
static final MethodHandle INDEX_OF_CHAR;
static {
MethodHandle indexOfChar = null;
try {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = JDKUtils.trustedLookup(cStringLatin1);
indexOfChar = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// ignore
}
} catch (Exception e) {
e.printStackTrace();
}
INDEX_OF_CHAR = indexOfChar;
try {
InputStream is = EishayParseBinaryArrayMapping.class.getClassLoader().getResourceAsStream("data/eishay.json");
String str = IOUtils.toString(is, "UTF-8");
str = IOUtils.toString(is, "UTF-8");
bytes = str.getBytes();
chars = str.toCharArray();
} catch (Exception e) {
e.printStackTrace();
}
}
@Benchmark
// @Benchmark
public void handler(Blackhole bh) throws Throwable {
bh.consume(
JDKUtils.METHOD_HANDLE_HAS_NEGATIVE.invoke(bytes, 0, bytes.length)
);
}
@Benchmark
// @Benchmark
public void lambda(Blackhole bh) throws Throwable {
bh.consume(
JDKUtils.PREDICATE_IS_ASCII.test(bytes)
);
}
@Benchmark
// @Benchmark
public void direct(Blackhole bh) throws Throwable {
bh.consume(hasNegatives(bytes, 0, bytes.length));
}
// @Benchmark
public void isASCII(Blackhole bh) throws Throwable {
bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length));
}
@Benchmark
public void direct8(Blackhole bh) throws Throwable {
bh.consume(hasNegatives_8(bytes, 0, bytes.length));
public void isLatin1(Blackhole bh) throws Throwable {
bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length));
}
/**
 * Benchmarks {@code JDKUtils.PREDICATE_IS_ASCII} on the shared {@code bytes} sample.
 */
@Benchmark
public void isASCIIJDK(Blackhole bh) throws Throwable {
    final boolean ascii = com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes);
    bh.consume(ascii);
}
/**
 * Benchmarks {@code IOUtils.indexOfSlash} across the whole shared {@code bytes} sample.
 */
@Benchmark
public void indexOfSlash(Blackhole bh) throws Throwable {
    final int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length);
    bh.consume(index);
}
/**
 * Benchmarks {@code IOUtils.indexOfSlashV} across the whole shared {@code bytes} sample.
 */
@Benchmark
public void indexOfSlashV(Blackhole bh) throws Throwable {
    final int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length);
    bh.consume(index);
}
/**
 * Benchmarks the {@code INDEX_OF_CHAR} method handle (looked up from
 * {@code java.lang.StringLatin1.indexOfChar}) across the whole shared {@code bytes} sample.
 *
 * <p>Searches for a backslash, the same character the sibling {@code indexOfSlash},
 * {@code indexOfSlashV} and {@code indexOfString} benchmarks look for, so the four
 * results are directly comparable.
 */
@Benchmark
public void indexOfChar(Blackhole bh) throws Throwable {
    bh.consume(indexOfChar(bytes, '\\', 0, bytes.length));
}
/**
 * Benchmarks {@code String.indexOf(int)} searching the shared {@code str} sample for a backslash.
 */
@Benchmark
public void indexOfString(Blackhole bh) throws Throwable {
    final int index = str.indexOf('\\');
    bh.consume(index);
}
/**
 * Invokes the {@code INDEX_OF_CHAR} method handle with the given arguments.
 *
 * @param bytes     array passed to the handle as its first argument
 * @param ch        character value to search for
 * @param fromIndex third argument forwarded to the handle
 * @param toIndex   fourth argument forwarded to the handle
 * @return the int returned by the handle
 * @throws JSONException if the invocation fails for any reason, including
 *                       {@code INDEX_OF_CHAR} being null because the lookup failed;
 *                       the original failure is attached as the cause
 */
private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) {
    try {
        return (int) INDEX_OF_CHAR.invokeExact(bytes, ch, fromIndex, toIndex);
    } catch (Throwable e) {
        // Keep the cause: an empty message with no cause makes a failed lookup undiagnosable.
        throw new JSONException("invoke StringLatin1.indexOfChar error", e);
    }
}
public static boolean hasNegatives(byte[] ba, int off, int len) {
@ -62,29 +122,13 @@ public class BytesAsciiCheck {
return false;
}
public static boolean hasNegatives_8(byte[] bytes, int off, int len) {
int i = off;
while (i + 8 <= off + len) {
if ((UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return true;
}
i += 8;
}
for (; i < off + len; i++) {
if (bytes[i] < 0) {
return true;
}
}
return false;
}
public static void main(String[] args) throws Exception {
Options options = new OptionsBuilder()
.include(BytesAsciiCheck.class.getName())
.mode(Mode.Throughput)
.timeUnit(TimeUnit.MILLISECONDS)
.warmupIterations(3)
.threads(1)
.forks(1)
.build();
new Runner(options).run();

View File

@ -0,0 +1,45 @@
package com.alibaba.fastjson2.benchmark.wast;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.concurrent.TimeUnit;
/**
 * JMH micro-benchmark for {@code com.alibaba.fastjson2.util.IOUtils.digit4}.
 *
 * <p>The sample data is the text {@code "12345678"} repeated 1000 times, held as a
 * {@code String}, a {@code byte[]} and a {@code char[]}.
 */
public class IOUtilsBench {
    static byte[] bytes;
    static char[] chars;
    static String str;

    static {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            buf.append(12345678);
        }
        str = buf.toString();
        // Explicit charset: the no-arg getBytes() depends on the platform default.
        bytes = str.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
        chars = str.toCharArray();
    }

    /**
     * Calls {@code digit4} at every 8th offset below 1000, i.e. at the start of
     * each repeated {@code "12345678"} group.
     */
    @Benchmark
    public void digit4(Blackhole bh) throws Throwable {
        for (int i = 0; i < 1000; i += 8) {
            // Use the loop offset; the previous constant 0 parsed the same four bytes every time.
            bh.consume(com.alibaba.fastjson2.util.IOUtils.digit4(bytes, i));
        }
    }

    /**
     * Runs this benchmark class through the JMH runner: throughput mode,
     * millisecond time unit, 3 warmup iterations, one thread, one fork.
     */
    public static void main(String[] args) throws Exception {
        Options options = new OptionsBuilder()
                .include(IOUtilsBench.class.getName())
                .mode(Mode.Throughput)
                .timeUnit(TimeUnit.MILLISECONDS)
                .warmupIterations(3)
                .threads(1)
                .forks(1)
                .build();
        new Runner(options).run();
    }
}

View File

@ -34,6 +34,19 @@ public class BytesAsciiCheckTest {
}
}
/**
 * Runs five timed rounds of {@code LOOP_COUNT} calls to {@code benchmark.isASCII(BH)}
 * and prints the elapsed milliseconds of each round.
 */
public static void isASCII() throws Throwable {
    int round = 0;
    while (round < 5) {
        final long begin = System.currentTimeMillis();
        int iteration = 0;
        while (iteration < LOOP_COUNT) {
            benchmark.isASCII(BH);
            iteration++;
        }
        final long elapsed = System.currentTimeMillis() - begin;
        System.out.println("BytesAsciiCheck-isASCII : " + elapsed);
        // zulu17.40.19 : 118
        round++;
    }
}
public static void direct() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
@ -47,14 +60,14 @@ public class BytesAsciiCheckTest {
}
}
public static void direct8() throws Throwable {
public static void isLatin1() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < LOOP_COUNT; ++i) {
benchmark.direct8(BH);
benchmark.isLatin1(BH);
}
long millis = System.currentTimeMillis() - start;
System.out.println("BytesAsciiCheck-direct8 : " + millis);
System.out.println("BytesAsciiCheck-isASCII_chars : " + millis);
// zulu17.40.19 : 478
}
@ -75,6 +88,7 @@ public class BytesAsciiCheckTest {
// handler();
// lambda();
// direct();
// direct8();
isLatin1();
// isASCII();
}
}

View File

@ -477,7 +477,7 @@ final class JSONWriterJSONB
off = this.off;
} else {
ascii = isASCII(chars, coff, strlen);
ascii = isLatin1(chars, coff, strlen);
}
int minCapacity = (ascii ? strlen : strlen * 3) + off + 6;

View File

@ -118,7 +118,7 @@ public class IOUtils {
DIGITS_K_64[i] = c0 + v;
}
ZERO_DOT_LATIN1 = UNSAFE.getShort(new byte[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_CHAR_BASE_OFFSET);
}
public static void writeDigitPair(byte[] buf, int charPos, int value) {
@ -1622,20 +1622,45 @@ public class IOUtils {
}
/**
 * Searches {@code value} for {@code quote}.
 *
 * <p>Uses the {@code INDEX_OF_CHAR_LATIN1} method handle when it is available,
 * and {@link #indexOfQuote0(byte[], int, int, int)} when the handle is null.
 *
 * @param value     bytes to search
 * @param quote     quote character to look for
 * @param fromIndex index to start searching from
 * @param max       upper bound of the search, forwarded unchanged to the implementation
 * @return the value returned by the selected implementation
 * @throws JSONException if invoking the method handle fails; the original
 *                       failure is attached as the cause
 */
public static int indexOfQuote(byte[] value, int quote, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfQuote0(value, quote, fromIndex, max);
    }
    try {
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, quote, fromIndex, max);
    } catch (Throwable e) {
        // Same message as before, but keep the cause so the stack trace survives.
        throw new JSONException(e.getMessage(), e);
    }
}
static int indexOfQuote0(byte[] value, int quote, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
long vectorQuote = quote == '\'' ? 0x2727_2727_2727_2727L : 0x2222_2222_2222_2222L;
while (i < upperBound && notContains(getLongLE(value, i), vectorQuote)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), vectorQuote)) {
i += 8;
address += 8;
}
return indexOfChar0(value, quote, i, max);
}
/**
 * Searches {@code value} for a backslash ({@code '\\'}).
 *
 * <p>Uses the {@code INDEX_OF_CHAR_LATIN1} method handle when it is available,
 * and {@link #indexOfSlashV(byte[], int, int)} when the handle is null.
 *
 * @param value     bytes to search
 * @param fromIndex index to start searching from
 * @param max       upper bound of the search, forwarded unchanged to the implementation
 * @return the value returned by the selected implementation
 * @throws JSONException if invoking the method handle fails; the original
 *                       failure is attached as the cause
 */
public static int indexOfSlash(byte[] value, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfSlashV(value, fromIndex, max);
    }
    try {
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, (int) '\\', fromIndex, max);
    } catch (Throwable e) {
        // Same message as before, but keep the cause so the stack trace survives.
        throw new JSONException(e.getMessage(), e);
    }
}
public static int indexOfSlashV(byte[] value, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
while (i < upperBound && notContains(getLongLE(value, i), 0x5C5C5C5C5C5C5C5CL)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), 0x5C5C5C5C5C5C5C5CL)) {
i += 8;
address += 8;
}
return indexOfChar0(value, '\\', i, max);
}
@ -1710,7 +1735,7 @@ public class IOUtils {
}
public static int getIntUnaligned(char[] bytes, int offset) {
return UNSAFE.getInt(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getInt(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}
public static long getLongBE(byte[] bytes, int offset) {
@ -1723,7 +1748,7 @@ public class IOUtils {
}
public static long getLongUnaligned(char[] bytes, int offset) {
return UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}
public static long getLongLE(byte[] bytes, int offset) {
@ -1733,7 +1758,7 @@ public class IOUtils {
public static long getLongLE(char[] bytes, int offset) {
return convEndian(false,
UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)));
UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)));
}
public static short hex2(int i) {
@ -1833,19 +1858,36 @@ public class IOUtils {
return big == BIG_ENDIAN ? n : Short.reverseBytes(n);
}
public static boolean isASCII(char[] chars, int coff, int strlen) {
int i = coff;
for (int upperBound = coff + (strlen & ~3); i < upperBound; i += 4) {
if ((getLongLE(chars, i) & 0xFF00FF00FF00FF00L) != 0) {
return false;
}
/**
 * Tests whether every char in {@code chars[off, off + len)} has a zero high byte,
 * i.e. is in the range {@code 0x0000..0x00FF}.
 *
 * <p>All inspected chars are OR-ed into one accumulator and the high-byte mask is
 * applied once at the end, so the scan does not exit early.
 *
 * @param chars array to inspect
 * @param off   index of the first char to inspect
 * @param len   number of chars to inspect
 * @return {@code true} if no inspected char is above {@code 0xFF}
 */
public static boolean isLatin1(char[] chars, int off, int len) {
    int upperBound = off + (len & ~7);
    int end = off + len;
    // Unsafe addresses are in bytes and a char is two bytes wide, so the start
    // index must be scaled by 2; adding the raw index read the wrong memory.
    long address = ARRAY_CHAR_BASE_OFFSET + ((long) off << 1);
    long value = 0;
    // Main loop: two 8-byte reads cover 8 chars per iteration.
    while (off < upperBound) {
        value |= UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8);
        address += 16;
        off += 8;
    }
    // Tail: remaining chars one at a time. getShort sign-extends, which only
    // affects chars >= 0x8000, and those are already outside Latin-1.
    while (off++ < end) {
        value |= UNSAFE.getShort(chars, address);
        address += 2;
    }
    // In a native-endian read each 16-bit lane holds one char value intact, so the
    // mask tests the high bytes directly. Byte-swapping first would test the low
    // bytes on big-endian hosts.
    return (value & 0xFF00FF00FF00FF00L) == 0;
}
for (; i < strlen; ++i) {
if (chars[i] > 0x00FF) {
return false;
}
public static boolean isASCII(byte[] bytes, int off, int len) {
int upperBound = off + (len & ~7);
int end = off + len;
long address = ARRAY_BYTE_BASE_OFFSET + off;
long value = 0;
while (off < upperBound) {
value |= UNSAFE.getLong(bytes, address);
address += 8;
off += 8;
}
return true;
while (off < end) {
value |= bytes[off++];
}
return (value & 0x8080808080808080L) == 0;
}
}

View File

@ -59,6 +59,7 @@ public class JDKUtils {
public static final MethodHandle METHOD_HANDLE_HAS_NEGATIVE;
public static final Predicate<byte[]> PREDICATE_IS_ASCII;
public static final MethodHandle INDEX_OF_CHAR_LATIN1;
static final MethodHandles.Lookup IMPL_LOOKUP;
static volatile MethodHandle CONSTRUCTOR_LOOKUP;
@ -340,6 +341,21 @@ public class JDKUtils {
METHOD_HANDLE_HAS_NEGATIVE = handle;
}
// Best-effort lookup of the static method
// java.lang.StringLatin1.indexOfChar(byte[], int, int, int) returning int.
// INDEX_OF_CHAR_LATIN1 is left null when JVM_VERSION <= 9 or when the lookup throws.
MethodHandle indexOfCharLatin1 = null;
if (JVM_VERSION > 9) {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = trustedLookup(cStringLatin1);
indexOfCharLatin1 = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// lookup failed (class or method not found, or access refused): keep the handle null
}
}
INDEX_OF_CHAR_LATIN1 = indexOfCharLatin1;
Boolean compact_strings = null;
try {
if (JVM_VERSION == 8) {
@ -510,20 +526,7 @@ public class JDKUtils {
return STRING_CREATOR_JDK8.apply(chars, Boolean.TRUE);
}
public static boolean isASCII(byte[] chars) {
int i = 0;
int strlen = chars.length;
for (int upperBound = (strlen & ~7); i < upperBound; i += 8) {
if ((UNSAFE.getLong(chars, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return false;
}
}
for (; i < strlen; ++i) {
if (UNSAFE.getByte(chars, ARRAY_BYTE_BASE_OFFSET + i) < 0) {
return false;
}
}
return true;
static boolean isASCII(byte[] chars) {
return IOUtils.isASCII(chars, 0, chars.length);
}
}

View File

@ -427,7 +427,7 @@ public class IOUtilsTest {
public void test_isASCII() {
char[] chars = new char[] {'0', '1', '2', '3', '4', '5', '6', 0x80};
long v = UNSAFE.getLong(chars, ARRAY_CHAR_BASE_OFFSET);
assertTrue(IOUtils.isASCII(chars, 0, 4));
assertTrue(IOUtils.isASCII(chars, 4, 4));
assertTrue(IOUtils.isLatin1(chars, 0, 4));
assertTrue(IOUtils.isLatin1(chars, 4, 4));
}
}

View File

@ -4,11 +4,11 @@ import com.alibaba.fastjson2.function.ToByteFunction;
import org.junit.jupiter.api.Test;
import java.lang.invoke.*;
import java.util.Arrays;
import java.util.function.ToIntFunction;
import static com.alibaba.fastjson2.util.JDKUtils.*;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.*;
public class JDKUtilsTest {
@Test
@ -135,6 +135,13 @@ public class JDKUtilsTest {
assertNotNull(func);
}
/**
 * Checks {@code isASCII(byte[])} on an all-ASCII array, on arrays with a single
 * non-ASCII byte, and on the empty array.
 */
@Test
public void test_isASCII() {
    byte[] bytes = new byte[127];
    Arrays.fill(bytes, (byte) 'a');
    assertTrue(isASCII(bytes));

    // 127 = 15 * 8 + 7: index 0 falls in the 8-bytes-at-a-time loop,
    // index 126 falls in the byte-by-byte tail.
    bytes[0] = (byte) 0x80;
    assertFalse(isASCII(bytes));

    bytes[0] = (byte) 'a';
    bytes[126] = (byte) 0x80;
    assertFalse(isASCII(bytes));

    assertTrue(isASCII(new byte[0]));
}
private static class PrivateBeanInt {
private byte coder;

24
pom.xml
View File

@ -62,28 +62,14 @@
</properties>
<modules>
<!--
<module>adapter</module>
-->
<module>benchmark</module>
<module>codegen</module>
<module>codegen-test</module>
<module>core</module>
<!--
<module>example-graalvm-native</module>
-->
<module>example-solon-test</module>
<module>example-spring-test</module>
<!--
<module>example-spring6-test</module>
-->
<module>extension</module>
<module>extension-jaxrs</module>
<module>extension-solon</module>
<module>extension-spring5</module>
<!--
<module>extension-spring6</module>
-->
<module>fastjson1-compatible</module>
<module>kotlin</module>
<module>safemode-test</module>
@ -1033,6 +1019,16 @@
<module>test-jdk17</module>
</modules>
</profile>
<profile>
<id>enable-codegen</id>
<activation>
<jdk>(,22]</jdk>
</activation>
<modules>
<module>codegen</module>
<module>codegen-test</module>
</modules>
</profile>
<profile>
<id>deploy-settings</id>
<activation>