1、优化正则表达式
2、如果正则表达式无法优化，可以考虑限制匹配次数，或者限制匹配时间
/**
 * A {@link CharSequence} wrapper that caps how many characters may be read through
 * {@link #charAt(int)}.
 *
 * <p>Every successful {@code charAt} call consumes one unit of the budget given at
 * construction. Once the budget is used up, the next {@code charAt} call throws, which lets
 * a caller abort work that reads the text character by character (this file uses it as the
 * input of a regex match).
 *
 * <p>The remaining budget is a plain mutable field with no synchronization.
 */
public class CountedCharSequence implements CharSequence {
    /** The wrapped text; all operations delegate to it. */
    private final CharSequence charSequence;
    /** The budget supplied at construction; reported in the exception message. */
    private final long limit;
    /** Number of {@code charAt} reads still allowed. */
    private long remaining;

    /**
     * @param charSequence the text to wrap
     * @param count maximum number of {@code charAt} reads allowed; a value of zero or less
     *     makes the very first read fail
     */
    public CountedCharSequence(CharSequence charSequence, long count) {
        this.charSequence = charSequence;
        this.limit = count;
        this.remaining = count;
    }

    /** Returns the wrapped text as a {@code String}. */
    @Override
    public String toString() {
        return charSequence.toString();
    }

    /** Returns the length of the wrapped text. */
    @Override
    public int length() {
        return charSequence.length();
    }

    /**
     * Returns the character at {@code index}, consuming one unit of the read budget.
     *
     * @throws IllegalArgumentException if the read budget is already exhausted
     */
    @Override
    public char charAt(int index) {
        // Guard on the remaining budget, not on the index: index 0 is a valid position.
        if (remaining <= 0) {
            throw new IllegalArgumentException("Regex match over max " + limit + " times");
        }
        remaining--;
        return charSequence.charAt(index);
    }

    /**
     * Returns a sub-sequence of the wrapped text. The result is the delegate's own
     * sub-sequence and is not subject to the read budget.
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        return charSequence.subSequence(start, end);
    }
}
// Baseline: time an unrestricted match of the nested-quantifier pattern on a plain String.
String str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
long start = System.nanoTime();
Pattern.compile("^(a+)+$")
        .matcher(str)
        .matches();
long baselineElapsed = System.nanoTime() - start;
System.out.println(baselineElapsed + " nano seconds");
执行用时
1399200 nano seconds
// Same match, but the input is wrapped in a CountedCharSequence built with 100_000_000.
// Any exception raised during the match is reported by printing its message.
long start1 = System.nanoTime();
try {
    Pattern countedPattern = Pattern.compile("^(a+)+$");
    countedPattern.matcher(new CountedCharSequence(str, 100_000_000)).matches();
} catch (Exception e) {
    String message = e.getMessage();
    System.out.println(message);
}
long countedElapsed = System.nanoTime() - start1;
System.out.println(countedElapsed + " nano seconds");
执行用时
Regex match over max 100000000 times
601800 nano seconds
import java.util.regex.Pattern;
/**
 * A {@link CharSequence} wrapper that refuses character reads after a deadline.
 *
 * <p>The deadline is fixed at construction as "now plus the given number of nanoseconds".
 * Every {@link #charAt(int)} call checks the clock first and throws once the deadline has
 * passed, which lets a caller abort work that reads the text character by character (this
 * file uses it as the input of a regex match).
 */
public final class TimedCharSequence implements CharSequence {
    /** The wrapped text; all operations delegate to it. */
    private final CharSequence sequence;
    /** Deadline on the {@link System#nanoTime()} clock; may wrap, so compare by difference. */
    private final long deadline;

    /**
     * @param sequence the text to wrap
     * @param nanoseconds time budget, measured from the moment of construction
     */
    public TimedCharSequence(CharSequence sequence, long nanoseconds) {
        this.sequence = sequence;
        this.deadline = System.nanoTime() + nanoseconds;
    }

    /** Returns the wrapped text as a {@code String}. */
    @Override
    public String toString() {
        return sequence.toString();
    }

    /** Returns the length of the wrapped text. */
    @Override
    public int length() {
        return sequence.length();
    }

    /**
     * Returns the character at {@code index} unless the deadline has passed.
     *
     * @throws IllegalStateException if the current time is past the deadline
     */
    @Override
    public char charAt(int index) {
        // nanoTime values may only be compared by subtraction; a direct '<' breaks when
        // the deadline computation wrapped around (e.g. a very large budget).
        if (System.nanoTime() - deadline > 0) {
            throw new IllegalStateException("Regex match timeout");
        }
        return sequence.charAt(index);
    }

    /**
     * Returns a sub-sequence of the wrapped text. The result is the delegate's own
     * sub-sequence and is not subject to the deadline.
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        return sequence.subSequence(start, end);
    }
}
// Baseline: time an unrestricted match of the nested-quantifier pattern on a plain String.
String str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
long start = System.nanoTime();
Pattern.compile("^(a+)+$")
        .matcher(str)
        .matches();
long plainElapsed = System.nanoTime() - start;
System.out.println(plainElapsed + " nano seconds");
执行用时:
1385400 nano seconds
// Same match, but the input is wrapped in a TimedCharSequence built with 10_000 nanoseconds.
// Any exception raised during the match is reported by printing its message.
long start1 = System.nanoTime();
try {
    // Compile first so the wrapper is constructed only after the pattern is ready.
    Pattern timedPattern = Pattern.compile("^(a+)+$");
    timedPattern.matcher(new TimedCharSequence(str, 10_000)).matches();
} catch (Exception e) {
    String message = e.getMessage();
    System.out.println(message);
}
long timedElapsed = System.nanoTime() - start1;
System.out.println(timedElapsed + " nano seconds");
执行用时:
Regex match timeout
123100 nano seconds
[Ref] 限制正则表达式执行时间