From 28b9f93d13fd952ec94397b21333c4367a959d08 Mon Sep 17 00:00:00 2001 From: agnostic-apollo Date: Wed, 30 Jun 2021 03:18:44 +0500 Subject: [PATCH] Compile Url match regex once and not on every use Needed for #2146. --- .../terminal/TermuxTerminalViewClient.java | 3 +- .../com/termux/app/TermuxActivityTest.java | 4 +- .../com/termux/shared/data/DataUtils.java | 88 --------------- .../java/com/termux/shared/data/UrlUtils.java | 103 ++++++++++++++++++ 4 files changed, 107 insertions(+), 91 deletions(-) create mode 100644 termux-shared/src/main/java/com/termux/shared/data/UrlUtils.java diff --git a/app/src/main/java/com/termux/app/terminal/TermuxTerminalViewClient.java b/app/src/main/java/com/termux/app/terminal/TermuxTerminalViewClient.java index 32bc48b1..08201b57 100644 --- a/app/src/main/java/com/termux/app/terminal/TermuxTerminalViewClient.java +++ b/app/src/main/java/com/termux/app/terminal/TermuxTerminalViewClient.java @@ -21,6 +21,7 @@ import android.widget.Toast; import com.termux.R; import com.termux.app.TermuxActivity; +import com.termux.shared.data.UrlUtils; import com.termux.shared.shell.ShellUtils; import com.termux.shared.terminal.TermuxTerminalViewClientBase; import com.termux.shared.termux.TermuxConstants; @@ -603,7 +604,7 @@ public class TermuxTerminalViewClient extends TermuxTerminalViewClientBase { String text = ShellUtils.getTerminalSessionTranscriptText(session, true, true); - LinkedHashSet urlSet = DataUtils.extractUrls(text); + LinkedHashSet urlSet = UrlUtils.extractUrls(text); if (urlSet.isEmpty()) { new AlertDialog.Builder(mActivity).setMessage(R.string.title_select_url_none_found).show(); return; diff --git a/app/src/test/java/com/termux/app/TermuxActivityTest.java b/app/src/test/java/com/termux/app/TermuxActivityTest.java index 090d5cce..27613829 100644 --- a/app/src/test/java/com/termux/app/TermuxActivityTest.java +++ b/app/src/test/java/com/termux/app/TermuxActivityTest.java @@ -1,6 +1,6 @@ package com.termux.app; -import 
com.termux.shared.data.DataUtils; +import com.termux.shared.data.UrlUtils; import org.junit.Assert; import org.junit.Test; @@ -13,7 +13,7 @@ public class TermuxActivityTest { private void assertUrlsAre(String text, String... urls) { LinkedHashSet expected = new LinkedHashSet<>(); Collections.addAll(expected, urls); - Assert.assertEquals(expected, DataUtils.extractUrls(text)); + Assert.assertEquals(expected, UrlUtils.extractUrls(text)); } @Test diff --git a/termux-shared/src/main/java/com/termux/shared/data/DataUtils.java b/termux-shared/src/main/java/com/termux/shared/data/DataUtils.java index 368755c5..60bb9fca 100644 --- a/termux-shared/src/main/java/com/termux/shared/data/DataUtils.java +++ b/termux-shared/src/main/java/com/termux/shared/data/DataUtils.java @@ -165,92 +165,4 @@ public class DataUtils { return string == null || string.isEmpty(); } - - public static LinkedHashSet extractUrls(String text) { - - StringBuilder regex_sb = new StringBuilder(); - - regex_sb.append("("); // Begin first matching group. - regex_sb.append("(?:"); // Begin scheme group. - regex_sb.append("dav|"); // The DAV proto. - regex_sb.append("dict|"); // The DICT proto. - regex_sb.append("dns|"); // The DNS proto. - regex_sb.append("file|"); // File path. - regex_sb.append("finger|"); // The Finger proto. - regex_sb.append("ftp(?:s?)|"); // The FTP proto. - regex_sb.append("git|"); // The Git proto. - regex_sb.append("gopher|"); // The Gopher proto. - regex_sb.append("http(?:s?)|"); // The HTTP proto. - regex_sb.append("imap(?:s?)|"); // The IMAP proto. - regex_sb.append("irc(?:[6s]?)|"); // The IRC proto. - regex_sb.append("ip[fn]s|"); // The IPFS proto. - regex_sb.append("ldap(?:s?)|"); // The LDAP proto. - regex_sb.append("pop3(?:s?)|"); // The POP3 proto. - regex_sb.append("redis(?:s?)|"); // The Redis proto. - regex_sb.append("rsync|"); // The Rsync proto. - regex_sb.append("rtsp(?:[su]?)|"); // The RTSP proto. - regex_sb.append("sftp|"); // The SFTP proto. 
- regex_sb.append("smb(?:s?)|"); // The SAMBA proto. - regex_sb.append("smtp(?:s?)|"); // The SMTP proto. - regex_sb.append("svn(?:(?:\\+ssh)?)|"); // The Subversion proto. - regex_sb.append("tcp|"); // The TCP proto. - regex_sb.append("telnet|"); // The Telnet proto. - regex_sb.append("tftp|"); // The TFTP proto. - regex_sb.append("udp|"); // The UDP proto. - regex_sb.append("vnc|"); // The VNC proto. - regex_sb.append("ws(?:s?)"); // The Websocket proto. - regex_sb.append(")://"); // End scheme group. - regex_sb.append(")"); // End first matching group. - - - // Begin second matching group. - regex_sb.append("("); - - // User name and/or password in format 'user:pass@'. - regex_sb.append("(?:\\S+(?::\\S*)?@)?"); - - // Begin host group. - regex_sb.append("(?:"); - - // IP address (from http://www.regular-expressions.info/examples.html). - regex_sb.append("(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|"); - - // Host name or domain. - regex_sb.append("(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))?|"); - - // Just path. Used in case of 'file://' scheme. - regex_sb.append("/(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)"); - - // End host group. - regex_sb.append(")"); - - // Port number. - regex_sb.append("(?::\\d{1,5})?"); - - // Resource path with optional query string. - regex_sb.append("(?:/[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?"); - - // Fragment. - regex_sb.append("(?:#[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?"); - - // End second matching group. 
-        regex_sb.append(")");
-
-        final Pattern urlPattern = Pattern.compile(
-            regex_sb.toString(),
-            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
-
-        LinkedHashSet urlSet = new LinkedHashSet<>();
-        Matcher matcher = urlPattern.matcher(text);
-
-        while (matcher.find()) {
-            int matchStart = matcher.start(1);
-            int matchEnd = matcher.end();
-            String url = text.substring(matchStart, matchEnd);
-            urlSet.add(url);
-        }
-
-        return urlSet;
-    }
-
 }
diff --git a/termux-shared/src/main/java/com/termux/shared/data/UrlUtils.java b/termux-shared/src/main/java/com/termux/shared/data/UrlUtils.java
new file mode 100644
index 00000000..8b1b0939
--- /dev/null
+++ b/termux-shared/src/main/java/com/termux/shared/data/UrlUtils.java
@@ -0,0 +1,103 @@
+package com.termux.shared.data;
+
+import java.util.LinkedHashSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Utility methods for finding URLs inside arbitrary text. */
+public class UrlUtils {
+
+    /** URL matching pattern, compiled once at class initialization; immutable, safe to share. */
+    public static final Pattern URL_MATCH_REGEX = buildUrlMatchRegex();
+
+    /** @return the shared pre-compiled URL matching pattern, never {@code null}. */
+    public static Pattern getUrlMatchRegex() {
+        return URL_MATCH_REGEX;
+    }
+
+    /**
+     * Builds the URL pattern. Group 1 captures the scheme plus "://" and group 2 captures
+     * the rest of the URL (credentials, host, port, path with query, and fragment).
+     */
+    private static Pattern buildUrlMatchRegex() {
+        StringBuilder regex_sb = new StringBuilder();
+
+        regex_sb.append("("); // Begin first matching group.
+        regex_sb.append("(?:"); // Begin scheme group.
+        regex_sb.append("dav|"); // The DAV proto.
+        regex_sb.append("dict|"); // The DICT proto.
+        regex_sb.append("dns|"); // The DNS proto.
+        regex_sb.append("file|"); // File path.
+        regex_sb.append("finger|"); // The Finger proto.
+        regex_sb.append("ftp(?:s?)|"); // The FTP proto.
+        regex_sb.append("git|"); // The Git proto.
+        regex_sb.append("gopher|"); // The Gopher proto.
+        regex_sb.append("http(?:s?)|"); // The HTTP proto.
+        regex_sb.append("imap(?:s?)|"); // The IMAP proto.
+        regex_sb.append("irc(?:[6s]?)|"); // The IRC proto.
+        regex_sb.append("ip[fn]s|"); // The IPFS proto.
+        regex_sb.append("ldap(?:s?)|"); // The LDAP proto.
+        regex_sb.append("pop3(?:s?)|"); // The POP3 proto.
+        regex_sb.append("redis(?:s?)|"); // The Redis proto.
+        regex_sb.append("rsync|"); // The Rsync proto.
+        regex_sb.append("rtsp(?:[su]?)|"); // The RTSP proto.
+        regex_sb.append("sftp|"); // The SFTP proto.
+        regex_sb.append("smb(?:s?)|"); // The SAMBA proto.
+        regex_sb.append("smtp(?:s?)|"); // The SMTP proto.
+        regex_sb.append("svn(?:(?:\\+ssh)?)|"); // The Subversion proto.
+        regex_sb.append("tcp|"); // The TCP proto.
+        regex_sb.append("telnet|"); // The Telnet proto.
+        regex_sb.append("tftp|"); // The TFTP proto.
+        regex_sb.append("udp|"); // The UDP proto.
+        regex_sb.append("vnc|"); // The VNC proto.
+        regex_sb.append("ws(?:s?)"); // The Websocket proto.
+        regex_sb.append(")://"); // End scheme group.
+        regex_sb.append(")"); // End first matching group.
+
+        // Begin second matching group.
+        regex_sb.append("(");
+        // User name and/or password in format 'user:pass@'.
+        regex_sb.append("(?:\\S+(?::\\S*)?@)?");
+        // Begin host group.
+        regex_sb.append("(?:");
+        // IP address (from http://www.regular-expressions.info/examples.html).
+        regex_sb.append("(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|");
+        // Host name or domain.
+        regex_sb.append("(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))?|");
+        // Just path. Used in case of 'file://' scheme.
+        regex_sb.append("/(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)");
+        // End host group.
+        regex_sb.append(")");
+        // Port number.
+        regex_sb.append("(?::\\d{1,5})?");
+        // Resource path with optional query string.
+        regex_sb.append("(?:/[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");
+        // Fragment.
+        regex_sb.append("(?:#[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");
+        // End second matching group.
+        regex_sb.append(")");
+
+        return Pattern.compile(
+            regex_sb.toString(),
+            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
+    }
+
+    /**
+     * Extracts all URLs found in {@code text}, de-duplicated and in order of first occurrence.
+     *
+     * @param text the text to scan; may be {@code null}.
+     * @return the matched URLs, or an empty set if {@code text} is {@code null} or has none.
+     */
+    public static LinkedHashSet<CharSequence> extractUrls(String text) {
+        LinkedHashSet<CharSequence> urlSet = new LinkedHashSet<>();
+        if (text == null) return urlSet;
+
+        Matcher matcher = URL_MATCH_REGEX.matcher(text);
+        while (matcher.find()) {
+            // Group 1 opens the pattern, so the whole match already spans scheme to URL end.
+            urlSet.add(matcher.group());
+        }
+        return urlSet;
+    }
+
+}