diff --git a/app/src/main/java/com/termux/app/TermuxActivity.java b/app/src/main/java/com/termux/app/TermuxActivity.java index f02de62e..28f968ce 100644 --- a/app/src/main/java/com/termux/app/TermuxActivity.java +++ b/app/src/main/java/com/termux/app/TermuxActivity.java @@ -654,19 +654,86 @@ public final class TermuxActivity extends Activity implements ServiceConnection } static LinkedHashSet extractUrls(String text) { - // Pattern for recognizing a URL, based off RFC 3986 - // http://stackoverflow.com/questions/5713558/detect-and-extract-url-from-a-string + + StringBuilder regex_sb = new StringBuilder(); + + regex_sb.append("("); // Begin first matching group. + regex_sb.append("(?:"); // Begin scheme group. + regex_sb.append("dav|"); // The DAV proto. + regex_sb.append("dict|"); // The DICT proto. + regex_sb.append("dns|"); // The DNS proto. + regex_sb.append("file|"); // File path. + regex_sb.append("finger|"); // The Finger proto. + regex_sb.append("ftp(?:s?)|"); // The FTP proto. + regex_sb.append("git|"); // The Git proto. + regex_sb.append("gopher|"); // The Gopher proto. + regex_sb.append("http(?:s?)|"); // The HTTP proto. + regex_sb.append("imap(?:s?)|"); // The IMAP proto. + regex_sb.append("irc(?:[6s]?)|"); // The IRC proto. + regex_sb.append("ip[fn]s|"); // The IPFS proto. + regex_sb.append("ldap(?:s?)|"); // The LDAP proto. + regex_sb.append("pop3(?:s?)|"); // The POP3 proto. + regex_sb.append("redis(?:s?)|"); // The Redis proto. + regex_sb.append("rsync|"); // The Rsync proto. + regex_sb.append("rtsp(?:[su]?)|"); // The RTSP proto. + regex_sb.append("sftp|"); // The SFTP proto. + regex_sb.append("smb(?:s?)|"); // The SAMBA proto. + regex_sb.append("smtp(?:s?)|"); // The SMTP proto. + regex_sb.append("svn(?:(?:\\+ssh)?)|"); // The Subversion proto. + regex_sb.append("tcp|"); // The TCP proto. + regex_sb.append("telnet|"); // The Telnet proto. + regex_sb.append("tftp|"); // The TFTP proto. + regex_sb.append("udp|"); // The UDP proto. + regex_sb.append("vnc|"); // The VNC proto. + regex_sb.append("ws(?:s?)"); // The Websocket proto. + regex_sb.append(")://"); // End scheme group. + regex_sb.append(")"); // End first matching group. + + + // Begin second matching group. + regex_sb.append("("); + + // User name and/or password in format 'user:pass@'. + regex_sb.append("(?:\\S+(?::\\S*)?@)?"); + + // Begin host group. + regex_sb.append("(?:"); + + // IP address (from http://www.regular-expressions.info/examples.html). + regex_sb.append("(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|"); + + // Host name or domain. + regex_sb.append("(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))?|"); + + // Just path. Used in case of 'file://' scheme. + regex_sb.append("/(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)"); + + // End host group. + regex_sb.append(")"); + + // Port number. + regex_sb.append("(?::\\d{1,5})?"); + + // Resource path with optional query string. + regex_sb.append("(?:/[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?"); + + // End second matching group. + regex_sb.append(")"); + final Pattern urlPattern = Pattern.compile( - "(?:^|[\\W])((ht|f)tp(s?)://|www\\.)" + "(([\\w\\-]+\\.)+?([\\w\\-.~]+/?)*" + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]*$~@!:/{};']*)", + regex_sb.toString(), Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); + LinkedHashSet urlSet = new LinkedHashSet<>(); Matcher matcher = urlPattern.matcher(text); + while (matcher.find()) { int matchStart = matcher.start(1); int matchEnd = matcher.end(); String url = text.substring(matchStart, matchEnd); urlSet.add(url); } + return urlSet; }