Skip to content

Commit 9de071f

Browse files
authored
replace URL() constructors with URI() (#4750)
rework the bug/review page substitution avoid using "$1" and use custom replacer function to make sure the matched pattern is properly escaped
1 parent ec7199e commit 9de071f

File tree

7 files changed

+168
-123
lines changed

7 files changed

+168
-123
lines changed

docker/start.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def index():
115115
API endpoint for triggering reindex.
116116
:return: message describing the outcome
117117
"""
118-
global periodic_timer
118+
global periodic_timer # noqa: F824
119119

120120
if periodic_timer:
121121
logger = logging.getLogger(__name__)
@@ -360,7 +360,7 @@ def indexer_no_projects(logger, uri, config_path, extra_indexer_options):
360360
indexer.execute()
361361

362362
logger.info("Waiting for reindex to be triggered")
363-
global periodic_timer
363+
global periodic_timer # noqa: F824
364364
periodic_timer.wait_for_event()
365365

366366

@@ -420,7 +420,7 @@ def project_syncer(logger, loglevel, uri, config_path, numworkers, env, api_time
420420
save_config(logger, uri, config_path, api_timeout)
421421

422422
logger.info("Waiting for reindex to be triggered")
423-
global periodic_timer
423+
global periodic_timer # noqa: F824
424424
periodic_timer.wait_for_event()
425425

426426

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java

+8
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import org.opengrok.indexer.authorization.AuthorizationStack;
6060
import org.opengrok.indexer.history.RepositoryInfo;
6161
import org.opengrok.indexer.logger.LoggerFactory;
62+
import org.opengrok.indexer.web.Util;
6263

6364
import static org.opengrok.indexer.configuration.PatternUtil.compilePattern;
6465

@@ -1642,5 +1643,12 @@ public void checkConfiguration() throws ConfigurationException {
16421643
LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " +
16431644
"History cache has to be enabled for history based reindex.");
16441645
}
1646+
1647+
if (!Objects.isNull(getBugPage()) && !Util.isHttpUri(getBugPage())) {
1648+
throw new ConfigurationException("Bug page must be valid HTTP(S) URI");
1649+
}
1650+
if (!Objects.isNull(getReviewPage()) && !Util.isHttpUri(getReviewPage())) {
1651+
throw new ConfigurationException("Review page must be valid HTTP(S) URI");
1652+
}
16451653
}
16461654
}

opengrok-indexer/src/main/java/org/opengrok/indexer/web/Util.java

+82-76
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,13 @@
5454
import java.util.Locale;
5555
import java.util.Map;
5656
import java.util.Map.Entry;
57+
import java.util.Objects;
5758
import java.util.Optional;
5859
import java.util.TreeMap;
5960
import java.util.function.IntConsumer;
6061
import java.util.logging.Level;
6162
import java.util.logging.Logger;
63+
import java.util.regex.MatchResult;
6264
import java.util.regex.Matcher;
6365
import java.util.regex.Pattern;
6466
import java.util.stream.IntStream;
@@ -367,7 +369,7 @@ public static String breadcrumbPath(String urlPrefix, String path) {
367369
* @param path the full path from which the breadcrumb path is built
368370
* @param sep separator to use to crack the given path
369371
*
370-
* @return HTML markup fro the breadcrumb or the path itself.
372+
* @return HTML markup for the breadcrumb or the path itself.
371373
* @see #breadcrumbPath(String, String, char, String, boolean, boolean)
372374
*/
373375
public static String breadcrumbPath(String urlPrefix, String path, char sep) {
@@ -658,7 +660,7 @@ public static void encode(String s, Appendable dest) throws IOException {
658660
// special html characters
659661
dest.append("&#").append("" + (int) c).append(";");
660662
} else if (c == ' ') {
661-
// non breaking space
663+
// non-breaking space
662664
dest.append(" ");
663665
} else if (c == '\t') {
664666
dest.append("    ");
@@ -671,22 +673,6 @@ public static void encode(String s, Appendable dest) throws IOException {
671673
}
672674
}
673675

674-
/**
675-
* Encode URL.
676-
*
677-
* @param urlStr string URL
678-
* @return the encoded URL
679-
* @throws URISyntaxException URI syntax
680-
* @throws MalformedURLException URL malformed
681-
*/
682-
public static String encodeURL(String urlStr) throws URISyntaxException, MalformedURLException {
683-
URL url = new URL(urlStr);
684-
URI constructed = new URI(url.getProtocol(), url.getUserInfo(),
685-
url.getHost(), url.getPort(),
686-
url.getPath(), url.getQuery(), url.getRef());
687-
return constructed.toString();
688-
}
689-
690676
/**
691677
* Write out line information wrt. to the given annotation in the format:
692678
* {@code Linenumber Blame Author} incl. appropriate links.
@@ -939,26 +925,22 @@ public static String uriEncode(String q) {
939925
* @param dest a defined target
940926
* @throws IOException I/O
941927
*/
942-
public static void uriEncode(String str, Appendable dest)
943-
throws IOException {
928+
public static void uriEncode(String str, Appendable dest) throws IOException {
944929
String uenc = uriEncode(str);
945930
dest.append(uenc);
946931
}
947932

948933
/**
949-
* Append '&name=value" to the given buffer. If the given
950-
* <var>value</var>
951-
* is {@code null}, this method does nothing.
934+
* Append "&amp;name=value" to the given buffer. If the given <var>value</var> is {@code null},
935+
* this method does nothing.
952936
*
953937
* @param buf where to append the query string
954938
* @param key the name of the parameter to add. Append as is!
955-
* @param value the value for the given parameter. Gets automatically UTF-8
956-
* URL encoded.
939+
* @param value the value for the given parameter. Gets automatically UTF-8 URL encoded.
957940
* @throws NullPointerException if the given buffer is {@code null}.
958941
* @see #uriEncode(String)
959942
*/
960-
public static void appendQuery(StringBuilder buf, String key,
961-
String value) {
943+
public static void appendQuery(StringBuilder buf, String key, String value) {
962944

963945
if (value != null) {
964946
buf.append(AMP).append(key).append('=').append(uriEncode(value));
@@ -1454,48 +1436,50 @@ private static String generatePageLink(int page, int offset, int limit, long siz
14541436

14551437
}
14561438

1457-
14581439
/**
1459-
* Check if the string is a HTTP URL.
1440+
* Check if the string is an HTTP(S) URI (i.e. allows for relative identifiers).
14601441
*
14611442
* @param string the string to check
1462-
* @return true if it is http URL, false otherwise
1443+
* @return true if it is HTTP(S) URI, false otherwise
14631444
*/
14641445
public static boolean isHttpUri(String string) {
1465-
URL url;
1446+
URI uri;
14661447
try {
1467-
url = new URL(string);
1468-
} catch (MalformedURLException ex) {
1448+
uri = new URI(string);
1449+
} catch (URISyntaxException ex) {
14691450
return false;
14701451
}
1471-
return url.getProtocol().equals("http") || url.getProtocol().equals("https");
1452+
String scheme = uri.getScheme();
1453+
if (Objects.isNull(scheme)) {
1454+
return false;
1455+
}
1456+
return uri.getScheme().equals("http") || uri.getScheme().equals("https");
14721457
}
14731458

1474-
protected static final String REDACTED_USER_INFO = "redacted_by_OpenGrok";
1459+
static final String REDACTED_USER_INFO = "redacted_by_OpenGrok";
14751460

14761461
/**
1477-
* If given path is a URL, return the string representation with the user-info part filtered out.
1462+
* If given path is a URI, return the string representation with the user-info part filtered out.
14781463
* @param path path to object
1479-
* @return either the original string or string representation of URL with the user-info part removed
1464+
* @return either the original string (if the URI is not valid)
1465+
* or string representation of the URI with the user-info part removed
14801466
*/
1481-
public static String redactUrl(String path) {
1482-
URL url;
1467+
public static String redactUri(String path) {
1468+
URI uri;
14831469
try {
1484-
url = new URL(path);
1485-
} catch (MalformedURLException e) {
1486-
// not an URL
1470+
uri = new URI(path);
1471+
} catch (URISyntaxException e) {
14871472
return path;
14881473
}
1489-
if (url.getUserInfo() != null) {
1490-
return url.toString().replace(url.getUserInfo(),
1491-
REDACTED_USER_INFO);
1474+
if (uri.getUserInfo() != null) {
1475+
return uri.toString().replace(uri.getUserInfo(), REDACTED_USER_INFO);
14921476
} else {
14931477
return path;
14941478
}
14951479
}
14961480

14971481
/**
1498-
* Build a HTML link to the given HTTP URL. If the URL is not an http URL
1482+
* Build an HTML link to the given HTTP URL. If the URL is not an HTTP URL
14991483
* then it is returned as it was received. This has the same effect as
15001484
* invoking <code>linkify(url, true)</code>.
15011485
*
@@ -1509,7 +1493,7 @@ public static String linkify(String url) {
15091493
}
15101494

15111495
/**
1512-
* Build a html link to the given http URL. If the URL is not an http URL
1496+
* Build an HTML link to the given HTTP URL. If the URL is not an HTTP URL
15131497
* then it is returned as it was received.
15141498
*
15151499
* @param url the HTTP URL
@@ -1535,9 +1519,9 @@ public static String linkify(String url, boolean newTab) {
15351519
}
15361520

15371521
/**
1538-
* Build an anchor with given name and a pack of attributes. Automatically
1539-
* escapes href attributes and automatically escapes the name into HTML
1540-
* entities.
1522+
* Build an anchor with given name and a pack of attributes.
1523+
* Assumes the <code>href</code> attribute value is URI encoded.
1524+
* Automatically escapes the name into HTML entities.
15411525
*
15421526
* @param name displayed name of the anchor
15431527
* @param attrs map of attributes for the html element
@@ -1556,7 +1540,7 @@ public static String buildLink(String name, Map<String, String> attrs)
15561540
buffer.append("=\"");
15571541
String value = attr.getValue();
15581542
if (attr.getKey().equals("href")) {
1559-
value = Util.encodeURL(value);
1543+
value = new URI(value).toURL().toString();
15601544
}
15611545
buffer.append(value);
15621546
buffer.append("\"");
@@ -1568,9 +1552,9 @@ public static String buildLink(String name, Map<String, String> attrs)
15681552
}
15691553

15701554
/**
1571-
* Build an anchor with given name and a pack of attributes. Automatically
1572-
* escapes href attributes and automatically escapes the name into HTML
1573-
* entities.
1555+
* Build an anchor with given name and a pack of attributes.
1556+
* Assumes the <code>href</code> attribute value is URI encoded.
1557+
* Automatically escapes the name into HTML entities.
15741558
*
15751559
* @param name displayed name of the anchor
15761560
* @param url anchor's URL
@@ -1579,17 +1563,16 @@ public static String buildLink(String name, Map<String, String> attrs)
15791563
* @throws URISyntaxException URI syntax
15801564
* @throws MalformedURLException bad URL
15811565
*/
1582-
public static String buildLink(String name, String url)
1583-
throws URISyntaxException, MalformedURLException {
1566+
public static String buildLink(String name, String url) throws URISyntaxException, MalformedURLException {
15841567
Map<String, String> attrs = new TreeMap<>();
15851568
attrs.put("href", url);
15861569
return buildLink(name, attrs);
15871570
}
15881571

15891572
/**
1590-
* Build an anchor with given name and a pack of attributes. Automatically
1591-
* escapes href attributes and automatically escapes the name into HTML
1592-
* entities.
1573+
* Build an anchor with given name and a pack of attributes.
1574+
* Assumes the <code>href</code> attribute value is URI encoded.
1575+
* Automatically escapes the name into HTML entities.
15931576
*
15941577
* @param name displayed name of the anchor
15951578
* @param url anchor's URL
@@ -1610,30 +1593,52 @@ public static String buildLink(String name, String url, boolean newTab)
16101593
return buildLink(name, attrs);
16111594
}
16121595

1596+
/**
1597+
* Callback function to provide replacement value for pattern match.
1598+
* It replaces the first group of the pattern and retains the rest of the pattern.
1599+
*
1600+
* @param result {@link MatchResult} object representing pattern match
1601+
* @param text original text containing the match
1602+
* @param url URL to be used for the replacement
1603+
* @return replacement for the first group in the pattern
1604+
*/
1605+
private static String buildLinkReplacer(MatchResult result, String text, String url) {
1606+
if (result.groupCount() < 1) {
1607+
return result.group(0);
1608+
}
1609+
final String group1 = result.group(1);
1610+
final String appendedUrl = url + uriEncode(group1);
1611+
try {
1612+
StringBuilder stringBuilder = new StringBuilder();
1613+
if (result.start(0) < result.start(1)) {
1614+
stringBuilder.append(text.substring(result.start(0), result.start(1)));
1615+
}
1616+
stringBuilder.append(buildLink(group1, appendedUrl, true));
1617+
if (result.end(1) < result.end(0)) {
1618+
stringBuilder.append(text.substring(result.end(1), result.end(0)));
1619+
}
1620+
return stringBuilder.toString();
1621+
} catch (URISyntaxException | MalformedURLException e) {
1622+
LOGGER.log(Level.WARNING, "The given URL ''{0}'' is not valid", appendedUrl);
1623+
return result.group(0);
1624+
}
1625+
}
1626+
16131627
/**
16141628
* Replace all occurrences of pattern in the incoming text with the link
1615-
* named name pointing to an URL. It is possible to use the regexp pattern
1616-
* groups in name and URL when they are specified in the pattern.
1629+
* named name pointing to a URL.
16171630
*
1618-
* @param text text to replace all patterns
1631+
* @param text text to replace all patterns
16191632
* @param pattern the pattern to match
1620-
* @param name link display name
1621-
* @param url link URL
1633+
* @param url link URL
16221634
* @return the text with replaced links
16231635
*/
1624-
public static String linkifyPattern(String text, Pattern pattern, String name, String url) {
1625-
try {
1626-
String buildLink = buildLink(name, url, true);
1627-
return pattern.matcher(text).replaceAll(buildLink);
1628-
} catch (URISyntaxException | MalformedURLException ex) {
1629-
LOGGER.log(Level.WARNING, "The given URL ''{0}'' is not valid", url);
1630-
return text;
1631-
}
1636+
public static String linkifyPattern(String text, Pattern pattern, String url) {
1637+
return pattern.matcher(text).replaceAll(match -> buildLinkReplacer(match, text, url));
16321638
}
16331639

16341640
/**
1635-
* Try to complete the given URL part into full URL with server name, port,
1636-
* scheme, ...
1641+
* Try to complete the given URL part into full URL with server name, port, scheme, ...
16371642
* <dl>
16381643
* <dt>for request http://localhost:8080/source/xref/xxx and part
16391644
* /cgi-bin/user=</dt>
@@ -1655,7 +1660,8 @@ public static String completeUrl(String url, HttpServletRequest req) {
16551660
try {
16561661
if (!isHttpUri(url)) {
16571662
if (url.startsWith("/")) {
1658-
return new URI(req.getScheme(), null, req.getServerName(), req.getServerPort(), url, null, null).toString();
1663+
return new URI(req.getScheme(), null, req.getServerName(), req.getServerPort(), url,
1664+
null, null).toString();
16591665
}
16601666
var prepUrl = req.getRequestURL();
16611667
if (!url.isEmpty()) {
@@ -1665,7 +1671,7 @@ public static String completeUrl(String url, HttpServletRequest req) {
16651671
}
16661672
return url;
16671673
} catch (URISyntaxException ex) {
1668-
LOGGER.log(Level.INFO,
1674+
LOGGER.log(Level.WARNING,
16691675
String.format("Unable to convert given URL part '%s' to complete URL", url),
16701676
ex);
16711677
return url;

0 commit comments

Comments
 (0)