Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use PCRE2 instead of PCRE #153

Merged
merged 3 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis-ci/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ apt-get install -qq \
cmake \
graphviz-dev \
libjemalloc-dev \
libpcre3-dev \
libpcre2-dev \
libtool \
ninja-build \
pkg-config
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
set(CMAKE_C_STANDARD 99)

find_package(Check)
find_package(PCRE REQUIRED)
find_package(PCRE2 REQUIRED)

include(CheckSymbolExists)
include(CheckIncludeFile)
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Requirement

### Runtime Requirement

* pcre
* pcre2
* (optional) graphviz version 2.38.0 (20140413.2041)
* (optional) libjson-c-dev

Expand Down Expand Up @@ -187,13 +187,13 @@ Optimization
Simple regular expressions are optimized through a regexp pattern to opcode
translator, which translates simple patterns into small & fast scanners.

By using this method, r3 reduces the matching overhead of pcre library.
By using this method, r3 reduces the matching overhead of the pcre2 library.

Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+`, `[^-]+` or `.*`.

Slugs without a specified regular expression will be compiled into the `[^/]+` pattern; therefore, they are optimized too.

Complex regular expressions will still use libpcre to match URL (partially).
Complex regular expressions will still use libpcre2 to match URLs (partially).


Performance
Expand Down Expand Up @@ -356,15 +356,15 @@ if ( $error ) {
Install
----------------------

sudo apt-get install check libpcre3 libpcre3-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
sudo apt-get install check libpcre2-8-0 libpcre2-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
sudo apt-get install graphviz-dev graphviz # if you want graphviz
./autogen.sh
./configure && make
sudo make install

And we support debian-based distro now!

sudo apt-get install build-essential autoconf automake libpcre3-dev pkg-config debhelper libtool check
sudo apt-get install build-essential autoconf automake libpcre2-dev pkg-config debhelper libtool check
mv dist-debian debian
dpkg-buildpackage -b -us -uc
sudo gdebi ../libr3*.deb
Expand Down
37 changes: 0 additions & 37 deletions cmake/Modules/FindPCRE.cmake

This file was deleted.

37 changes: 37 additions & 0 deletions cmake/Modules/FindPCRE2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2007-2009 LuaDist.
# Created by Peter Kapec <[email protected]>
# Redistribution and use of this file is allowed according to the terms of the MIT license.
# For details see the COPYRIGHT file distributed with LuaDist.
# Note:
# Searching headers and libraries is very simple and is NOT as powerful as scripts
# distributed with CMake, because LuaDist defines directories to search for.
# Everyone is encouraged to contact the author with improvements. Maybe this file
# becomes part of CMake distribution sometimes.

# - Find pcre2
# Find the native PCRE2 headers and libraries.
#
# PCRE2_INCLUDE_DIRS - where to find pcre2.h, etc.
# PCRE2_LIBRARIES - List of libraries when using pcre2.
# PCRE2_FOUND - True if pcre2 found.

# Look for the header file. The result is stored in the cache variable
# PCRE2_INCLUDE_DIR.
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h)

# Look for the pcre2-8 library. The result is stored in the cache variable
# PCRE2_LIBRARY.
find_library(PCRE2_LIBRARY NAMES pcre2-8)

# Handle the QUIETLY and REQUIRED arguments and set PCRE2_FOUND to TRUE if all
# listed variables are TRUE.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_INCLUDE_DIR)

# Copy the results to the output variables; leave them unset when PCRE2 was
# not found.
if(PCRE2_FOUND)
  set(PCRE2_LIBRARIES ${PCRE2_LIBRARY})
  set(PCRE2_INCLUDE_DIRS ${PCRE2_INCLUDE_DIR})
else()
  set(PCRE2_LIBRARIES)
  set(PCRE2_INCLUDE_DIRS)
endif()

# Hide the cache entries created by find_path()/find_library() above.
# PCRE2_INCLUDE_DIRS and PCRE2_LIBRARIES are ordinary (non-cache) variables,
# so they are not the ones to mark as advanced.
mark_as_advanced(PCRE2_INCLUDE_DIR PCRE2_LIBRARY)
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ AM_CONDITIONAL(USE_JEMALLOC, test "x$have_jemalloc" = "xyes")
# AC_DEFINE(USE_JEMALLOC, test "x$found_jemalloc" = "xyes" , "use jemalloc")


PKG_CHECK_MODULES(DEPS, [libpcre])
PKG_CHECK_MODULES(DEPS, [libpcre2-8])
AC_SUBST(DEPS_CFLAGS)
AC_SUBST(DEPS_LIBS)

Expand Down
2 changes: 1 addition & 1 deletion dist-debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Source: libr3
Priority: optional
Maintainer: Ronmi Ren <[email protected]>
Build-Depends: debhelper (>= 8.0.0), automake, autotools-dev, autoconf,
libtool, libpcre3-dev, pkg-config, check
libtool, libpcre2-dev, pkg-config, check
Standards-Version: 3.9.4
Section: libs
Homepage: https://github.com/c9s/r3
Expand Down
8 changes: 4 additions & 4 deletions include/r3.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>

#ifdef HAVE_STDBOOL_H
# include <stdbool.h>
Expand Down Expand Up @@ -41,13 +42,12 @@ struct _node {
R3_VECTOR(R3Edge) edges;
R3_VECTOR(R3Route) routes;
char * combined_pattern;
pcre * pcre_pattern;
pcre_extra * pcre_extra;
pcre2_code * pcre_pattern;
pcre2_match_data * match_data;

// edges are mostly less than 255
unsigned int compare_type; // compare_type: pcre, opcode, string
unsigned int endpoint; // endpoint, should be zero for non-endpoint nodes
unsigned int ov_cnt; // capture vector array size for pcre

// the pointer of R3Route data
void * data;
Expand Down
2 changes: 1 addition & 1 deletion r3.pc.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ libdir=@libdir@
Name: r3
Description: High-performance URL router library
Version: @PACKAGE_VERSION@
Requires: libpcre
Requires: libpcre2-8
Libs: -L${libdir} -lr3
CFlags: -I${includedir}
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ target_include_directories(r3

target_link_libraries(r3
PUBLIC
${PCRE_LIBRARIES})
${PCRE2_LIBRARIES})

install(
TARGETS r3
Expand Down
2 changes: 0 additions & 2 deletions src/edge.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// Jemalloc memory management
// #include <jemalloc/jemalloc.h>

// PCRE
#include <pcre.h>
#include "r3.h"
#include "r3_slug.h"
#include "slug.h"
Expand Down
1 change: 0 additions & 1 deletion src/match_entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <assert.h>
#include <stdbool.h>

Expand Down
70 changes: 28 additions & 42 deletions src/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#include <netinet/in.h>
#include <arpa/inet.h>

// PCRE
#include <pcre.h>

#include "r3.h"
#include "r3_slug.h"
#include "slug.h"
Expand Down Expand Up @@ -75,13 +72,11 @@ void r3_tree_free(R3Node * tree) {
}
free(tree->routes.entries);
if (tree->pcre_pattern) {
pcre_free(tree->pcre_pattern);
pcre2_code_free(tree->pcre_pattern);
}
#ifdef PCRE_STUDY_JIT_COMPILE
if (tree->pcre_extra) {
pcre_free_study(tree->pcre_extra);
if (tree->match_data) {
pcre2_match_data_free(tree->match_data);
}
#endif
free(tree->combined_pattern);
free(tree);
tree = NULL;
Expand Down Expand Up @@ -223,41 +218,38 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
free(n->combined_pattern);
n->combined_pattern = cpat;

const char *pcre_error = NULL;
int pcre_erroffset = 0;
int pcre_errorcode = 0;
PCRE2_SIZE pcre_erroffset = 0;
unsigned int option_bits = 0;

n->ov_cnt = (1 + n->edges.size) * 3;

if (n->pcre_pattern) {
pcre_free(n->pcre_pattern);
pcre2_code_free(n->pcre_pattern);
}
n->pcre_pattern = pcre_compile(
n->pcre_pattern = pcre2_compile(
n->combined_pattern, /* the pattern */
PCRE2_ZERO_TERMINATED,
option_bits, /* default options */
&pcre_error, /* for error message */
&pcre_errorcode, /* for error code */
&pcre_erroffset, /* for error offset */
NULL); /* use default character tables */
NULL); /* compile context */
if (n->pcre_pattern == NULL) {
if (errstr) {
int r = asprintf(errstr, "PCRE compilation failed at offset %d: %s, pattern: %s", pcre_erroffset, pcre_error, n->combined_pattern);
if (r) {};
PCRE2_UCHAR buf[128];
pcre2_get_error_message(pcre_errorcode, buf, sizeof(buf));
asprintf(errstr, "PCRE compilation failed at offset %ld: %s, pattern: %s", pcre_erroffset, buf, n->combined_pattern);
}
return -1;
}
#ifdef PCRE_STUDY_JIT_COMPILE
if (n->pcre_extra) {
pcre_free_study(n->pcre_extra);
if (n->match_data) {
pcre2_match_data_free(n->match_data);
}
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error);
if (!n->pcre_extra && pcre_error) {
n->match_data = pcre2_match_data_create_from_pattern(n->pcre_pattern, NULL);
if (n->match_data == NULL) {
if (errstr) {
int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern);
if (r) {};
asprintf(errstr, "Failed to allocate match data block");
}
return -1;
}
#endif
return 0;
}

Expand Down Expand Up @@ -339,28 +331,26 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
info("COMPARE PCRE_PATTERN\n");
const char *substring_start = 0;
int substring_length = 0;
int ov[ n->ov_cnt ];
int rc;

info("pcre matching %s on [%s]\n", n->combined_pattern, path);

rc = pcre_exec(
rc = pcre2_match(
n->pcre_pattern, /* the compiled pattern */
n->pcre_extra,
path, /* the subject string */
path_len, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
ov, /* output vector for substring information */
n->ov_cnt); /* number of elements in the output vector */
n->match_data,/* match data results */
NULL); /* match context */

// does not match all edges, return NULL;
if (rc < 0) {
#ifdef DEBUG
printf("pcre rc: %d\n", rc );
switch(rc)
{
case PCRE_ERROR_NOMATCH:
case PCRE2_ERROR_NOMATCH:
printf("pcre: no match '%s' on pattern '%s'\n", path, n->combined_pattern);
break;

Expand All @@ -373,23 +363,22 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
return NULL;
}


PCRE2_SIZE *ov = pcre2_get_ovector_pointer(n->match_data);

restlen = path_len - ov[1]; // if it's fully matched to the end (rest string length)
int *inv = ov + 2;

if (!restlen) {
// Check the substring to decide we should go deeper on which edge
for (i = 1; i < rc; i++)
{
substring_length = *(inv+1) - *inv;
substring_length = ov[2*i+1] - ov[2*i];

// if it's not matched for this edge, just skip them quickly
if (!is_end && !substring_length) {
inv += 2;
continue;
}

substring_start = path + *inv;
substring_start = path + ov[2*i];
e = n->edges.entries + i - 1;

if (entry && e->has_slug) {
Expand All @@ -404,18 +393,16 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,


// Check the substring to decide we should go deeper on which edge
inv = ov + 2;
for (i = 1; i < rc; i++)
{
substring_length = *(inv+1) - *inv;
substring_length = ov[2*i+1] - ov[2*i];

// if it's not matched for this edge, just skip them quickly
if (!is_end && !substring_length) {
inv += 2;
continue;
}

substring_start = path + *inv;
substring_start = path + ov[2*i];
e = n->edges.entries + i - 1;

if (entry && e->has_slug) {
Expand Down Expand Up @@ -520,7 +507,6 @@ inline R3Edge * r3_node_find_edge_str(const R3Node * n, const char * str, int st
// n->endpoint = 0;
// n->combined_pattern = NULL;
// n->pcre_pattern = NULL;
// n->pcre_extra = NULL;
// n->data = NULL;
// return n;
// }
Expand Down