From c02e78f26d6a89db98f1c99d5b4f7ddb12e3248f Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Mon, 18 Dec 2017 20:20:26 +0900
Subject: [PATCH 01/10] Move tld regex to TldLists class

---
 lib/Twitter/Text/Regex.php          |    9 +-
 lib/Twitter/Text/TldLists.php       | 1636 +++++++++++++++++++++++++++
 tests/Twitter/Text/TldListsTest.php |   39 +
 3 files changed, 1679 insertions(+), 5 deletions(-)
 create mode 100644 lib/Twitter/Text/TldLists.php
 create mode 100644 tests/Twitter/Text/TldListsTest.php

diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 5969f38..83c88ab 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -10,6 +10,8 @@
 
 namespace Twitter\Text;
 
+use Twitter\Text\TldLists;
+
 /**
  * Twitter Regex Abstract Class
  *
@@ -177,11 +179,8 @@ public static function __static()
         $tmp['valid_domain_name'] = '(?:(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
         $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . $tmp['invalid_characters'] . $tmp['spaces'] . ']';
 
-        $gTLD = '삼성|닷컴|닷넷|香格里拉|餐厅|食品|飞利浦|電訊盈科|集团|通販|购物|谷歌|诺基亚|联通|网络|网站|网店|网址|组织机构|移动|珠宝|点看|游戏|淡马锡|机构|書籍|时尚|新闻|政府|政务|手表|手机|我爱你|慈善|微博|广东|工行|家電|娱乐|天主教|大拿|大众汽车|在线|嘉里大酒店|嘉里|商标|商店|商城|公益|公司|八卦|健康|信息|佛山|企业|中文网|中信|世界|ポイント|ファッション|セール|ストア|コム|グーグル|クラウド|みんな|คอม|संगठन|नेट|कॉम|همراه|موقع|موبايلي|كوم|كاثوليك|عرب|شبكة|بيتك|بازار|العليان|ارامكو|اتصالات|ابوظبي|קום|сайт|рус|орг|онлайн|москва|ком|католик|дети|zuerich|zone|zippo|zip|zero|zara|zappos|yun|youtube|you|yokohama|yoga|yodobashi|yandex|yamaxun|yahoo|yachts|xyz|xxx|xperia|xin|xihuan|xfinity|xerox|xbox|wtf|wtc|wow|world|works|work|woodside|wolterskluwer|wme|winners|wine|windows|win|williamhill|wiki|wien|whoswho|weir|weibo|wedding|wed|website|weber|webcam|weatherchannel|weather|watches|watch|warman|wanggou|wang|walter|walmart|wales|vuelos|voyage|voto|voting|vote|volvo|volkswagen|vodka|vlaanderen|vivo|viva|vistaprint|vista|vision|visa|virgin|vip|vin|villas|viking|vig|video|viajes|vet|versicherung|vermögensberatung|vermögensberater|verisign|ventures|vegas|vanguard|vana|vacations|ups|uol|uno|university|unicom|uconnect|ubs|ubank|tvs|tushu|tunes|tui|tube|trv|trust|travelersinsurance|travelers|travelchannel|travel|training|trading|trade|toys|toyota|town|tours|total|toshiba|toray|top|tools|tokyo|today|tmall|tkmaxx|tjx|tjmaxx|tirol|tires|tips|tiffany|tienda|tickets|tiaa|theatre|theater|thd|teva|tennis|temasek|telefonica|telecity|tel|technology|tech|team|tdk|tci|taxi|tax|tattoo|tatar|tatamotors|target|taobao|talk|taipei|tab|systems|symantec|sydney|swiss|swiftcover|swatch|suzuki|surgery|surf|support|supply|supplies|sucks|style|study|studio|stream|store|storage|stockholm|stcgroup|stc|statoil|statefarm|statebank|starhub|star|staples|stada|srt|srl|spreadbetting|spot|spiegel|space|soy|sony|song|solutions|solar|sohu|software|softbank|social|soccer|sncf|smile|smart|sling|skype|sky|skin|ski|site|singles|sina|silk|shriram|showtime|show|shouji|shopping|shop|shoes|shiksha|shia|shell|shaw|sharp|shangrila|sfr|sexy|sex|sew|seven|ses|services|sener|sele
ct|seek|security|secure|seat|search|scot|scor|scjohnson|science|schwarz|schule|school|scholarships|schmidt|schaeffler|scb|sca|sbs|sbi|saxo|save|sas|sarl|sapo|sap|sanofi|sandvikcoromant|sandvik|samsung|samsclub|salon|sale|sakura|safety|safe|saarland|ryukyu|rwe|run|ruhr|rugby|rsvp|room|rogers|rodeo|rocks|rocher|rmit|rip|rio|ril|rightathome|ricoh|richardli|rich|rexroth|reviews|review|restaurant|rest|republican|report|repair|rentals|rent|ren|reliance|reit|reisen|reise|rehab|redumbrella|redstone|red|recipes|realty|realtor|realestate|read|raid|radio|racing|qvc|quest|quebec|qpon|pwc|pub|prudential|pru|protection|property|properties|promo|progressive|prof|productions|prod|pro|prime|press|praxi|pramerica|post|porn|politie|poker|pohl|pnc|plus|plumbing|playstation|play|place|pizza|pioneer|pink|ping|pin|pid|pictures|pictet|pics|piaget|physio|photos|photography|photo|phone|philips|phd|pharmacy|pfizer|pet|pccw|pay|passagens|party|parts|partners|pars|paris|panerai|panasonic|pamperedchef|page|ovh|ott|otsuka|osaka|origins|orientexpress|organic|org|orange|oracle|open|ooo|onyourside|online|onl|ong|one|omega|ollo|oldnavy|olayangroup|olayan|okinawa|office|off|observer|obi|nyc|ntt|nrw|nra|nowtv|nowruz|now|norton|northwesternmutual|nokia|nissay|nissan|ninja|nikon|nike|nico|nhk|ngo|nfl|nexus|nextdirect|next|news|newholland|new|neustar|network|netflix|netbank|net|nec|nba|navy|natura|nationwide|name|nagoya|nadex|nab|mutuelle|mutual|museum|mtr|mtpc|mtn|msd|movistar|movie|mov|motorcycles|moto|moscow|mortgage|mormon|mopar|montblanc|monster|money|monash|mom|moi|moe|moda|mobily|mobile|mobi|mma|mls|mlb|mitsubishi|mit|mint|mini|mil|microsoft|miami|metlife|merckmsd|meo|menu|men|memorial|meme|melbourne|meet|media|med|mckinsey|mcdonalds|mcd|mba|mattel|maserati|marshalls|marriott|markets|marketing|market|map|mango|management|man|makeup|maison|maif|madrid|macys|luxury|luxe|lupin|lundbeck|ltda|ltd|lplfinancial|lpl|love|lotto|lotte|london|lol|loft|locus|locker|loans|loan|lixil|living|live|lipsy|link|linde
|lincoln|limo|limited|lilly|like|lighting|lifestyle|lifeinsurance|life|lidl|liaison|lgbt|lexus|lego|legal|lefrak|leclerc|lease|lds|lawyer|law|latrobe|latino|lat|lasalle|lanxess|landrover|land|lancome|lancia|lancaster|lamer|lamborghini|ladbrokes|lacaixa|kyoto|kuokgroup|kred|krd|kpn|kpmg|kosher|komatsu|koeln|kiwi|kitchen|kindle|kinder|kim|kia|kfh|kerryproperties|kerrylogistics|kerryhotels|kddi|kaufen|juniper|juegos|jprs|jpmorgan|joy|jot|joburg|jobs|jnj|jmp|jll|jlc|jio|jewelry|jetzt|jeep|jcp|jcb|java|jaguar|iwc|iveco|itv|itau|istanbul|ist|ismaili|iselect|irish|ipiranga|investments|intuit|international|intel|int|insure|insurance|institute|ink|ing|info|infiniti|industries|immobilien|immo|imdb|imamat|ikano|iinet|ifm|ieee|icu|ice|icbc|ibm|hyundai|hyatt|hughes|htc|hsbc|how|house|hotmail|hotels|hoteles|hot|hosting|host|hospital|horse|honeywell|honda|homesense|homes|homegoods|homedepot|holiday|holdings|hockey|hkt|hiv|hitachi|hisamitsu|hiphop|hgtv|hermes|here|helsinki|help|healthcare|health|hdfcbank|hdfc|hbo|haus|hangout|hamburg|hair|guru|guitars|guide|guge|gucci|guardian|group|grocery|gripe|green|gratis|graphics|grainger|gov|got|gop|google|goog|goodyear|goodhands|goo|golf|goldpoint|gold|godaddy|gmx|gmo|gmbh|gmail|globo|global|gle|glass|glade|giving|gives|gifts|gift|ggee|george|genting|gent|gea|gdn|gbiz|garden|gap|games|game|gallup|gallo|gallery|gal|fyi|futbol|furniture|fund|fun|fujixerox|fujitsu|ftr|frontier|frontdoor|frogans|frl|fresenius|free|fox|foundation|forum|forsale|forex|ford|football|foodnetwork|food|foo|fly|flsmidth|flowers|florist|flir|flights|flickr|fitness|fit|fishing|fish|firmdale|firestone|fire|financial|finance|final|film|fido|fidelity|fiat|ferrero|ferrari|feedback|fedex|fast|fashion|farmers|farm|fans|fan|family|faith|fairwinds|fail|fage|extraspace|express|exposed|expert|exchange|everbank|events|eus|eurovision|etisalat|esurance|estate|esq|erni|ericsson|equipment|epson|epost|enterprises|engineering|engineer|energy|emerck|email|education|edu|edeka|eco|eat|earth|
dvr|dvag|durban|dupont|duns|dunlop|duck|dubai|dtv|drive|download|dot|doosan|domains|doha|dog|dodge|doctor|docs|dnp|diy|dish|discover|discount|directory|direct|digital|diet|diamonds|dhl|dev|design|desi|dentist|dental|democrat|delta|deloitte|dell|delivery|degree|deals|dealer|deal|dds|dclk|day|datsun|dating|date|data|dance|dad|dabur|cyou|cymru|cuisinella|csc|cruises|cruise|crs|crown|cricket|creditunion|creditcard|credit|courses|coupons|coupon|country|corsica|coop|cool|cookingchannel|cooking|contractors|contact|consulting|construction|condos|comsec|computer|compare|company|community|commbank|comcast|com|cologne|college|coffee|codes|coach|clubmed|club|cloud|clothing|clinique|clinic|click|cleaning|claims|cityeats|city|citic|citi|citadel|cisco|circle|cipriani|church|chrysler|chrome|christmas|chloe|chintai|cheap|chat|chase|channel|chanel|cfd|cfa|cern|ceo|center|ceb|cbs|cbre|cbn|cba|catholic|catering|cat|casino|cash|caseih|case|casa|cartier|cars|careers|career|care|cards|caravan|car|capitalone|capital|capetown|canon|cancerresearch|camp|camera|cam|calvinklein|call|cal|cafe|cab|bzh|buzz|buy|business|builders|build|bugatti|budapest|brussels|brother|broker|broadway|bridgestone|bradesco|box|boutique|bot|boston|bostik|bosch|boots|booking|book|boo|bond|bom|bofa|boehringer|boats|bnpparibas|bnl|bmw|bms|blue|bloomberg|blog|blockbuster|blanco|blackfriday|black|biz|bio|bingo|bing|bike|bid|bible|bharti|bet|bestbuy|best|berlin|bentley|beer|beauty|beats|bcn|bcg|bbva|bbt|bbc|bayern|bauhaus|basketball|baseball|bargains|barefoot|barclays|barclaycard|barcelona|bar|bank|band|bananarepublic|banamex|baidu|baby|azure|axa|aws|avianca|autos|auto|author|auspost|audio|audible|audi|auction|attorney|athleta|associates|asia|asda|arte|art|arpa|army|archi|aramco|arab|aquarelle|apple|app|apartments|aol|anz|anquan|android|analytics|amsterdam|amica|amfam|amex|americanfamily|americanexpress|alstom|alsace|ally|allstate|allfinanz|alipay|alibaba|alfaromeo|akdn|airtel|airforce|airbus|aigo|aig|agency|agakhan|africa
|afl|afamilycompany|aetna|aero|aeg|adult|ads|adac|actor|active|aco|accountants|accountant|accenture|academy|abudhabi|abogado|able|abc|abbvie|abbott|abb|abarth|aarp|aaa|onion';
-        $ccTLD = '한국|香港|澳門|新加坡|台灣|台湾|中國|中国|გე|ไทย|ලංකා|ഭാരതം|ಭಾರತ|భారత్|சிங்கப்பூர்|இலங்கை|இந்தியா|ଭାରତ|ભારત|ਭਾਰਤ|ভাৰত|ভারত|বাংলা|भारोत|भारतम्|भारत|ڀارت|پاکستان|مليسيا|مصر|قطر|فلسطين|عمان|عراق|سورية|سودان|تونس|بھارت|بارت|ایران|امارات|المغرب|السعودية|الجزائر|الاردن|հայ|қаз|укр|срб|рф|мон|мкд|ею|бел|бг|ελ|zw|zm|za|yt|ye|ws|wf|vu|vn|vi|vg|ve|vc|va|uz|uy|us|um|uk|ug|ua|tz|tw|tv|tt|tr|tp|to|tn|tm|tl|tk|tj|th|tg|tf|td|tc|sz|sy|sx|sv|su|st|ss|sr|so|sn|sm|sl|sk|sj|si|sh|sg|se|sd|sc|sb|sa|rw|ru|rs|ro|re|qa|py|pw|pt|ps|pr|pn|pm|pl|pk|ph|pg|pf|pe|pa|om|nz|nu|nr|np|no|nl|ni|ng|nf|ne|nc|na|mz|my|mx|mw|mv|mu|mt|ms|mr|mq|mp|mo|mn|mm|ml|mk|mh|mg|mf|me|md|mc|ma|ly|lv|lu|lt|ls|lr|lk|li|lc|lb|la|kz|ky|kw|kr|kp|kn|km|ki|kh|kg|ke|jp|jo|jm|je|it|is|ir|iq|io|in|im|il|ie|id|hu|ht|hr|hn|hm|hk|gy|gw|gu|gt|gs|gr|gq|gp|gn|gm|gl|gi|gh|gg|gf|ge|gd|gb|ga|fr|fo|fm|fk|fj|fi|eu|et|es|er|eh|eg|ee|ec|dz|do|dm|dk|dj|de|cz|cy|cx|cw|cv|cu|cr|co|cn|cm|cl|ck|ci|ch|cg|cf|cd|cc|ca|bz|by|bw|bv|bt|bs|br|bq|bo|bn|bm|bl|bj|bi|bh|bg|bf|be|bd|bb|ba|az|ax|aw|au|at|as|ar|aq|ao|an|am|al|ai|ag|af|ae|ad|ac';
-
-        $tmp['valid_gTLD'] = '(?:(?:' . $gTLD . ')(?=[^0-9a-z@]|$))';
-        $tmp['valid_ccTLD'] = '(?:(?:' . $ccTLD . ')(?=[^0-9a-z@]|$))';
+        $tmp['valid_gTLD'] = TldLists::getValidGTLD();
+        $tmp['valid_ccTLD'] = TldLists::getValidCcTLD();
         $tmp['valid_special_ccTLD'] = '(?:(?:' . 'co|tv' . ')(?=[^0-9a-z@]|$))';
         $tmp['valid_punycode'] = '(?:xn--[0-9a-z]+)';
 
diff --git a/lib/Twitter/Text/TldLists.php b/lib/Twitter/Text/TldLists.php
new file mode 100644
index 0000000..ccd9680
--- /dev/null
+++ b/lib/Twitter/Text/TldLists.php
@@ -0,0 +1,1636 @@
+<?php
+
+/**
+ * @author     Takashi Nojima
+ * @copyright  Copyright 2017, Takashi Nojima
+ * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
+ * @package    Twitter.Text
+ */
+
+namespace Twitter\Text;
+
+/**
+ * Top-level domain (TLD) lists that supply Regex with its valid_gTLD / valid_ccTLD patterns.
+ */
+final class TldLists
+{
+
+    /**
+     * Generic top-level domains (gTLDs), one name per entry, written without a leading dot.
+     *
+     * @var array
+     */
+    private static $gTLDs = array(
+        '삼성',
+        '닷컴',
+        '닷넷',
+        '香格里拉',
+        '餐厅',
+        '食品',
+        '飞利浦',
+        '電訊盈科',
+        '集团',
+        '通販',
+        '购物',
+        '谷歌',
+        '诺基亚',
+        '联通',
+        '网络',
+        '网站',
+        '网店',
+        '网址',
+        '组织机构',
+        '移动',
+        '珠宝',
+        '点看',
+        '游戏',
+        '淡马锡',
+        '机构',
+        '書籍',
+        '时尚',
+        '新闻',
+        '政府',
+        '政务',
+        '手表',
+        '手机',
+        '我爱你',
+        '慈善',
+        '微博',
+        '广东',
+        '工行',
+        '家電',
+        '娱乐',
+        '天主教',
+        '大拿',
+        '大众汽车',
+        '在线',
+        '嘉里大酒店',
+        '嘉里',
+        '商标',
+        '商店',
+        '商城',
+        '公益',
+        '公司',
+        '八卦',
+        '健康',
+        '信息',
+        '佛山',
+        '企业',
+        '中文网',
+        '中信',
+        '世界',
+        'ポイント',
+        'ファッション',
+        'セール',
+        'ストア',
+        'コム',
+        'グーグル',
+        'クラウド',
+        'みんな',
+        'คอม',
+        'संगठन',
+        'नेट',
+        'कॉम',
+        'همراه',
+        'موقع',
+        'موبايلي',
+        'كوم',
+        'كاثوليك',
+        'عرب',
+        'شبكة',
+        'بيتك',
+        'بازار',
+        'العليان',
+        'ارامكو',
+        'اتصالات',
+        'ابوظبي',
+        'קום',
+        'сайт',
+        'рус',
+        'орг',
+        'онлайн',
+        'москва',
+        'ком',
+        'католик',
+        'дети',
+        'zuerich',
+        'zone',
+        'zippo',
+        'zip',
+        'zero',
+        'zara',
+        'zappos',
+        'yun',
+        'youtube',
+        'you',
+        'yokohama',
+        'yoga',
+        'yodobashi',
+        'yandex',
+        'yamaxun',
+        'yahoo',
+        'yachts',
+        'xyz',
+        'xxx',
+        'xperia',
+        'xin',
+        'xihuan',
+        'xfinity',
+        'xerox',
+        'xbox',
+        'wtf',
+        'wtc',
+        'wow',
+        'world',
+        'works',
+        'work',
+        'woodside',
+        'wolterskluwer',
+        'wme',
+        'winners',
+        'wine',
+        'windows',
+        'win',
+        'williamhill',
+        'wiki',
+        'wien',
+        'whoswho',
+        'weir',
+        'weibo',
+        'wedding',
+        'wed',
+        'website',
+        'weber',
+        'webcam',
+        'weatherchannel',
+        'weather',
+        'watches',
+        'watch',
+        'warman',
+        'wanggou',
+        'wang',
+        'walter',
+        'walmart',
+        'wales',
+        'vuelos',
+        'voyage',
+        'voto',
+        'voting',
+        'vote',
+        'volvo',
+        'volkswagen',
+        'vodka',
+        'vlaanderen',
+        'vivo',
+        'viva',
+        'vistaprint',
+        'vista',
+        'vision',
+        'visa',
+        'virgin',
+        'vip',
+        'vin',
+        'villas',
+        'viking',
+        'vig',
+        'video',
+        'viajes',
+        'vet',
+        'versicherung',
+        'vermögensberatung',
+        'vermögensberater',
+        'verisign',
+        'ventures',
+        'vegas',
+        'vanguard',
+        'vana',
+        'vacations',
+        'ups',
+        'uol',
+        'uno',
+        'university',
+        'unicom',
+        'uconnect',
+        'ubs',
+        'ubank',
+        'tvs',
+        'tushu',
+        'tunes',
+        'tui',
+        'tube',
+        'trv',
+        'trust',
+        'travelersinsurance',
+        'travelers',
+        'travelchannel',
+        'travel',
+        'training',
+        'trading',
+        'trade',
+        'toys',
+        'toyota',
+        'town',
+        'tours',
+        'total',
+        'toshiba',
+        'toray',
+        'top',
+        'tools',
+        'tokyo',
+        'today',
+        'tmall',
+        'tkmaxx',
+        'tjx',
+        'tjmaxx',
+        'tirol',
+        'tires',
+        'tips',
+        'tiffany',
+        'tienda',
+        'tickets',
+        'tiaa',
+        'theatre',
+        'theater',
+        'thd',
+        'teva',
+        'tennis',
+        'temasek',
+        'telefonica',
+        'telecity',
+        'tel',
+        'technology',
+        'tech',
+        'team',
+        'tdk',
+        'tci',
+        'taxi',
+        'tax',
+        'tattoo',
+        'tatar',
+        'tatamotors',
+        'target',
+        'taobao',
+        'talk',
+        'taipei',
+        'tab',
+        'systems',
+        'symantec',
+        'sydney',
+        'swiss',
+        'swiftcover',
+        'swatch',
+        'suzuki',
+        'surgery',
+        'surf',
+        'support',
+        'supply',
+        'supplies',
+        'sucks',
+        'style',
+        'study',
+        'studio',
+        'stream',
+        'store',
+        'storage',
+        'stockholm',
+        'stcgroup',
+        'stc',
+        'statoil',
+        'statefarm',
+        'statebank',
+        'starhub',
+        'star',
+        'staples',
+        'stada',
+        'srt',
+        'srl',
+        'spreadbetting',
+        'spot',
+        'spiegel',
+        'space',
+        'soy',
+        'sony',
+        'song',
+        'solutions',
+        'solar',
+        'sohu',
+        'software',
+        'softbank',
+        'social',
+        'soccer',
+        'sncf',
+        'smile',
+        'smart',
+        'sling',
+        'skype',
+        'sky',
+        'skin',
+        'ski',
+        'site',
+        'singles',
+        'sina',
+        'silk',
+        'shriram',
+        'showtime',
+        'show',
+        'shouji',
+        'shopping',
+        'shop',
+        'shoes',
+        'shiksha',
+        'shia',
+        'shell',
+        'shaw',
+        'sharp',
+        'shangrila',
+        'sfr',
+        'sexy',
+        'sex',
+        'sew',
+        'seven',
+        'ses',
+        'services',
+        'sener',
+        'select',
+        'seek',
+        'security',
+        'secure',
+        'seat',
+        'search',
+        'scot',
+        'scor',
+        'scjohnson',
+        'science',
+        'schwarz',
+        'schule',
+        'school',
+        'scholarships',
+        'schmidt',
+        'schaeffler',
+        'scb',
+        'sca',
+        'sbs',
+        'sbi',
+        'saxo',
+        'save',
+        'sas',
+        'sarl',
+        'sapo',
+        'sap',
+        'sanofi',
+        'sandvikcoromant',
+        'sandvik',
+        'samsung',
+        'samsclub',
+        'salon',
+        'sale',
+        'sakura',
+        'safety',
+        'safe',
+        'saarland',
+        'ryukyu',
+        'rwe',
+        'run',
+        'ruhr',
+        'rugby',
+        'rsvp',
+        'room',
+        'rogers',
+        'rodeo',
+        'rocks',
+        'rocher',
+        'rmit',
+        'rip',
+        'rio',
+        'ril',
+        'rightathome',
+        'ricoh',
+        'richardli',
+        'rich',
+        'rexroth',
+        'reviews',
+        'review',
+        'restaurant',
+        'rest',
+        'republican',
+        'report',
+        'repair',
+        'rentals',
+        'rent',
+        'ren',
+        'reliance',
+        'reit',
+        'reisen',
+        'reise',
+        'rehab',
+        'redumbrella',
+        'redstone',
+        'red',
+        'recipes',
+        'realty',
+        'realtor',
+        'realestate',
+        'read',
+        'raid',
+        'radio',
+        'racing',
+        'qvc',
+        'quest',
+        'quebec',
+        'qpon',
+        'pwc',
+        'pub',
+        'prudential',
+        'pru',
+        'protection',
+        'property',
+        'properties',
+        'promo',
+        'progressive',
+        'prof',
+        'productions',
+        'prod',
+        'pro',
+        'prime',
+        'press',
+        'praxi',
+        'pramerica',
+        'post',
+        'porn',
+        'politie',
+        'poker',
+        'pohl',
+        'pnc',
+        'plus',
+        'plumbing',
+        'playstation',
+        'play',
+        'place',
+        'pizza',
+        'pioneer',
+        'pink',
+        'ping',
+        'pin',
+        'pid',
+        'pictures',
+        'pictet',
+        'pics',
+        'piaget',
+        'physio',
+        'photos',
+        'photography',
+        'photo',
+        'phone',
+        'philips',
+        'phd',
+        'pharmacy',
+        'pfizer',
+        'pet',
+        'pccw',
+        'pay',
+        'passagens',
+        'party',
+        'parts',
+        'partners',
+        'pars',
+        'paris',
+        'panerai',
+        'panasonic',
+        'pamperedchef',
+        'page',
+        'ovh',
+        'ott',
+        'otsuka',
+        'osaka',
+        'origins',
+        'orientexpress',
+        'organic',
+        'org',
+        'orange',
+        'oracle',
+        'open',
+        'ooo',
+        'onyourside',
+        'online',
+        'onl',
+        'ong',
+        'one',
+        'omega',
+        'ollo',
+        'oldnavy',
+        'olayangroup',
+        'olayan',
+        'okinawa',
+        'office',
+        'off',
+        'observer',
+        'obi',
+        'nyc',
+        'ntt',
+        'nrw',
+        'nra',
+        'nowtv',
+        'nowruz',
+        'now',
+        'norton',
+        'northwesternmutual',
+        'nokia',
+        'nissay',
+        'nissan',
+        'ninja',
+        'nikon',
+        'nike',
+        'nico',
+        'nhk',
+        'ngo',
+        'nfl',
+        'nexus',
+        'nextdirect',
+        'next',
+        'news',
+        'newholland',
+        'new',
+        'neustar',
+        'network',
+        'netflix',
+        'netbank',
+        'net',
+        'nec',
+        'nba',
+        'navy',
+        'natura',
+        'nationwide',
+        'name',
+        'nagoya',
+        'nadex',
+        'nab',
+        'mutuelle',
+        'mutual',
+        'museum',
+        'mtr',
+        'mtpc',
+        'mtn',
+        'msd',
+        'movistar',
+        'movie',
+        'mov',
+        'motorcycles',
+        'moto',
+        'moscow',
+        'mortgage',
+        'mormon',
+        'mopar',
+        'montblanc',
+        'monster',
+        'money',
+        'monash',
+        'mom',
+        'moi',
+        'moe',
+        'moda',
+        'mobily',
+        'mobile',
+        'mobi',
+        'mma',
+        'mls',
+        'mlb',
+        'mitsubishi',
+        'mit',
+        'mint',
+        'mini',
+        'mil',
+        'microsoft',
+        'miami',
+        'metlife',
+        'merckmsd',
+        'meo',
+        'menu',
+        'men',
+        'memorial',
+        'meme',
+        'melbourne',
+        'meet',
+        'media',
+        'med',
+        'mckinsey',
+        'mcdonalds',
+        'mcd',
+        'mba',
+        'mattel',
+        'maserati',
+        'marshalls',
+        'marriott',
+        'markets',
+        'marketing',
+        'market',
+        'map',
+        'mango',
+        'management',
+        'man',
+        'makeup',
+        'maison',
+        'maif',
+        'madrid',
+        'macys',
+        'luxury',
+        'luxe',
+        'lupin',
+        'lundbeck',
+        'ltda',
+        'ltd',
+        'lplfinancial',
+        'lpl',
+        'love',
+        'lotto',
+        'lotte',
+        'london',
+        'lol',
+        'loft',
+        'locus',
+        'locker',
+        'loans',
+        'loan',
+        'lixil',
+        'living',
+        'live',
+        'lipsy',
+        'link',
+        'linde',
+        'lincoln',
+        'limo',
+        'limited',
+        'lilly',
+        'like',
+        'lighting',
+        'lifestyle',
+        'lifeinsurance',
+        'life',
+        'lidl',
+        'liaison',
+        'lgbt',
+        'lexus',
+        'lego',
+        'legal',
+        'lefrak',
+        'leclerc',
+        'lease',
+        'lds',
+        'lawyer',
+        'law',
+        'latrobe',
+        'latino',
+        'lat',
+        'lasalle',
+        'lanxess',
+        'landrover',
+        'land',
+        'lancome',
+        'lancia',
+        'lancaster',
+        'lamer',
+        'lamborghini',
+        'ladbrokes',
+        'lacaixa',
+        'kyoto',
+        'kuokgroup',
+        'kred',
+        'krd',
+        'kpn',
+        'kpmg',
+        'kosher',
+        'komatsu',
+        'koeln',
+        'kiwi',
+        'kitchen',
+        'kindle',
+        'kinder',
+        'kim',
+        'kia',
+        'kfh',
+        'kerryproperties',
+        'kerrylogistics',
+        'kerryhotels',
+        'kddi',
+        'kaufen',
+        'juniper',
+        'juegos',
+        'jprs',
+        'jpmorgan',
+        'joy',
+        'jot',
+        'joburg',
+        'jobs',
+        'jnj',
+        'jmp',
+        'jll',
+        'jlc',
+        'jio',
+        'jewelry',
+        'jetzt',
+        'jeep',
+        'jcp',
+        'jcb',
+        'java',
+        'jaguar',
+        'iwc',
+        'iveco',
+        'itv',
+        'itau',
+        'istanbul',
+        'ist',
+        'ismaili',
+        'iselect',
+        'irish',
+        'ipiranga',
+        'investments',
+        'intuit',
+        'international',
+        'intel',
+        'int',
+        'insure',
+        'insurance',
+        'institute',
+        'ink',
+        'ing',
+        'info',
+        'infiniti',
+        'industries',
+        'immobilien',
+        'immo',
+        'imdb',
+        'imamat',
+        'ikano',
+        'iinet',
+        'ifm',
+        'ieee',
+        'icu',
+        'ice',
+        'icbc',
+        'ibm',
+        'hyundai',
+        'hyatt',
+        'hughes',
+        'htc',
+        'hsbc',
+        'how',
+        'house',
+        'hotmail',
+        'hotels',
+        'hoteles',
+        'hot',
+        'hosting',
+        'host',
+        'hospital',
+        'horse',
+        'honeywell',
+        'honda',
+        'homesense',
+        'homes',
+        'homegoods',
+        'homedepot',
+        'holiday',
+        'holdings',
+        'hockey',
+        'hkt',
+        'hiv',
+        'hitachi',
+        'hisamitsu',
+        'hiphop',
+        'hgtv',
+        'hermes',
+        'here',
+        'helsinki',
+        'help',
+        'healthcare',
+        'health',
+        'hdfcbank',
+        'hdfc',
+        'hbo',
+        'haus',
+        'hangout',
+        'hamburg',
+        'hair',
+        'guru',
+        'guitars',
+        'guide',
+        'guge',
+        'gucci',
+        'guardian',
+        'group',
+        'grocery',
+        'gripe',
+        'green',
+        'gratis',
+        'graphics',
+        'grainger',
+        'gov',
+        'got',
+        'gop',
+        'google',
+        'goog',
+        'goodyear',
+        'goodhands',
+        'goo',
+        'golf',
+        'goldpoint',
+        'gold',
+        'godaddy',
+        'gmx',
+        'gmo',
+        'gmbh',
+        'gmail',
+        'globo',
+        'global',
+        'gle',
+        'glass',
+        'glade',
+        'giving',
+        'gives',
+        'gifts',
+        'gift',
+        'ggee',
+        'george',
+        'genting',
+        'gent',
+        'gea',
+        'gdn',
+        'gbiz',
+        'garden',
+        'gap',
+        'games',
+        'game',
+        'gallup',
+        'gallo',
+        'gallery',
+        'gal',
+        'fyi',
+        'futbol',
+        'furniture',
+        'fund',
+        'fun',
+        'fujixerox',
+        'fujitsu',
+        'ftr',
+        'frontier',
+        'frontdoor',
+        'frogans',
+        'frl',
+        'fresenius',
+        'free',
+        'fox',
+        'foundation',
+        'forum',
+        'forsale',
+        'forex',
+        'ford',
+        'football',
+        'foodnetwork',
+        'food',
+        'foo',
+        'fly',
+        'flsmidth',
+        'flowers',
+        'florist',
+        'flir',
+        'flights',
+        'flickr',
+        'fitness',
+        'fit',
+        'fishing',
+        'fish',
+        'firmdale',
+        'firestone',
+        'fire',
+        'financial',
+        'finance',
+        'final',
+        'film',
+        'fido',
+        'fidelity',
+        'fiat',
+        'ferrero',
+        'ferrari',
+        'feedback',
+        'fedex',
+        'fast',
+        'fashion',
+        'farmers',
+        'farm',
+        'fans',
+        'fan',
+        'family',
+        'faith',
+        'fairwinds',
+        'fail',
+        'fage',
+        'extraspace',
+        'express',
+        'exposed',
+        'expert',
+        'exchange',
+        'everbank',
+        'events',
+        'eus',
+        'eurovision',
+        'etisalat',
+        'esurance',
+        'estate',
+        'esq',
+        'erni',
+        'ericsson',
+        'equipment',
+        'epson',
+        'epost',
+        'enterprises',
+        'engineering',
+        'engineer',
+        'energy',
+        'emerck',
+        'email',
+        'education',
+        'edu',
+        'edeka',
+        'eco',
+        'eat',
+        'earth',
+        'dvr',
+        'dvag',
+        'durban',
+        'dupont',
+        'duns',
+        'dunlop',
+        'duck',
+        'dubai',
+        'dtv',
+        'drive',
+        'download',
+        'dot',
+        'doosan',
+        'domains',
+        'doha',
+        'dog',
+        'dodge',
+        'doctor',
+        'docs',
+        'dnp',
+        'diy',
+        'dish',
+        'discover',
+        'discount',
+        'directory',
+        'direct',
+        'digital',
+        'diet',
+        'diamonds',
+        'dhl',
+        'dev',
+        'design',
+        'desi',
+        'dentist',
+        'dental',
+        'democrat',
+        'delta',
+        'deloitte',
+        'dell',
+        'delivery',
+        'degree',
+        'deals',
+        'dealer',
+        'deal',
+        'dds',
+        'dclk',
+        'day',
+        'datsun',
+        'dating',
+        'date',
+        'data',
+        'dance',
+        'dad',
+        'dabur',
+        'cyou',
+        'cymru',
+        'cuisinella',
+        'csc',
+        'cruises',
+        'cruise',
+        'crs',
+        'crown',
+        'cricket',
+        'creditunion',
+        'creditcard',
+        'credit',
+        'courses',
+        'coupons',
+        'coupon',
+        'country',
+        'corsica',
+        'coop',
+        'cool',
+        'cookingchannel',
+        'cooking',
+        'contractors',
+        'contact',
+        'consulting',
+        'construction',
+        'condos',
+        'comsec',
+        'computer',
+        'compare',
+        'company',
+        'community',
+        'commbank',
+        'comcast',
+        'com',
+        'cologne',
+        'college',
+        'coffee',
+        'codes',
+        'coach',
+        'clubmed',
+        'club',
+        'cloud',
+        'clothing',
+        'clinique',
+        'clinic',
+        'click',
+        'cleaning',
+        'claims',
+        'cityeats',
+        'city',
+        'citic',
+        'citi',
+        'citadel',
+        'cisco',
+        'circle',
+        'cipriani',
+        'church',
+        'chrysler',
+        'chrome',
+        'christmas',
+        'chloe',
+        'chintai',
+        'cheap',
+        'chat',
+        'chase',
+        'channel',
+        'chanel',
+        'cfd',
+        'cfa',
+        'cern',
+        'ceo',
+        'center',
+        'ceb',
+        'cbs',
+        'cbre',
+        'cbn',
+        'cba',
+        'catholic',
+        'catering',
+        'cat',
+        'casino',
+        'cash',
+        'caseih',
+        'case',
+        'casa',
+        'cartier',
+        'cars',
+        'careers',
+        'career',
+        'care',
+        'cards',
+        'caravan',
+        'car',
+        'capitalone',
+        'capital',
+        'capetown',
+        'canon',
+        'cancerresearch',
+        'camp',
+        'camera',
+        'cam',
+        'calvinklein',
+        'call',
+        'cal',
+        'cafe',
+        'cab',
+        'bzh',
+        'buzz',
+        'buy',
+        'business',
+        'builders',
+        'build',
+        'bugatti',
+        'budapest',
+        'brussels',
+        'brother',
+        'broker',
+        'broadway',
+        'bridgestone',
+        'bradesco',
+        'box',
+        'boutique',
+        'bot',
+        'boston',
+        'bostik',
+        'bosch',
+        'boots',
+        'booking',
+        'book',
+        'boo',
+        'bond',
+        'bom',
+        'bofa',
+        'boehringer',
+        'boats',
+        'bnpparibas',
+        'bnl',
+        'bmw',
+        'bms',
+        'blue',
+        'bloomberg',
+        'blog',
+        'blockbuster',
+        'blanco',
+        'blackfriday',
+        'black',
+        'biz',
+        'bio',
+        'bingo',
+        'bing',
+        'bike',
+        'bid',
+        'bible',
+        'bharti',
+        'bet',
+        'bestbuy',
+        'best',
+        'berlin',
+        'bentley',
+        'beer',
+        'beauty',
+        'beats',
+        'bcn',
+        'bcg',
+        'bbva',
+        'bbt',
+        'bbc',
+        'bayern',
+        'bauhaus',
+        'basketball',
+        'baseball',
+        'bargains',
+        'barefoot',
+        'barclays',
+        'barclaycard',
+        'barcelona',
+        'bar',
+        'bank',
+        'band',
+        'bananarepublic',
+        'banamex',
+        'baidu',
+        'baby',
+        'azure',
+        'axa',
+        'aws',
+        'avianca',
+        'autos',
+        'auto',
+        'author',
+        'auspost',
+        'audio',
+        'audible',
+        'audi',
+        'auction',
+        'attorney',
+        'athleta',
+        'associates',
+        'asia',
+        'asda',
+        'arte',
+        'art',
+        'arpa',
+        'army',
+        'archi',
+        'aramco',
+        'arab',
+        'aquarelle',
+        'apple',
+        'app',
+        'apartments',
+        'aol',
+        'anz',
+        'anquan',
+        'android',
+        'analytics',
+        'amsterdam',
+        'amica',
+        'amfam',
+        'amex',
+        'americanfamily',
+        'americanexpress',
+        'alstom',
+        'alsace',
+        'ally',
+        'allstate',
+        'allfinanz',
+        'alipay',
+        'alibaba',
+        'alfaromeo',
+        'akdn',
+        'airtel',
+        'airforce',
+        'airbus',
+        'aigo',
+        'aig',
+        'agency',
+        'agakhan',
+        'africa',
+        'afl',
+        'afamilycompany',
+        'aetna',
+        'aero',
+        'aeg',
+        'adult',
+        'ads',
+        'adac',
+        'actor',
+        'active',
+        'aco',
+        'accountants',
+        'accountant',
+        'accenture',
+        'academy',
+        'abudhabi',
+        'abogado',
+        'able',
+        'abc',
+        'abbvie',
+        'abbott',
+        'abb',
+        'abarth',
+        'aarp',
+        'aaa',
+        'onion',
+    );
+
+    /**
+     * ccTLDs
+     *
+     * @var array
+     */
+    private static $ccTLDs = array(
+        '한국',
+        '香港',
+        '澳門',
+        '新加坡',
+        '台灣',
+        '台湾',
+        '中國',
+        '中国',
+        'გე',
+        'ไทย',
+        'ලංකා',
+        'ഭാരതം',
+        'ಭಾರತ',
+        'భారత్',
+        'சிங்கப்பூர்',
+        'இலங்கை',
+        'இந்தியா',
+        'ଭାରତ',
+        'ભારત',
+        'ਭਾਰਤ',
+        'ভাৰত',
+        'ভারত',
+        'বাংলা',
+        'भारोत',
+        'भारतम्',
+        'भारत',
+        'ڀارت',
+        'پاکستان',
+        'موريتانيا',
+        'مليسيا',
+        'مصر',
+        'قطر',
+        'فلسطين',
+        'عمان',
+        'عراق',
+        'سورية',
+        'سودان',
+        'تونس',
+        'بھارت',
+        'بارت',
+        'ایران',
+        'امارات',
+        'المغرب',
+        'السعودية',
+        'الجزائر',
+        'الاردن',
+        'հայ',
+        'қаз',
+        'укр',
+        'срб',
+        'рф',
+        'мон',
+        'мкд',
+        'ею',
+        'бел',
+        'бг',
+        'ελ',
+        'zw',
+        'zm',
+        'za',
+        'yt',
+        'ye',
+        'ws',
+        'wf',
+        'vu',
+        'vn',
+        'vi',
+        'vg',
+        've',
+        'vc',
+        'va',
+        'uz',
+        'uy',
+        'us',
+        'um',
+        'uk',
+        'ug',
+        'ua',
+        'tz',
+        'tw',
+        'tv',
+        'tt',
+        'tr',
+        'tp',
+        'to',
+        'tn',
+        'tm',
+        'tl',
+        'tk',
+        'tj',
+        'th',
+        'tg',
+        'tf',
+        'td',
+        'tc',
+        'sz',
+        'sy',
+        'sx',
+        'sv',
+        'su',
+        'st',
+        'ss',
+        'sr',
+        'so',
+        'sn',
+        'sm',
+        'sl',
+        'sk',
+        'sj',
+        'si',
+        'sh',
+        'sg',
+        'se',
+        'sd',
+        'sc',
+        'sb',
+        'sa',
+        'rw',
+        'ru',
+        'rs',
+        'ro',
+        're',
+        'qa',
+        'py',
+        'pw',
+        'pt',
+        'ps',
+        'pr',
+        'pn',
+        'pm',
+        'pl',
+        'pk',
+        'ph',
+        'pg',
+        'pf',
+        'pe',
+        'pa',
+        'om',
+        'nz',
+        'nu',
+        'nr',
+        'np',
+        'no',
+        'nl',
+        'ni',
+        'ng',
+        'nf',
+        'ne',
+        'nc',
+        'na',
+        'mz',
+        'my',
+        'mx',
+        'mw',
+        'mv',
+        'mu',
+        'mt',
+        'ms',
+        'mr',
+        'mq',
+        'mp',
+        'mo',
+        'mn',
+        'mm',
+        'ml',
+        'mk',
+        'mh',
+        'mg',
+        'mf',
+        'me',
+        'md',
+        'mc',
+        'ma',
+        'ly',
+        'lv',
+        'lu',
+        'lt',
+        'ls',
+        'lr',
+        'lk',
+        'li',
+        'lc',
+        'lb',
+        'la',
+        'kz',
+        'ky',
+        'kw',
+        'kr',
+        'kp',
+        'kn',
+        'km',
+        'ki',
+        'kh',
+        'kg',
+        'ke',
+        'jp',
+        'jo',
+        'jm',
+        'je',
+        'it',
+        'is',
+        'ir',
+        'iq',
+        'io',
+        'in',
+        'im',
+        'il',
+        'ie',
+        'id',
+        'hu',
+        'ht',
+        'hr',
+        'hn',
+        'hm',
+        'hk',
+        'gy',
+        'gw',
+        'gu',
+        'gt',
+        'gs',
+        'gr',
+        'gq',
+        'gp',
+        'gn',
+        'gm',
+        'gl',
+        'gi',
+        'gh',
+        'gg',
+        'gf',
+        'ge',
+        'gd',
+        'gb',
+        'ga',
+        'fr',
+        'fo',
+        'fm',
+        'fk',
+        'fj',
+        'fi',
+        'eu',
+        'et',
+        'es',
+        'er',
+        'eh',
+        'eg',
+        'ee',
+        'ec',
+        'dz',
+        'do',
+        'dm',
+        'dk',
+        'dj',
+        'de',
+        'cz',
+        'cy',
+        'cx',
+        'cw',
+        'cv',
+        'cu',
+        'cr',
+        'co',
+        'cn',
+        'cm',
+        'cl',
+        'ck',
+        'ci',
+        'ch',
+        'cg',
+        'cf',
+        'cd',
+        'cc',
+        'ca',
+        'bz',
+        'by',
+        'bw',
+        'bv',
+        'bt',
+        'bs',
+        'br',
+        'bq',
+        'bo',
+        'bn',
+        'bm',
+        'bl',
+        'bj',
+        'bi',
+        'bh',
+        'bg',
+        'bf',
+        'be',
+        'bd',
+        'bb',
+        'ba',
+        'az',
+        'ax',
+        'aw',
+        'au',
+        'at',
+        'as',
+        'ar',
+        'aq',
+        'ao',
+        'an',
+        'am',
+        'al',
+        'ai',
+        'ag',
+        'af',
+        'ae',
+        'ad',
+        'ac',
+    );
+
+    /**
+     * get valid gTLD regexp
+     *
+     * @staticvar string $regex
+     * @return string
+     */
+    final public static function getValidGTLD()
+    {
+        static $regex;
+
+        if (!empty($regex)) {
+            return $regex;
+        }
+
+        $gTLD = implode('|', static::$gTLDs);
+        $regex = '(?:(?:' . $gTLD . ')(?=[^0-9a-z@]|$))';
+
+        return $regex;
+    }
+
+    /**
+     * get valid ccTLD regexp
+     *
+     * @staticvar string $regex
+     * @return string
+     */
+    final public static function getValidCcTLD()
+    {
+        static $regex;
+
+        if (!empty($regex)) {
+            return $regex;
+        }
+
+        $ccTLD = implode('|', static::$ccTLDs);
+        $regex = '(?:(?:' . $ccTLD . ')(?=[^0-9a-z@]|$))';
+
+        return $regex;
+    }
+}
diff --git a/tests/Twitter/Text/TldListsTest.php b/tests/Twitter/Text/TldListsTest.php
new file mode 100644
index 0000000..235bcbd
--- /dev/null
+++ b/tests/Twitter/Text/TldListsTest.php
@@ -0,0 +1,39 @@
+<?php
+
+namespace Twitter\Text;
+
+use PHPUnit_Framework_TestCase;
+use Twitter\Text\TldLists;
+
+/**
+ * Test for TldLists
+ */
+class TldListsTest extends PHPUnit_Framework_TestCase
+{
+
+    /**
+     * @covers Twitter\Text\TldLists::getValidGTLD
+     */
+    public function testGetValidGTLD()
+    {
+        $regexp = TldLists::getValidGTLD();
+        $this->assertStringStartsWith('(?:(?:삼성|닷컴|', $regexp);
+        $this->assertStringEndsWith('|aaa|onion)(?=[^0-9a-z@]|$))', $regexp);
+
+        $regexpCached = TldLists::getValidGTLD();
+        $this->assertSame($regexp, $regexpCached);
+    }
+
+    /**
+     * @covers Twitter\Text\TldLists::getValidCcTLD
+     */
+    public function testGetValidCcTLD()
+    {
+        $regexp = TldLists::getValidCcTLD();
+        $this->assertStringStartsWith('(?:(?:한국|香港|', $regexp);
+        $this->assertStringEndsWith('|ad|ac)(?=[^0-9a-z@]|$))', $regexp);
+
+        $regexpCached = TldLists::getValidCcTLD();
+        $this->assertSame($regexp, $regexpCached);
+    }
+}

From 4d4a9759e8a6bb4738c5c337a3ca0468ecfc2c4f Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Mon, 18 Dec 2017 20:42:05 +0900
Subject: [PATCH 02/10] replace rtl_chars matcher to static method

---
 lib/Twitter/Text/Autolink.php      |  2 +-
 lib/Twitter/Text/LooseAutolink.php |  2 +-
 lib/Twitter/Text/Regex.php         | 46 +++++++++++++++++++++---------
 tests/Twitter/Text/RegexTest.php   | 25 ++++++++++++++++
 4 files changed, 59 insertions(+), 16 deletions(-)
 create mode 100644 tests/Twitter/Text/RegexTest.php

diff --git a/lib/Twitter/Text/Autolink.php b/lib/Twitter/Text/Autolink.php
index bff6e14..b25fe49 100644
--- a/lib/Twitter/Text/Autolink.php
+++ b/lib/Twitter/Text/Autolink.php
@@ -650,7 +650,7 @@ public function linkToHashtag($entity, $tweet = null)
         if (!empty($this->class_hash)) {
             $class[] = $this->class_hash;
         }
-        if (preg_match(self::$patterns['rtl_chars'], $linkText)) {
+        if (preg_match(Regex::getRtlCharsMatcher(), $linkText)) {
             $class[] = 'rtl';
         }
         if (!empty($class)) {
diff --git a/lib/Twitter/Text/LooseAutolink.php b/lib/Twitter/Text/LooseAutolink.php
index cc4905c..15cc2bd 100644
--- a/lib/Twitter/Text/LooseAutolink.php
+++ b/lib/Twitter/Text/LooseAutolink.php
@@ -269,7 +269,7 @@ protected function _addLinksToHashtags($matches)
         $element = $hash . $tag;
         $url = $this->url_base_hash . $tag;
         $class_hash = $this->class_hash;
-        if (preg_match(self::$patterns['rtl_chars'], $element)) {
+        if (preg_match(Regex::getRtlCharsMatcher(), $element)) {
             $class_hash .= ' rtl';
         }
         $replacement .= $this->wrapHash($url, $class_hash, $element);
diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 83c88ab..c03ea17 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -27,7 +27,7 @@
  * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
  * @package    Twitter
  */
-abstract class Regex
+class Regex
 {
 
     /**
@@ -45,6 +45,20 @@ abstract class Regex
      */
     protected $tweet = '';
 
+    /**
+     * Expression to match RTL characters.
+     *
+     * 0x0600-0x06FF Arabic
+     * 0x0750-0x077F Arabic Supplement
+     * 0x08A0-0x08FF Arabic Extended-A
+     * 0x0590-0x05FF Hebrew
+     * 0xFB50-0xFDFF Arabic Presentation Forms-A
+     * 0xFE70-0xFEFF Arabic Presentation Forms-B
+     *
+     * @var string
+     */
+    private static $rtlChars = '\x{0600}-\x{06ff}\x{0750}-\x{077f}\x{08a0}-\x{08ff}\x{0590}-\x{05ff}\x{fb50}-\x{fdff}\x{fe70}-\x{feff}';
+
     /**
      * This constructor is used to populate some variables.
      *
@@ -122,16 +136,6 @@ public static function __static()
         $tmp['latin_accents'] .= '\x{0100}-\x{024f}\x{0253}-\x{0254}\x{0256}-\x{0257}';
         $tmp['latin_accents'] .= '\x{0259}\x{025b}\x{0263}\x{0268}\x{026f}\x{0272}\x{0289}\x{028b}\x{02bb}\x{0300}-\x{036f}\x{1e00}-\x{1eff}';
 
-        # Expression to match RTL characters.
-        #
-        #   0x0600-0x06FF Arabic
-        #   0x0750-0x077F Arabic Supplement
-        #   0x08A0-0x08FF Arabic Extended-A
-        #   0x0590-0x05FF Hebrew
-        #   0xFB50-0xFDFF Arabic Presentation Forms-A
-        #   0xFE70-0xFEFF Arabic Presentation Forms-B
-        $tmp['rtl_chars'] = '\x{0600}-\x{06ff}\x{0750}-\x{077f}\x{08a0}-\x{08ff}\x{0590}-\x{05ff}\x{fb50}-\x{fdff}\x{fe70}-\x{feff}';
-
         $tmp['hashtag_letters'] = '\p{L}\p{M}';
         $tmp['hashtag_numerals'] = '\p{Nd}';
         # Hashtag special chars
@@ -329,12 +333,26 @@ public static function __static()
 
         $re['invalid_characters'] = '/[' . $tmp['invalid_characters'] . ']/u';
 
-        $re['rtl_chars'] = '/[' . $tmp['rtl_chars'] . ']/iu';
-
         # Flag that initialization is complete:
         $initialized = true;
     }
-}
 
+    /**
+     * Regexp to match RTL characters.
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getRtlCharsMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/[' . static::$rtlChars . ']/iu';
+        }
+
+        return $regexp;
+    }
+}
 # Cause regular expressions to be initialized as soon as this file is loaded:
 Regex::__static();
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
new file mode 100644
index 0000000..afc9e58
--- /dev/null
+++ b/tests/Twitter/Text/RegexTest.php
@@ -0,0 +1,25 @@
+<?php
+
+namespace Twitter\Text;
+
+use Twitter\Text\Regex;
+
+/**
+ * test for Regex
+ */
+class RegexTest extends \PHPUnit_Framework_TestCase
+{
+
+    /**
+     * @covers Twitter\Text\Regex::getRtlCharsMatcher
+     */
+    public function testGetRtlCharsMatcher()
+    {
+        $matcher = Regex::getRtlCharsMatcher();
+        $this->assertStringStartsWith('/[', $matcher);
+        $this->assertStringEndsWith(']/iu', $matcher);
+
+        $matcherCached = Regex::getRtlCharsMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+}

From 86387a5e95acce291eeedde7c15cd4cae449c5a5 Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Mon, 18 Dec 2017 20:51:17 +0900
Subject: [PATCH 03/10] replace invalid_characters matcher to static method

---
 lib/Twitter/Text/Regex.php       | 42 +++++++++++++++++++++++---------
 lib/Twitter/Text/Validator.php   |  2 +-
 tests/Twitter/Text/RegexTest.php | 13 ++++++++++
 3 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index c03ea17..03d30ad 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -45,6 +45,15 @@ class Regex
      */
     protected $tweet = '';
 
+    /**
+     * Invalid Characters
+     *
+     * 0xFFFE,0xFEFF # BOM
+     * 0xFFFF        # Special
+     * 0x202A-0x202E # Directional change
+     */
+    private static $invalidCharacters = '\x{202a}-\x{202e}\x{feff}\x{fffe}\x{ffff}';
+
     /**
      * Expression to match RTL characters.
      *
@@ -100,12 +109,6 @@ public static function __static()
         #   0x3000         Zs # IDEOGRAPHIC SPACE
         $tmp['spaces'] = '\x{0009}-\x{000D}\x{0020}\x{0085}\x{00a0}\x{1680}\x{180E}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}';
 
-        # Invalid Characters:
-        #   0xFFFE,0xFEFF # BOM
-        #   0xFFFF        # Special
-        #   0x202A-0x202E # Directional change
-        $tmp['invalid_characters'] = '\x{202a}-\x{202e}\x{feff}\x{fffe}\x{ffff}';
-
         # Expression to match at and hash sign characters:
         $tmp['at_signs'] = '@＠';
         $tmp['hash_signs'] = '#＃';
@@ -158,7 +161,7 @@ public static function __static()
         #   0x0f0c TIBETAN MARK DELIMITER TSHEG BSTAR
         #   0x00b7 MIDDLE DOT
         $tmp['hashtag_special_chars'] = '_\x{200c}\x{200d}\x{a67e}\x{05be}\x{05f3}\x{05f4}\x{ff5e}\x{301c}\x{309b}\x{309c}\x{30a0}\x{30fb}\x{3003}\x{0f0b}\x{0f0c}\x{00b7}';
-        $tmp['hashtag_letters_numerals_set'] = '[' . $tmp['hashtag_letters'] .  $tmp['hashtag_numerals'] . $tmp['hashtag_special_chars'] . ']';
+        $tmp['hashtag_letters_numerals_set'] = '[' . $tmp['hashtag_letters'] . $tmp['hashtag_numerals'] . $tmp['hashtag_special_chars'] . ']';
         $tmp['hashtag_letters_set'] = '[' . $tmp['hashtag_letters'] . ']';
         $tmp['hashtag_boundary'] = '(?:\A|\x{fe0e}|\x{fe0f}|[^&' . $tmp['hashtag_letters'] . $tmp['hashtag_numerals'] . $tmp['hashtag_special_chars'] . '])';
         $tmp['hashtag'] = '(' . $tmp['hashtag_boundary'] . ')(#|\x{ff03})(?!\x{fe0f}|\x{20e3})(' . $tmp['hashtag_letters_numerals_set'] . '*' . $tmp['hashtag_letters_set'] . $tmp['hashtag_letters_numerals_set'] . '*)';
@@ -176,12 +179,12 @@ public static function __static()
 
         # URL related hash regex collection
 
-        $tmp['valid_url_preceding_chars'] = '(?:[^A-Z0-9_@＠\$#＃' . $tmp['invalid_characters'] . ']|^)';
+        $tmp['valid_url_preceding_chars'] = '(?:[^A-Z0-9_@＠\$#＃' . static::$invalidCharacters . ']|^)';
 
         $tmp['domain_valid_chars'] = '0-9a-z' . $tmp['latin_accents'];
         $tmp['valid_subdomain'] = '(?>(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-_]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
         $tmp['valid_domain_name'] = '(?:(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
-        $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . $tmp['invalid_characters'] . $tmp['spaces'] . ']';
+        $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . $tmp['spaces'] . ']';
 
         $tmp['valid_gTLD'] = TldLists::getValidGTLD();
         $tmp['valid_ccTLD'] = TldLists::getValidCcTLD();
@@ -331,14 +334,29 @@ public static function __static()
             . '\#(.*)'           #  $5 Fragment
             . ')?$/iux';
 
-        $re['invalid_characters'] = '/[' . $tmp['invalid_characters'] . ']/u';
-
         # Flag that initialization is complete:
         $initialized = true;
     }
 
     /**
-     * Regexp to match RTL characters.
+     * Get invalid characters matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getInvalidCharactersMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/[' . static::$invalidCharacters . ']/u';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get RTL characters matcher
      *
      * @staticvar string $regexp
      * @return string
diff --git a/lib/Twitter/Text/Validator.php b/lib/Twitter/Text/Validator.php
index 650b206..429a887 100644
--- a/lib/Twitter/Text/Validator.php
+++ b/lib/Twitter/Text/Validator.php
@@ -178,7 +178,7 @@ public function isValidTweetText($tweet = null)
         if ($length > self::MAX_LENGTH) {
             return false;
         }
-        if (preg_match(self::$patterns['invalid_characters'], $tweet)) {
+        if (preg_match(Regex::getInvalidCharactersMatcher(), $tweet)) {
             return false;
         }
         return true;
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
index afc9e58..201ea88 100644
--- a/tests/Twitter/Text/RegexTest.php
+++ b/tests/Twitter/Text/RegexTest.php
@@ -10,6 +10,19 @@
 class RegexTest extends \PHPUnit_Framework_TestCase
 {
 
+    /**
+     * @covers Twitter\Text\Regex::getInvalidCharactersMatcher
+     */
+    public function testGetInvalidCharactersMatcher()
+    {
+        $matcher = Regex::getInvalidCharactersMatcher();
+        $this->assertStringStartsWith('/[', $matcher);
+        $this->assertStringEndsWith(']/u', $matcher);
+
+        $matcherCached = Regex::getInvalidCharactersMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
     /**
      * @covers Twitter\Text\Regex::getRtlCharsMatcher
      */

From e547a2f100dc9c750bc2680ed57a0aa43f68b186 Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Mon, 18 Dec 2017 21:00:31 +0900
Subject: [PATCH 04/10] replace validate_url matchers to static method

---
 lib/Twitter/Text/Regex.php       | 298 ++++++++++++++++++++++++-------
 lib/Twitter/Text/Validator.php   |  21 ++-
 tests/Twitter/Text/RegexTest.php |  90 ++++++++++
 3 files changed, 341 insertions(+), 68 deletions(-)

diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 03d30ad..ffb9aef 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -68,6 +68,14 @@ class Regex
      */
     private static $rtlChars = '\x{0600}-\x{06ff}\x{0750}-\x{077f}\x{08a0}-\x{08ff}\x{0590}-\x{05ff}\x{fb50}-\x{fdff}\x{fe70}-\x{feff}';
 
+    # These URL validation pattern strings are based on the ABNF from RFC 3986
+    private static $validateUrlUnreserved = '[a-z\p{Cyrillic}0-9\-._~]';
+    private static $validateUrlPctEncoded = '(?:%[0-9a-f]{2})';
+    private static $validateUrlSubDelims = '[!$&\'()*+,;=]';
+    private static $validateUrlIpv4 = '(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3})';
+    private static $validateUrlIpv6 = '(?:\[[a-f0-9:\.]+\])';
+    private static $validateUrlPort = '[0-9]{1,5}';
+
     /**
      * This constructor is used to populate some variables.
      *
@@ -272,68 +280,6 @@ public static function __static()
         $re['valid_cashtag'] = '/(^|[' . $tmp['spaces'] . '])([' . $tmp['cash_signs'] . '])(' . $tmp['cashtag'] . ')(?=($|\s|[[:punct:]]))/iu';
         $re['end_cashtag_match'] = '/\A(?:[' . $tmp['cash_signs'] . ']|:\/\/)/u';
 
-        # These URL validation pattern strings are based on the ABNF from RFC 3986
-        $tmp['validate_url_unreserved'] = '[a-z\p{Cyrillic}0-9\-._~]';
-        $tmp['validate_url_pct_encoded'] = '(?:%[0-9a-f]{2})';
-        $tmp['validate_url_sub_delims'] = '[!$&\'()*+,;=]';
-        $tmp['validate_url_pchar'] = '(?:' . $tmp['validate_url_unreserved'] . '|' . $tmp['validate_url_pct_encoded'] . '|' . $tmp['validate_url_sub_delims'] . '|[:\|@])'; #/iox
-
-        $tmp['validate_url_userinfo'] = '(?:' . $tmp['validate_url_unreserved'] . '|' . $tmp['validate_url_pct_encoded'] . '|' . $tmp['validate_url_sub_delims'] . '|:)*'; #/iox
-
-        $tmp['validate_url_dec_octet'] = '(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'; #/i
-        $tmp['validate_url_ipv4'] = '(?:' . $tmp['validate_url_dec_octet'] . '(?:\.' . $tmp['validate_url_dec_octet'] . '){3})'; #/iox
-        # Punting on real IPv6 validation for now
-        $tmp['validate_url_ipv6'] = '(?:\[[a-f0-9:\.]+\])'; #/i
-        # Also punting on IPvFuture for now
-        $tmp['validate_url_ip'] = '(?:' . $tmp['validate_url_ipv4'] . '|' . $tmp['validate_url_ipv6'] . ')'; #/iox
-        # This is more strict than the rfc specifies
-        $tmp['validate_url_subdomain_segment'] = '(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)'; #/i
-        $tmp['validate_url_domain_segment'] = '(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)'; #/i
-        $tmp['validate_url_domain_tld'] = '(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)'; #/i
-        $tmp['validate_url_domain'] = '(?:(?:' . $tmp['validate_url_subdomain_segment'] . '\.)*(?:' . $tmp['validate_url_domain_segment'] . '\.)' . $tmp['validate_url_domain_tld'] . ')'; #/iox
-
-        $tmp['validate_url_host'] = '(?:' . $tmp['validate_url_ip'] . '|' . $tmp['validate_url_domain'] . ')'; #/iox
-        # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
-        $tmp['validate_url_unicode_subdomain_segment'] = '(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
-        $tmp['validate_url_unicode_domain_segment'] = '(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
-        $tmp['validate_url_unicode_domain_tld'] = '(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
-        $tmp['validate_url_unicode_domain'] = '(?:(?:' . $tmp['validate_url_unicode_subdomain_segment'] . '\.)*(?:' . $tmp['validate_url_unicode_domain_segment'] . '\.)' . $tmp['validate_url_unicode_domain_tld'] . ')'; #/iox
-
-        $tmp['validate_url_unicode_host'] = '(?:' . $tmp['validate_url_ip'] . '|' . $tmp['validate_url_unicode_domain'] . ')'; #/iox
-
-        $tmp['validate_url_port'] = '[0-9]{1,5}';
-
-        $re['validate_url_unicode_authority'] = '/'
-            . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' #  $1 userinfo
-            . '(' . $tmp['validate_url_unicode_host'] . ')'   #  $2 host
-            . '(?::(' . $tmp['validate_url_port'] . '))?'     #  $3 port
-            . '/iux';
-
-        $re['validate_url_authority'] = '/'
-            . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' #  $1 userinfo
-            . '(' . $tmp['validate_url_host'] . ')'           #  $2 host
-            . '(?::(' . $tmp['validate_url_port'] . '))?'     #  $3 port
-            . '/ix';
-
-        $re['validate_url_scheme'] = '/(?:[a-z][a-z0-9+\-.]*)/i';
-        $re['validate_url_path'] = '/(\/' . $tmp['validate_url_pchar'] . '*)*/iu';
-        $re['validate_url_query'] = '/(' . $tmp['validate_url_pchar'] . '|\/|\?)*/iu';
-        $re['validate_url_fragment'] = '/(' . $tmp['validate_url_pchar'] . '|\/|\?)*/iu';
-
-        # Modified version of RFC 3986 Appendix B
-        $re['validate_url_unencoded'] = '/^' #  Full URL
-            . '(?:'
-            . '([^:\/?#]+):\/\/' #  $1 Scheme
-            . ')?'
-            . '([^\/?#]*)'       #  $2 Authority
-            . '([^?#]*)'         #  $3 Path
-            . '(?:'
-            . '\?([^#]*)'        #  $4 Query
-            . ')?'
-            . '(?:'
-            . '\#(.*)'           #  $5 Fragment
-            . ')?$/iux';
-
         # Flag that initialization is complete:
         $initialized = true;
     }
@@ -371,6 +317,234 @@ public static function getRtlCharsMatcher()
 
         return $regexp;
     }
+
+    /**
+     * Get url matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlUnencodedMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            # Modified version of RFC 3986 Appendix B
+            $regexp = '/\A' #  Full URL
+                . '(?:'
+                . '([^:\/?#]+):\/\/' #  $1 Scheme
+                . ')?'
+                . '([^\/?#]*)'       #  $2 Authority
+                . '([^?#]*)'         #  $3 Path
+                . '(?:'
+                . '\?([^#]*)'        #  $4 Query
+                . ')?'
+                . '(?:'
+                . '\#(.*)'           #  $5 Fragment
+                . ')?\z/iux';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get valid url ip
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlIp()
+    {
+        return '(?:' . static::$validateUrlIpv4 . '|' . static::$validateUrlIpv6 . ')'; #/iox
+    }
+
+    /**
+     * Get valid url domain
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlDomain()
+    {
+        $subdomain = '(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)'; #/i
+        $domain = '(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)'; #/i
+        $tld = '(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)'; #/i
+
+        return '(?:(?:' . $subdomain . '\.)*(?:' . $domain . '\.)' . $tld . ')'; #/iox
+    }
+
+    /**
+     * Get valid url host
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlHost()
+    {
+        return '(?:' . static::getValidateUrlIp() . '|' . static::getValidateUrlDomain() . ')'; #/iox
+    }
+
+    /**
+     * Get valid url unicode domain
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlUnicodeDomain()
+    {
+        $subdomain = '(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
+        $domain = '(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
+        $tld = '(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)'; #/ix
+
+        return '(?:(?:' . $subdomain . '\.)*(?:' . $domain . '\.)' . $tld . ')'; #/iox
+    }
+
+    /**
+     * Get valid url unicode host
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlUnicodeHost()
+    {
+        return '(?:' . static::getValidateUrlIp() . '|' . static::getValidateUrlUnicodeDomain() . ')'; #/iox
+    }
+
+    /**
+     * Get valid url userinfo
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlUserinfo()
+    {
+        return '(?:' . static::$validateUrlUnreserved
+            . '|' . static::$validateUrlPctEncoded
+            . '|' . static::$validateUrlSubDelims
+            . '|:)*'; #/iox
+    }
+
+    /**
+     * Get url unicode authority matcher
+     *
+     * Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlUnicodeAuthorityMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/'
+                    . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
+                    . '(' . static::getValidateUrlUnicodeHost() . ')'    #  $2 host
+                    . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
+                    . '/iux';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get url authority matcher
+     *
+     * This is more strict than the rfc specifies
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlAuthorityMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/'
+                    . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
+                    . '(' . static::getValidateUrlHost() . ')'           #  $2 host
+                    . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
+                    . '/ix';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get url scheme matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlSchemeMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/(?:[a-z][a-z0-9+\-.]*)/i';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get valid url characters
+     *
+     * @return string matcher
+     */
+    private static function getValidateUrlPchar()
+    {
+        return '(?:' . static::$validateUrlUnreserved
+            . '|' . static::$validateUrlPctEncoded
+            . '|' . static::$validateUrlSubDelims
+            . '|[:\|@])'; #/iox
+    }
+
+    /**
+     * Get url path matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlPathMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/(\/' . static::getValidateUrlPchar() . '*)*/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get url query matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlQueryMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/(' . static::getValidateUrlPchar() . '|\/|\?)*/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get url fragment matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidateUrlFragmentMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/(' . static::getValidateUrlPchar() . '|\/|\?)*/iu';
+        }
+
+        return $regexp;
+    }
 }
+
 # Cause regular expressions to be initialized as soon as this file is loaded:
 Regex::__static();
diff --git a/lib/Twitter/Text/Validator.php b/lib/Twitter/Text/Validator.php
index 429a887..80f35cd 100644
--- a/lib/Twitter/Text/Validator.php
+++ b/lib/Twitter/Text/Validator.php
@@ -300,26 +300,35 @@ public function isValidURL($url = null, $unicode_domains = true, $require_protoc
         if (is_null($url)) {
             $url = $this->tweet;
         }
+
         $length = StringUtils::strlen($url);
         if (empty($url) || !$length) {
             return false;
         }
-        preg_match(self::$patterns['validate_url_unencoded'], $url, $matches);
+
+        preg_match(Regex::getValidateUrlUnencodedMatcher(), $url, $matches);
         $match = array_shift($matches);
         if (!$matches || $match !== $url) {
             return false;
         }
+
         list($scheme, $authority, $path, $query, $fragment) = array_pad($matches, 5, '');
+
         # Check scheme, path, query, fragment:
         if (($require_protocol && !(
-            self::isValidMatch($scheme, self::$patterns['validate_url_scheme']) && preg_match('/^https?$/i', $scheme))
-            ) || !self::isValidMatch($path, self::$patterns['validate_url_path']) || !self::isValidMatch($query, self::$patterns['validate_url_query'], true)
-            || !self::isValidMatch($fragment, self::$patterns['validate_url_fragment'], true)) {
+                self::isValidMatch($scheme, Regex::getValidateUrlSchemeMatcher())
+                && preg_match('/^https?$/i', $scheme)
+            ))
+            || !self::isValidMatch($path, Regex::getValidateUrlPathMatcher())
+            || !self::isValidMatch($query, Regex::getValidateUrlQueryMatcher(), true)
+            || !self::isValidMatch($fragment, Regex::getValidateUrlFragmentMatcher(), true)) {
             return false;
         }
+
         # Check authority:
-        $authority_pattern = $unicode_domains ? 'validate_url_unicode_authority' : 'validate_url_authority';
-        return self::isValidMatch($authority, self::$patterns[$authority_pattern]);
+        $authorityPattern = $unicode_domains ? Regex::getValidateUrlUnicodeAuthorityMatcher() : Regex::getValidateUrlAuthorityMatcher();
+
+        return self::isValidMatch($authority, $authorityPattern);
     }
 
     /**
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
index 201ea88..6c9c7fe 100644
--- a/tests/Twitter/Text/RegexTest.php
+++ b/tests/Twitter/Text/RegexTest.php
@@ -35,4 +35,94 @@ public function testGetRtlCharsMatcher()
         $matcherCached = Regex::getRtlCharsMatcher();
         $this->assertSame($matcher, $matcherCached);
     }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlUnencodedMatcher
+     */
+    public function testGetValidateUrlUnencodedMatcher()
+    {
+        $matcher = Regex::getValidateUrlUnencodedMatcher();
+        $this->assertStringStartsWith('/\A(?:', $matcher);
+        $this->assertStringEndsWith(')?\z/iux', $matcher);
+
+        $matcherCached = Regex::getValidateUrlUnencodedMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlUnicodeAuthorityMatcher
+     */
+    public function testGetValidateUrlUnicodeAuthorityMatcher()
+    {
+        $matcher = Regex::getValidateUrlUnicodeAuthorityMatcher();
+        $this->assertStringStartsWith('/(?:', $matcher);
+        $this->assertStringEndsWith(')?/iux', $matcher);
+
+        $matcherCached = Regex::getValidateUrlUnicodeAuthorityMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlAuthorityMatcher
+     */
+    public function testGetValidateUrlAuthorityMatcher()
+    {
+        $matcher = Regex::getValidateUrlAuthorityMatcher();
+        $this->assertStringStartsWith('/(?:', $matcher);
+        $this->assertStringEndsWith(')?/ix', $matcher);
+
+        $matcherCached = Regex::getValidateUrlAuthorityMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlSchemeMatcher
+     */
+    public function testGetValidateUrlSchemeMatcher()
+    {
+        $matcher = Regex::getValidateUrlSchemeMatcher();
+        $this->assertSame('/(?:[a-z][a-z0-9+\-.]*)/i', $matcher);
+
+        $matcherCached = Regex::getValidateUrlSchemeMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlPathMatcher
+     */
+    public function testGetValidateUrlPathMatcher()
+    {
+        $matcher = Regex::getValidateUrlPathMatcher();
+        $this->assertStringStartsWith('/(', $matcher);
+        $this->assertStringEndsWith(')*/iu', $matcher);
+
+        $matcherCached = Regex::getValidateUrlPathMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlQueryMatcher
+     */
+    public function testGetValidateUrlQueryMatcher()
+    {
+        $matcher = Regex::getValidateUrlQueryMatcher();
+        $this->assertStringStartsWith('/(', $matcher);
+        $this->assertStringEndsWith(')*/iu', $matcher);
+
+        $matcherCached = Regex::getValidateUrlQueryMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidateUrlFragmentMatcher
+     */
+    public function testGetValidateUrlFragmentMatcher()
+    {
+        $matcher = Regex::getValidateUrlFragmentMatcher();
+        $this->assertStringStartsWith('/(', $matcher);
+        $this->assertStringEndsWith(')*/iu', $matcher);
+
+        $matcherCached = Regex::getValidateUrlFragmentMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
 }

From 1e3373d4903d40aec3614db872ad108d152056a1 Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Tue, 19 Dec 2017 18:58:33 +0900
Subject: [PATCH 05/10] Replace cashtag matchers with static methods

---
 lib/Twitter/Text/Extractor.php     |   2 +-
 lib/Twitter/Text/LooseAutolink.php |   4 +-
 lib/Twitter/Text/Regex.php         | 100 ++++++++++++++++++++---------
 tests/Twitter/Text/RegexTest.php   |  26 ++++++++
 4 files changed, 98 insertions(+), 34 deletions(-)

diff --git a/lib/Twitter/Text/Extractor.php b/lib/Twitter/Text/Extractor.php
index 0b1c3af..8cbefda 100644
--- a/lib/Twitter/Text/Extractor.php
+++ b/lib/Twitter/Text/Extractor.php
@@ -296,7 +296,7 @@ public function extractCashtagsWithIndices($tweet = null)
             return array();
         }
 
-        preg_match_all(self::$patterns['valid_cashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        preg_match_all(Regex::getValidCashtagMatcher(), $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
         $tags = array();
 
         foreach ($matches as $match) {
diff --git a/lib/Twitter/Text/LooseAutolink.php b/lib/Twitter/Text/LooseAutolink.php
index 15cc2bd..418f436 100644
--- a/lib/Twitter/Text/LooseAutolink.php
+++ b/lib/Twitter/Text/LooseAutolink.php
@@ -148,7 +148,7 @@ public function addLinksToHashtags()
     public function addLinksToCashtags()
     {
         return preg_replace_callback(
-            self::$patterns['valid_cashtag'],
+            Regex::getValidCashtagMatcher(),
             array($this, '_addLinksToCashtags'),
             $this->tweet
         );
@@ -286,7 +286,7 @@ protected function _addLinksToHashtags($matches)
     protected function _addLinksToCashtags($matches)
     {
         list($all, $before, $cash, $tag, $after) = array_pad($matches, 5, '');
-        if (preg_match(self::$patterns['end_cashtag_match'], $after)
+        if (preg_match(Regex::getEndCashtagMatcher(), $after)
             || (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) || preg_match('!\A</!', $after)) {
             return $all;
         }
diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index ffb9aef..33e7c91 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -45,6 +45,27 @@ class Regex
      */
     protected $tweet = '';
 
+    /**
+     * Expression to match whitespace characters.
+     *
+     * 0x0009-0x000D  Cc # <control-0009>..<control-000D>
+     * 0x0020         Zs # SPACE
+     * 0x0085         Cc # <control-0085>
+     * 0x00A0         Zs # NO-BREAK SPACE
+     * 0x1680         Zs # OGHAM SPACE MARK
+     * 0x180E         Zs # MONGOLIAN VOWEL SEPARATOR
+     * 0x2000-0x200A  Zs # EN QUAD..HAIR SPACE
+     * 0x2028         Zl # LINE SEPARATOR
+     * 0x2029         Zp # PARAGRAPH SEPARATOR
+     * 0x202F         Zs # NARROW NO-BREAK SPACE
+     * 0x205F         Zs # MEDIUM MATHEMATICAL SPACE
+     * 0x3000         Zs # IDEOGRAPHIC SPACE
+     *
+     * @var string
+     */
+    #
+    private static $spaces = '\x{0009}-\x{000D}\x{0020}\x{0085}\x{00a0}\x{1680}\x{180E}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}';
+
     /**
      * Invalid Characters
      *
@@ -68,6 +89,10 @@ class Regex
      */
     private static $rtlChars = '\x{0600}-\x{06ff}\x{0750}-\x{077f}\x{08a0}-\x{08ff}\x{0590}-\x{05ff}\x{fb50}-\x{fdff}\x{fe70}-\x{feff}';
 
+    # cash tags
+    private static $cashSigns = '\$';
+    private static $cashtag = '[a-z]{1,6}(?:[._][a-z]{1,2})?';
+
     # These URL validation pattern strings are based on the ABNF from RFC 3986
     private static $validateUrlUnreserved = '[a-z\p{Cyrillic}0-9\-._~]';
     private static $validateUrlPctEncoded = '(?:%[0-9a-f]{2})';
@@ -101,22 +126,6 @@ public static function __static()
         # Initialise local storage arrays:
         $tmp = array();
 
-        # Expression to match whitespace characters.
-        #
-        #   0x0009-0x000D  Cc # <control-0009>..<control-000D>
-        #   0x0020         Zs # SPACE
-        #   0x0085         Cc # <control-0085>
-        #   0x00A0         Zs # NO-BREAK SPACE
-        #   0x1680         Zs # OGHAM SPACE MARK
-        #   0x180E         Zs # MONGOLIAN VOWEL SEPARATOR
-        #   0x2000-0x200A  Zs # EN QUAD..HAIR SPACE
-        #   0x2028         Zl # LINE SEPARATOR
-        #   0x2029         Zp # PARAGRAPH SEPARATOR
-        #   0x202F         Zs # NARROW NO-BREAK SPACE
-        #   0x205F         Zs # MEDIUM MATHEMATICAL SPACE
-        #   0x3000         Zs # IDEOGRAPHIC SPACE
-        $tmp['spaces'] = '\x{0009}-\x{000D}\x{0020}\x{0085}\x{00a0}\x{1680}\x{180E}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}';
-
         # Expression to match at and hash sign characters:
         $tmp['at_signs'] = '@＠';
         $tmp['hash_signs'] = '#＃';
@@ -182,7 +191,7 @@ public static function __static()
         #      look-ahead capture here and don't append $after when we return.
         $tmp['valid_mention_preceding_chars'] = '([^a-zA-Z0-9_!#\$%&*@＠\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)';
         $re['valid_mentions_or_lists'] = '/' . $tmp['valid_mention_preceding_chars'] . '([' . $tmp['at_signs'] . '])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
-        $re['valid_reply'] = '/^(?:[' . $tmp['spaces'] . '])*[' . $tmp['at_signs'] . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
+        $re['valid_reply'] = '/^(?:[' . static::$spaces . '])*[' . $tmp['at_signs'] . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
         $re['end_mention_match'] = '/\A(?:[' . $tmp['at_signs'] . ']|[' . $tmp['latin_accents'] . ']|:\/\/)/iu';
 
         # URL related hash regex collection
@@ -192,7 +201,7 @@ public static function __static()
         $tmp['domain_valid_chars'] = '0-9a-z' . $tmp['latin_accents'];
         $tmp['valid_subdomain'] = '(?>(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-_]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
         $tmp['valid_domain_name'] = '(?:(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
-        $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . $tmp['spaces'] . ']';
+        $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . static::$spaces . ']';
 
         $tmp['valid_gTLD'] = TldLists::getValidGTLD();
         $tmp['valid_ccTLD'] = TldLists::getValidCcTLD();
@@ -275,11 +284,6 @@ public static function __static()
             . ')'
             . ')/iux';
 
-        $tmp['cash_signs'] = '\$';
-        $tmp['cashtag'] = '[a-z]{1,6}(?:[._][a-z]{1,2})?';
-        $re['valid_cashtag'] = '/(^|[' . $tmp['spaces'] . '])([' . $tmp['cash_signs'] . '])(' . $tmp['cashtag'] . ')(?=($|\s|[[:punct:]]))/iu';
-        $re['end_cashtag_match'] = '/\A(?:[' . $tmp['cash_signs'] . ']|:\/\/)/u';
-
         # Flag that initialization is complete:
         $initialized = true;
     }
@@ -318,6 +322,40 @@ public static function getRtlCharsMatcher()
         return $regexp;
     }
 
+    /**
+     * Get valid cachtag matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidCashtagMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/(^|[' . static::$spaces . '])([' . static::$cashSigns . '])(' . static::$cashtag . ')(?=($|\s|[[:punct:]]))/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get end of cachtag matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getEndCashtagMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/\A(?:[' . static::$cashSigns . ']|:\/\/)/u';
+        }
+
+        return $regexp;
+    }
+
     /**
      * Get url matcher
      *
@@ -432,10 +470,10 @@ public static function getValidateUrlUnicodeAuthorityMatcher()
 
         if ($regexp === null) {
             $regexp = '/'
-                    . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
-                    . '(' . static::getValidateUrlUnicodeHost() . ')'    #  $2 host
-                    . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
-                    . '/iux';
+                . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
+                . '(' . static::getValidateUrlUnicodeHost() . ')'    #  $2 host
+                . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
+                . '/iux';
         }
 
         return $regexp;
@@ -455,10 +493,10 @@ public static function getValidateUrlAuthorityMatcher()
 
         if ($regexp === null) {
             $regexp = '/'
-                    . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
-                    . '(' . static::getValidateUrlHost() . ')'           #  $2 host
-                    . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
-                    . '/ix';
+                . '(?:(' . static::getValidateUrlUserinfo() . ')@)?' #  $1 userinfo
+                . '(' . static::getValidateUrlHost() . ')'           #  $2 host
+                . '(?::(' . static::$validateUrlPort . '))?'         #  $3 port
+                . '/ix';
         }
 
         return $regexp;
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
index 6c9c7fe..1ab2ff5 100644
--- a/tests/Twitter/Text/RegexTest.php
+++ b/tests/Twitter/Text/RegexTest.php
@@ -36,6 +36,32 @@ public function testGetRtlCharsMatcher()
         $this->assertSame($matcher, $matcherCached);
     }
 
+    /**
+     * @covers Twitter\Text\Regex::getValidCashtagMatcher
+     */
+    public function testGetValidCashtagMatcher()
+    {
+        $matcher = Regex::getValidCashtagMatcher();
+        $this->assertStringStartsWith('/(^|[', $matcher);
+        $this->assertStringEndsWith(']))/iu', $matcher);
+
+        $matcherCached = Regex::getValidCashtagMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getEndCashtagMatcher
+     */
+    public function testGetEndCashtagMatcher()
+    {
+        $matcher = Regex::getEndCashtagMatcher();
+        $this->assertStringStartsWith('/\A(?:', $matcher);
+        $this->assertStringEndsWith(')/u', $matcher);
+
+        $matcherCached = Regex::getEndCashtagMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
     /**
      * @covers Twitter\Text\Regex::getValidateUrlUnencodedMatcher
      */

From 683c6f77ea31b8953efd76ded95d5aacc69da9ba Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Tue, 19 Dec 2017 19:27:31 +0900
Subject: [PATCH 06/10] Replace hashtag matchers with static methods

---
 lib/Twitter/Text/Extractor.php     |   6 +-
 lib/Twitter/Text/LooseAutolink.php |   4 +-
 lib/Twitter/Text/Regex.php         | 120 ++++++++++++++++++++---------
 tests/Twitter/Text/RegexTest.php   |  26 +++++++
 4 files changed, 114 insertions(+), 42 deletions(-)

diff --git a/lib/Twitter/Text/Extractor.php b/lib/Twitter/Text/Extractor.php
index 8cbefda..8d7c464 100644
--- a/lib/Twitter/Text/Extractor.php
+++ b/lib/Twitter/Text/Extractor.php
@@ -243,7 +243,7 @@ public function extractHashtagsWithIndices($tweet = null, $checkUrlOverlap = tru
             return array();
         }
 
-        preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        preg_match_all(Regex::getValidHashtagMatcher(), $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
         $tags = array();
 
         foreach ($matches as $match) {
@@ -251,7 +251,7 @@ public function extractHashtagsWithIndices($tweet = null, $checkUrlOverlap = tru
             $start_position = $hash[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $hash[1])) : $hash[1];
             $end_position = $start_position + StringUtils::strlen($hash[0] . $hashtag[0]);
 
-            if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
+            if (preg_match(Regex::getEndHashtagMatcher(), $outer[0])) {
                 continue;
             }
 
@@ -304,7 +304,7 @@ public function extractCashtagsWithIndices($tweet = null)
             $start_position = $dollar[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $dollar[1])) : $dollar[1];
             $end_position = $start_position + StringUtils::strlen($dollar[0] . $cash_text[0]);
 
-            if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
+            if (preg_match(Regex::getEndHashtagMatcher(), $outer[0])) {
                 continue;
             }
 
diff --git a/lib/Twitter/Text/LooseAutolink.php b/lib/Twitter/Text/LooseAutolink.php
index 418f436..aa564dc 100644
--- a/lib/Twitter/Text/LooseAutolink.php
+++ b/lib/Twitter/Text/LooseAutolink.php
@@ -134,7 +134,7 @@ public function addLinks()
     public function addLinksToHashtags()
     {
         return preg_replace_callback(
-            self::$patterns['valid_hashtag'],
+            Regex::getValidHashtagMatcher(),
             array($this, '_addLinksToHashtags'),
             $this->tweet
         );
@@ -261,7 +261,7 @@ protected function wrapHash($url, $class, $element)
     protected function _addLinksToHashtags($matches)
     {
         list($all, $before, $hash, $tag, $after) = array_pad($matches, 5, '');
-        if (preg_match(self::$patterns['end_hashtag_match'], $after)
+        if (preg_match(Regex::getEndHashtagMatcher(), $after)
             || (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) || preg_match('!\A</!', $after)) {
             return $all;
         }
diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 33e7c91..04e847a 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -89,6 +89,10 @@ class Regex
      */
     private static $rtlChars = '\x{0600}-\x{06ff}\x{0750}-\x{077f}\x{08a0}-\x{08ff}\x{0590}-\x{05ff}\x{fb50}-\x{fdff}\x{fe70}-\x{feff}';
 
+    # Expression to match at and hash sign characters:
+    private static $atSigns = '@＠';
+    private static $hashSigns = '#＃';
+
     # cash tags
     private static $cashSigns = '\$';
     private static $cashtag = '[a-z]{1,6}(?:[._][a-z]{1,2})?';
@@ -126,10 +130,6 @@ public static function __static()
         # Initialise local storage arrays:
         $tmp = array();
 
-        # Expression to match at and hash sign characters:
-        $tmp['at_signs'] = '@＠';
-        $tmp['hash_signs'] = '#＃';
-
         # Expression to match latin accented characters.
         #
         #   0x00C0-0x00D6
@@ -156,43 +156,13 @@ public static function __static()
         $tmp['latin_accents'] .= '\x{0100}-\x{024f}\x{0253}-\x{0254}\x{0256}-\x{0257}';
         $tmp['latin_accents'] .= '\x{0259}\x{025b}\x{0263}\x{0268}\x{026f}\x{0272}\x{0289}\x{028b}\x{02bb}\x{0300}-\x{036f}\x{1e00}-\x{1eff}';
 
-        $tmp['hashtag_letters'] = '\p{L}\p{M}';
-        $tmp['hashtag_numerals'] = '\p{Nd}';
-        # Hashtag special chars
-        #
-        #   _      underscore
-        #   0x200c ZERO WIDTH NON-JOINER (ZWNJ)
-        #   0x200d ZERO WIDTH JOINER (ZWJ)
-        #   0xa67e CYRILLIC KAVYKA
-        #   0x05be HEBREW PUNCTUATION MAQAF
-        #   0x05f3 HEBREW PUNCTUATION GERESH
-        #   0x05f4 HEBREW PUNCTUATION GERSHAYIM
-        #   0xff5e FULLWIDTH TILDE
-        #   0x301c WAVE DASH
-        #   0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
-        #   0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-        #   0x30a0 KATAKANA-HIRAGANA DOUBLE HYPHEN
-        #   0x30fb KATAKANA MIDDLE DOT
-        #   0x3003 DITTO MARK
-        #   0x0f0b TIBETAN MARK INTERSYLLABIC TSHEG
-        #   0x0f0c TIBETAN MARK DELIMITER TSHEG BSTAR
-        #   0x00b7 MIDDLE DOT
-        $tmp['hashtag_special_chars'] = '_\x{200c}\x{200d}\x{a67e}\x{05be}\x{05f3}\x{05f4}\x{ff5e}\x{301c}\x{309b}\x{309c}\x{30a0}\x{30fb}\x{3003}\x{0f0b}\x{0f0c}\x{00b7}';
-        $tmp['hashtag_letters_numerals_set'] = '[' . $tmp['hashtag_letters'] . $tmp['hashtag_numerals'] . $tmp['hashtag_special_chars'] . ']';
-        $tmp['hashtag_letters_set'] = '[' . $tmp['hashtag_letters'] . ']';
-        $tmp['hashtag_boundary'] = '(?:\A|\x{fe0e}|\x{fe0f}|[^&' . $tmp['hashtag_letters'] . $tmp['hashtag_numerals'] . $tmp['hashtag_special_chars'] . '])';
-        $tmp['hashtag'] = '(' . $tmp['hashtag_boundary'] . ')(#|\x{ff03})(?!\x{fe0f}|\x{20e3})(' . $tmp['hashtag_letters_numerals_set'] . '*' . $tmp['hashtag_letters_set'] . $tmp['hashtag_letters_numerals_set'] . '*)';
-
-        $re['valid_hashtag'] = '/' . $tmp['hashtag'] . '(?=(.*|$))/iu';
-        $re['end_hashtag_match'] = '/\A(?:[' . $tmp['hash_signs'] . ']|:\/\/)/u';
-
         # XXX: PHP doesn't have Ruby's $' (dollar apostrophe) so we have to capture
         #      $after in the following regular expression.  Note that we only use a
         #      look-ahead capture here and don't append $after when we return.
         $tmp['valid_mention_preceding_chars'] = '([^a-zA-Z0-9_!#\$%&*@＠\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)';
-        $re['valid_mentions_or_lists'] = '/' . $tmp['valid_mention_preceding_chars'] . '([' . $tmp['at_signs'] . '])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
-        $re['valid_reply'] = '/^(?:[' . static::$spaces . '])*[' . $tmp['at_signs'] . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
-        $re['end_mention_match'] = '/\A(?:[' . $tmp['at_signs'] . ']|[' . $tmp['latin_accents'] . ']|:\/\/)/iu';
+        $re['valid_mentions_or_lists'] = '/' . $tmp['valid_mention_preceding_chars'] . '([' . static::$atSigns . '])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
+        $re['valid_reply'] = '/^(?:[' . static::$spaces . '])*[' . static::$atSigns . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
+        $re['end_mention_match'] = '/\A(?:[' . static::$atSigns . ']|[' . $tmp['latin_accents'] . ']|:\/\/)/iu';
 
         # URL related hash regex collection
 
@@ -322,6 +292,80 @@ public static function getRtlCharsMatcher()
         return $regexp;
     }
 
+    // =================================================================================================================
+
+    /**
+     * Get hashtag matcher
+     *
+     * @return string matcher
+     */
+    private static function getHashtagPattern()
+    {
+        $hashtag_letters = '\p{L}\p{M}';
+        $hashtag_numerals = '\p{Nd}';
+        # Hashtag special chars
+        #
+        #   _      underscore
+        #   0x200c ZERO WIDTH NON-JOINER (ZWNJ)
+        #   0x200d ZERO WIDTH JOINER (ZWJ)
+        #   0xa67e CYRILLIC KAVYKA
+        #   0x05be HEBREW PUNCTUATION MAQAF
+        #   0x05f3 HEBREW PUNCTUATION GERESH
+        #   0x05f4 HEBREW PUNCTUATION GERSHAYIM
+        #   0xff5e FULLWIDTH TILDE
+        #   0x301c WAVE DASH
+        #   0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
+        #   0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+        #   0x30a0 KATAKANA-HIRAGANA DOUBLE HYPHEN
+        #   0x30fb KATAKANA MIDDLE DOT
+        #   0x3003 DITTO MARK
+        #   0x0f0b TIBETAN MARK INTERSYLLABIC TSHEG
+        #   0x0f0c TIBETAN MARK DELIMITER TSHEG BSTAR
+        #   0x00b7 MIDDLE DOT
+        $hashtag_special_chars = '_\x{200c}\x{200d}\x{a67e}\x{05be}\x{05f3}\x{05f4}\x{ff5e}\x{301c}\x{309b}\x{309c}\x{30a0}\x{30fb}\x{3003}\x{0f0b}\x{0f0c}\x{00b7}';
+        $hashtag_letters_numerals_set = '[' . $hashtag_letters . $hashtag_numerals . $hashtag_special_chars . ']';
+        $hashtag_letters_set = '[' . $hashtag_letters . ']';
+        $hashtag_boundary = '(?:\A|\x{fe0e}|\x{fe0f}|[^&' . $hashtag_letters . $hashtag_numerals . $hashtag_special_chars . '])';
+
+        return '(' . $hashtag_boundary . ')(#|\x{ff03})(?!\x{fe0f}|\x{20e3})(' . $hashtag_letters_numerals_set . '*' . $hashtag_letters_set . $hashtag_letters_numerals_set . '*)';
+    }
+
+    /**
+     * Get valid hashtag matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidHashtagMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/' . static::getHashtagPattern() . '(?=(.*|$))/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get end of hashtag matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getEndHashtagMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/\A(?:[' . static::$hashSigns . ']|:\/\/)/u';
+        }
+
+        return $regexp;
+    }
+
+    // =================================================================================================================
+
     /**
      * Get valid cachtag matcher
      *
@@ -356,6 +400,8 @@ public static function getEndCashtagMatcher()
         return $regexp;
     }
 
+    // =================================================================================================================
+
     /**
      * Get url matcher
      *
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
index 1ab2ff5..19e6bb9 100644
--- a/tests/Twitter/Text/RegexTest.php
+++ b/tests/Twitter/Text/RegexTest.php
@@ -36,6 +36,32 @@ public function testGetRtlCharsMatcher()
         $this->assertSame($matcher, $matcherCached);
     }
 
+    /**
+     * @covers Twitter\Text\Regex::getValidHashtagMatcher
+     */
+    public function testGetValidHashtagMatcher()
+    {
+        $matcher = Regex::getValidHashtagMatcher();
+        $this->assertStringStartsWith('/((?:', $matcher);
+        $this->assertStringEndsWith('))/iu', $matcher);
+
+        $matcherCached = Regex::getValidHashtagMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getEndHashtagMatcher
+     */
+    public function testGetEndHashtagMatcher()
+    {
+        $matcher = Regex::getEndHashtagMatcher();
+        $this->assertStringStartsWith('/\A(?:', $matcher);
+        $this->assertStringEndsWith(')/u', $matcher);
+
+        $matcherCached = Regex::getEndHashtagMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
     /**
      * @covers Twitter\Text\Regex::getValidCashtagMatcher
      */

From 14bb731f78da153fd92f25e1c82456673e23920d Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Tue, 19 Dec 2017 19:55:50 +0900
Subject: [PATCH 07/10] Replace mention matchers with static methods

---
 lib/Twitter/Text/Extractor.php     |   8 +-
 lib/Twitter/Text/LooseAutolink.php |   4 +-
 lib/Twitter/Text/Regex.php         | 126 ++++++++++++++++++++---------
 lib/Twitter/Text/Validator.php     |   2 +-
 tests/Twitter/Text/RegexTest.php   |  39 +++++++++
 5 files changed, 135 insertions(+), 44 deletions(-)

diff --git a/lib/Twitter/Text/Extractor.php b/lib/Twitter/Text/Extractor.php
index 8d7c464..c5c615b 100644
--- a/lib/Twitter/Text/Extractor.php
+++ b/lib/Twitter/Text/Extractor.php
@@ -205,9 +205,9 @@ public function extractReplyScreenname($tweet = null)
         if (is_null($tweet)) {
             $tweet = $this->tweet;
         }
-        $matched = preg_match(self::$patterns['valid_reply'], $tweet, $matches);
+        $matched = preg_match(Regex::getValidReplyMatcher(), $tweet, $matches);
         # Check username ending in
-        if ($matched && preg_match(self::$patterns['end_mention_match'], $matches[2])) {
+        if ($matched && preg_match(Regex::getEndMentionMatcher(), $matches[2])) {
             $matched = false;
         }
         return $matched ? $matches[1] : null;
@@ -453,7 +453,7 @@ public function extractMentionsOrListsWithIndices($tweet = null)
             return array();
         }
 
-        preg_match_all(self::$patterns['valid_mentions_or_lists'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        preg_match_all(Regex::getValidMentionsOrListsMatcher(), $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
         $results = array();
 
         foreach ($matches as $match) {
@@ -466,7 +466,7 @@ public function extractMentionsOrListsWithIndices($tweet = null)
                 'indices' => array($start_position, $end_position),
             );
 
-            if (preg_match(self::$patterns['end_mention_match'], $outer[0])) {
+            if (preg_match(Regex::getEndMentionMatcher(), $outer[0])) {
                 continue;
             }
 
diff --git a/lib/Twitter/Text/LooseAutolink.php b/lib/Twitter/Text/LooseAutolink.php
index aa564dc..d03da9c 100644
--- a/lib/Twitter/Text/LooseAutolink.php
+++ b/lib/Twitter/Text/LooseAutolink.php
@@ -172,7 +172,7 @@ public function addLinksToURLs()
     public function addLinksToUsernamesAndLists()
     {
         return preg_replace_callback(
-            self::$patterns['valid_mentions_or_lists'],
+            Regex::getValidMentionsOrListsMatcher(),
             array($this, '_addLinksToUsernamesAndLists'),
             $this->tweet
         );
@@ -331,7 +331,7 @@ protected function _addLinksToUsernamesAndLists($matches)
             $class = $this->class_list;
             $url = $this->url_base_list . $element;
         } else {
-            if (preg_match(self::$patterns['end_mention_match'], $after)) {
+            if (preg_match(Regex::getEndMentionMatcher(), $after)) {
                 return $all;
             }
             # Replace the username
diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 04e847a..325a484 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -66,6 +66,34 @@ class Regex
     #
     private static $spaces = '\x{0009}-\x{000D}\x{0020}\x{0085}\x{00a0}\x{1680}\x{180E}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}';
 
+    /**
+     * Expression to match latin accented characters.
+     *
+     * 0x00C0-0x00D6
+     * 0x00D8-0x00F6
+     * 0x00F8-0x00FF
+     * 0x0100-0x024f
+     * 0x0253-0x0254
+     * 0x0256-0x0257
+     * 0x0259
+     * 0x025b
+     * 0x0263
+     * 0x0268
+     * 0x026f
+     * 0x0272
+     * 0x0289
+     * 0x028b
+     * 0x02bb
+     * 0x0300-0x036f
+     * 0x1e00-0x1eff
+     *
+     * Excludes 0x00D7 - multiplication sign (confusable with 'x').
+     * Excludes 0x00F7 - division sign.
+     *
+     * @var string
+     */
+    private static $latinAccents = '\x{00c0}-\x{00d6}\x{00d8}-\x{00f6}\x{00f8}-\x{00ff}\x{0100}-\x{024f}\x{0253}-\x{0254}\x{0256}-\x{0257}\x{0259}\x{025b}\x{0263}\x{0268}\x{026f}\x{0272}\x{0289}\x{028b}\x{02bb}\x{0300}-\x{036f}\x{1e00}-\x{1eff}';
+
     /**
      * Invalid Characters
      *
@@ -130,45 +158,11 @@ public static function __static()
         # Initialise local storage arrays:
         $tmp = array();
 
-        # Expression to match latin accented characters.
-        #
-        #   0x00C0-0x00D6
-        #   0x00D8-0x00F6
-        #   0x00F8-0x00FF
-        #   0x0100-0x024f
-        #   0x0253-0x0254
-        #   0x0256-0x0257
-        #   0x0259
-        #   0x025b
-        #   0x0263
-        #   0x0268
-        #   0x026f
-        #   0x0272
-        #   0x0289
-        #   0x028b
-        #   0x02bb
-        #   0x0300-0x036f
-        #   0x1e00-0x1eff
-        #
-        # Excludes 0x00D7 - multiplication sign (confusable with 'x').
-        # Excludes 0x00F7 - division sign.
-        $tmp['latin_accents'] = '\x{00c0}-\x{00d6}\x{00d8}-\x{00f6}\x{00f8}-\x{00ff}';
-        $tmp['latin_accents'] .= '\x{0100}-\x{024f}\x{0253}-\x{0254}\x{0256}-\x{0257}';
-        $tmp['latin_accents'] .= '\x{0259}\x{025b}\x{0263}\x{0268}\x{026f}\x{0272}\x{0289}\x{028b}\x{02bb}\x{0300}-\x{036f}\x{1e00}-\x{1eff}';
-
-        # XXX: PHP doesn't have Ruby's $' (dollar apostrophe) so we have to capture
-        #      $after in the following regular expression.  Note that we only use a
-        #      look-ahead capture here and don't append $after when we return.
-        $tmp['valid_mention_preceding_chars'] = '([^a-zA-Z0-9_!#\$%&*@＠\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)';
-        $re['valid_mentions_or_lists'] = '/' . $tmp['valid_mention_preceding_chars'] . '([' . static::$atSigns . '])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
-        $re['valid_reply'] = '/^(?:[' . static::$spaces . '])*[' . static::$atSigns . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
-        $re['end_mention_match'] = '/\A(?:[' . static::$atSigns . ']|[' . $tmp['latin_accents'] . ']|:\/\/)/iu';
-
         # URL related hash regex collection
 
         $tmp['valid_url_preceding_chars'] = '(?:[^A-Z0-9_@＠\$#＃' . static::$invalidCharacters . ']|^)';
 
-        $tmp['domain_valid_chars'] = '0-9a-z' . $tmp['latin_accents'];
+        $tmp['domain_valid_chars'] = '0-9a-z' . static::$latinAccents;
         $tmp['valid_subdomain'] = '(?>(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-_]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
         $tmp['valid_domain_name'] = '(?:(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
         $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . static::$spaces . ']';
@@ -214,7 +208,7 @@ public static function __static()
 
         $tmp['valid_port_number'] = '[0-9]+';
 
-        $tmp['valid_general_url_path_chars'] = '[a-z\p{Cyrillic}0-9!\*;:=\+\,\.\$\/%#\[\]\-_~&|@' . $tmp['latin_accents'] . ']';
+        $tmp['valid_general_url_path_chars'] = '[a-z\p{Cyrillic}0-9!\*;:=\+\,\.\$\/%#\[\]\-_~&|@' . static::$latinAccents . ']';
         # Allow URL paths to contain up to two nested levels of balanced parentheses:
         # 1. Used in Wikipedia URLs, e.g. /Primer_(film)
         # 2. Used in IIS sessions, e.g. /S(dfd346)/
@@ -232,7 +226,7 @@ public static function __static()
             . '\))';
         # Valid end-of-path characters (so /foo. does not gobble the period).
         # 1. Allow =&# for empty URL parameters and other URL-join artifacts.
-        $tmp['valid_url_path_ending_chars'] = '[a-z\p{Cyrillic}0-9=_#\/\+\-' . $tmp['latin_accents'] . ']|(?:' . $tmp['valid_url_balanced_parens'] . ')';
+        $tmp['valid_url_path_ending_chars'] = '[a-z\p{Cyrillic}0-9=_#\/\+\-' . static::$latinAccents . ']|(?:' . $tmp['valid_url_balanced_parens'] . ')';
         $tmp['valid_url_path'] = '(?:(?:'
             . $tmp['valid_general_url_path_chars'] . '*(?:'
             . $tmp['valid_url_balanced_parens'] . ' '
@@ -294,6 +288,64 @@ public static function getRtlCharsMatcher()
 
     // =================================================================================================================
 
+    # NOTE: PHP doesn't have Ruby's $' (dollar apostrophe) so we have to capture
+    #      $after in the following regular expression.  Note that we only use a
+    #      look-ahead capture here and don't append $after when we return.
+
+    /**
+     * Get valid mentions or lists matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidMentionsOrListsMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $mention_preceding_chars = '([^a-zA-Z0-9_!#\$%&*@＠\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)';
+            $regexp = '/' . $mention_preceding_chars . '([' . static::$atSigns . '])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get valid reply matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidReplyMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/^(?:[' . static::$spaces . '])*[' . static::$atSigns . ']([a-z0-9_]{1,20})(?=(.*|$))/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get end mention matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getEndMentionMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/\A(?:[' . static::$atSigns . ']|[' . static::$latinAccents . ']|:\/\/)/iu';
+        }
+
+        return $regexp;
+    }
+
+    // =================================================================================================================
+
     /**
      * Get hashtag matcher
      *
diff --git a/lib/Twitter/Text/Validator.php b/lib/Twitter/Text/Validator.php
index 80f35cd..dbe79ee 100644
--- a/lib/Twitter/Text/Validator.php
+++ b/lib/Twitter/Text/Validator.php
@@ -240,7 +240,7 @@ public function isValidList($list = null)
         if (empty($list) || !$length) {
             return false;
         }
-        preg_match(self::$patterns['valid_mentions_or_lists'], $list, $matches);
+        preg_match(Regex::getValidMentionsOrListsMatcher(), $list, $matches);
         $matches = array_pad($matches, 5, '');
         return isset($matches) && $matches[1] === '' && $matches[4] && !empty($matches[4]) && $matches[5] === '';
     }
diff --git a/tests/Twitter/Text/RegexTest.php b/tests/Twitter/Text/RegexTest.php
index 19e6bb9..31999f2 100644
--- a/tests/Twitter/Text/RegexTest.php
+++ b/tests/Twitter/Text/RegexTest.php
@@ -36,6 +36,45 @@ public function testGetRtlCharsMatcher()
         $this->assertSame($matcher, $matcherCached);
     }
 
+    /**
+     * @covers Twitter\Text\Regex::getValidMentionsOrListsMatcher
+     */
+    public function testGetValidMentionsOrListsMatcher()
+    {
+        $matcher = Regex::getValidMentionsOrListsMatcher();
+        $this->assertStringStartsWith('/([', $matcher);
+        $this->assertStringEndsWith('(?=(.*|$))/iu', $matcher);
+
+        $matcherCached = Regex::getValidMentionsOrListsMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getValidReplyMatcher
+     */
+    public function testGetValidReplyMatcher()
+    {
+        $matcher = Regex::getValidReplyMatcher();
+        $this->assertStringStartsWith('/^(?:[', $matcher);
+        $this->assertStringEndsWith('(?=(.*|$))/iu', $matcher);
+
+        $matcherCached = Regex::getValidReplyMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
+    /**
+     * @covers Twitter\Text\Regex::getEndMentionMatcher
+     */
+    public function testGetEndMentionMatcher()
+    {
+        $matcher = Regex::getEndMentionMatcher();
+        $this->assertStringStartsWith('/\A(?:', $matcher);
+        $this->assertStringEndsWith(')/iu', $matcher);
+
+        $matcherCached = Regex::getEndMentionMatcher();
+        $this->assertSame($matcher, $matcherCached);
+    }
+
     /**
      * @covers Twitter\Text\Regex::getValidHashtagMatcher
      */

From bb88bea806c3f34be47986940e33d5939a5a13a7 Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Thu, 28 Dec 2017 18:52:24 +0900
Subject: [PATCH 08/10] replace url matchers with static methods

---
 lib/Twitter/Text/Extractor.php     |  12 +-
 lib/Twitter/Text/LooseAutolink.php |   2 +-
 lib/Twitter/Text/Regex.php         | 336 ++++++++++++++++++++---------
 3 files changed, 241 insertions(+), 109 deletions(-)

diff --git a/lib/Twitter/Text/Extractor.php b/lib/Twitter/Text/Extractor.php
index c5c615b..963d7d2 100644
--- a/lib/Twitter/Text/Extractor.php
+++ b/lib/Twitter/Text/Extractor.php
@@ -335,7 +335,7 @@ public function extractURLsWithIndices($tweet = null)
         }
 
         $urls = array();
-        preg_match_all(self::$patterns['valid_url'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+        preg_match_all(Regex::getValidUrlMatcher(), $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
 
         foreach ($matches as $match) {
             list($all, $before, $url, $protocol, $domain, $port, $path, $query) = array_pad($match, 8, array(''));
@@ -354,14 +354,14 @@ public function extractURLsWithIndices($tweet = null)
             // If protocol is missing and domain contains non-ASCII characters,
             // extract ASCII-only domains.
             if (empty($protocol)) {
-                if (!$this->extractURLWithoutProtocol || preg_match(self::$patterns['invalid_url_without_protocol_preceding_chars'], $before)) {
+                if (!$this->extractURLWithoutProtocol || preg_match(Regex::getInvalidUrlWithoutProtocolPrecedingCharsMatcher(), $before)) {
                     continue;
                 }
 
                 $last_url = null;
                 $ascii_end_position = 0;
 
-                if (preg_match(self::$patterns['valid_ascii_domain'], $domain, $asciiDomain)) {
+                if (preg_match(Regex::getValidAsciiDomainMatcher(), $domain, $asciiDomain)) {
                     $asciiDomain[0] = preg_replace('/' . preg_quote($domain, '/') . '/u', $asciiDomain[0], $url);
                     $ascii_start_position = StringUtils::strpos($domain, $asciiDomain[0], $ascii_end_position);
                     $ascii_end_position = $ascii_start_position + StringUtils::strlen($asciiDomain[0]);
@@ -370,8 +370,8 @@ public function extractURLsWithIndices($tweet = null)
                         'indices' => array($start_position + $ascii_start_position, $start_position + $ascii_end_position),
                     );
                     if (!empty($path)
-                        || preg_match(self::$patterns['valid_special_short_domain'], $asciiDomain[0])
-                        || !preg_match(self::$patterns['invalid_short_domain'], $asciiDomain[0])) {
+                        || preg_match(Regex::getValidSpecialShortDomainMatcher(), $asciiDomain[0])
+                        || !preg_match(Regex::getInvalidShortDomainMatcher(), $asciiDomain[0])) {
                         $urls[] = $last_url;
                     }
                 }
@@ -389,7 +389,7 @@ public function extractURLsWithIndices($tweet = null)
                 }
             } else {
                 // In the case of t.co URLs, don't allow additional path characters
-                if (preg_match(self::$patterns['valid_tco_url'], $url, $tcoUrlMatches)) {
+                if (preg_match(Regex::getValidTcoUrlMatcher(), $url, $tcoUrlMatches)) {
                     $url = $tcoUrlMatches[0];
                     $end_position = $start_position + StringUtils::strlen($url);
                 }
diff --git a/lib/Twitter/Text/LooseAutolink.php b/lib/Twitter/Text/LooseAutolink.php
index d03da9c..ca9000c 100644
--- a/lib/Twitter/Text/LooseAutolink.php
+++ b/lib/Twitter/Text/LooseAutolink.php
@@ -161,7 +161,7 @@ public function addLinksToCashtags()
      */
     public function addLinksToURLs()
     {
-        return preg_replace_callback(self::$patterns['valid_url'], array($this, '_addLinksToURLs'), $this->tweet);
+        return preg_replace_callback(Regex::getValidUrlMatcher(), array($this, '_addLinksToURLs'), $this->tweet);
     }
 
     /**
diff --git a/lib/Twitter/Text/Regex.php b/lib/Twitter/Text/Regex.php
index 325a484..74121f6 100644
--- a/lib/Twitter/Text/Regex.php
+++ b/lib/Twitter/Text/Regex.php
@@ -133,6 +133,10 @@ class Regex
     private static $validateUrlIpv6 = '(?:\[[a-f0-9:\.]+\])';
     private static $validateUrlPort = '[0-9]{1,5}';
 
+    # URL related hash regex collection
+    private static $validSpecialCcTLD = '(?:(?:co|tv)(?=[^0-9a-z@]|$))';
+    private static $validPunycode = '(?:xn--[0-9a-z]+)';
+
     /**
      * This constructor is used to populate some variables.
      *
@@ -144,148 +148,279 @@ protected function __construct($tweet = null)
     }
 
     /**
-     * Emulate a static initialiser while PHP doesn't have one.
+     * Get invalid characters matcher
+     *
+     * @staticvar string $regexp
+     * @return string
      */
-    public static function __static()
+    public static function getInvalidCharactersMatcher()
     {
-        # Check whether we have initialized the regular expressions:
-        static $initialized = false;
-        if ($initialized) {
-            return;
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/[' . static::$invalidCharacters . ']/u';
         }
-        # Get a shorter reference to the regular expression array:
-        $re = & self::$patterns;
-        # Initialise local storage arrays:
-        $tmp = array();
 
-        # URL related hash regex collection
+        return $regexp;
+    }
 
-        $tmp['valid_url_preceding_chars'] = '(?:[^A-Z0-9_@＠\$#＃' . static::$invalidCharacters . ']|^)';
+    /**
+     * Get RTL characters matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getRtlCharsMatcher()
+    {
+        static $regexp = null;
 
-        $tmp['domain_valid_chars'] = '0-9a-z' . static::$latinAccents;
-        $tmp['valid_subdomain'] = '(?>(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-_]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
-        $tmp['valid_domain_name'] = '(?:(?:[' . $tmp['domain_valid_chars'] . '][' . $tmp['domain_valid_chars'] . '\-]*)?[' . $tmp['domain_valid_chars'] . ']\.)';
-        $tmp['domain_valid_unicode_chars'] = '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . static::$spaces . ']';
+        if ($regexp === null) {
+            $regexp = '/[' . static::$rtlChars . ']/iu';
+        }
 
-        $tmp['valid_gTLD'] = TldLists::getValidGTLD();
-        $tmp['valid_ccTLD'] = TldLists::getValidCcTLD();
-        $tmp['valid_special_ccTLD'] = '(?:(?:' . 'co|tv' . ')(?=[^0-9a-z@]|$))';
-        $tmp['valid_punycode'] = '(?:xn--[0-9a-z]+)';
+        return $regexp;
+    }
 
-        $tmp['valid_domain'] = ''
-            // subdomains + domain + TLD
-            // e.g. www.twitter.com, foo.co.jp, bar.co.uk
-            . '(?:' . $tmp['valid_subdomain'] . '+' . $tmp['valid_domain_name']
-            . '(?:' . $tmp['valid_gTLD'] . '|' . $tmp['valid_ccTLD'] . '|' . $tmp['valid_punycode'] . '))'
-            // domain + gTLD | protocol + unicode domain + gTLD
-            . '|(?:'
-            . '(?:'
-            . $tmp['valid_domain_name'] . '|(?:(?<=http:\/\/|https:\/\/)' . $tmp['domain_valid_unicode_chars'] . '+\.)'
-            . ')'
-            . $tmp['valid_gTLD']
-            . ')'
-            // domain + gTLD | some ccTLD
-            // e.g. twitter.com
-            . '|(?:' . $tmp['valid_domain_name'] . $tmp['valid_punycode'] . ')'
-            . '|(?:' . $tmp['valid_domain_name'] . $tmp['valid_special_ccTLD'] . ')'
-            // protocol + domain + ccTLD | protocol + unicode domain + ccTLD
-            . '|(?:(?<=http:\/\/|https:\/\/)'
-            . '(?:' . $tmp['valid_domain_name'] . '|' . $tmp['domain_valid_unicode_chars'] . '+\.)'
-            . $tmp['valid_ccTLD'] . ')'
-            // domain + ccTLD + '/'
-            // e.g. t.co/
-            . '|(?:' . $tmp['valid_domain_name'] . $tmp['valid_ccTLD'] . '(?=\/))';
-        # Used by the extractor:
-        $re['valid_ascii_domain'] = '/' . $tmp['valid_subdomain'] . '*' . $tmp['valid_domain_name'] . '(?:' . $tmp['valid_gTLD'] . '|' . $tmp['valid_ccTLD'] . '|' . $tmp['valid_punycode'] . ')/iu';
+    // =================================================================================================================
 
-        # Used by the extractor for stricter t.co URL extraction:
-        $re['valid_tco_url'] = '/^https?:\/\/t\.co\/[a-z0-9]+/iu';
+    /**
+     * Get valid ascii domain matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidAsciiDomainMatcher()
+    {
+        static $regexp = null;
 
-        # Used by the extractor to filter out unwanted URLs:
-        $re['invalid_short_domain'] = '/\A' . $tmp['valid_domain_name'] . $tmp['valid_ccTLD'] . '\Z/iu';
-        $re['valid_special_short_domain'] = '/\A' . $tmp['valid_domain_name'] . $tmp['valid_special_ccTLD'] . '\Z/iu';
-        $re['invalid_url_without_protocol_preceding_chars'] = '/[\-_.\/]\z/iu';
+        if ($regexp === null) {
+            $regexp = '/' . static::getValidSubdomain() . '*' . static::getValidDomainName()
+                . '(?:' . TldLists::getValidGTLD() . '|' . TldLists::getValidCcTLD()
+                . '|' . static::$validPunycode . ')/iu';
+        }
 
-        $tmp['valid_port_number'] = '[0-9]+';
+        return $regexp;
+    }
 
-        $tmp['valid_general_url_path_chars'] = '[a-z\p{Cyrillic}0-9!\*;:=\+\,\.\$\/%#\[\]\-_~&|@' . static::$latinAccents . ']';
-        # Allow URL paths to contain up to two nested levels of balanced parentheses:
-        # 1. Used in Wikipedia URLs, e.g. /Primer_(film)
-        # 2. Used in IIS sessions, e.g. /S(dfd346)/
-        # 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
-        $tmp['valid_url_balanced_parens'] = '(?:\('
-            . '(?:' . $tmp['valid_general_url_path_chars'] . '+'
-            . '|'
-            // allow one nested level of balanced parentheses
-            . '(?:'
-            . $tmp['valid_general_url_path_chars'] . '*'
-            . '\(' . $tmp['valid_general_url_path_chars'] . '+' . '\)'
-            . $tmp['valid_general_url_path_chars'] . '*'
-            . ')'
-            . ')'
-            . '\))';
-        # Valid end-of-path characters (so /foo. does not gobble the period).
-        # 1. Allow =&# for empty URL parameters and other URL-join artifacts.
-        $tmp['valid_url_path_ending_chars'] = '[a-z\p{Cyrillic}0-9=_#\/\+\-' . static::$latinAccents . ']|(?:' . $tmp['valid_url_balanced_parens'] . ')';
-        $tmp['valid_url_path'] = '(?:(?:'
-            . $tmp['valid_general_url_path_chars'] . '*(?:'
-            . $tmp['valid_url_balanced_parens'] . ' '
-            . $tmp['valid_general_url_path_chars'] . '*)*'
-            . $tmp['valid_url_path_ending_chars'] . ')|(?:@'
-            . $tmp['valid_general_url_path_chars'] . '+\/))';
-
-        $tmp['valid_url_query_chars'] = '[a-z0-9!?\*\'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]';
-        $tmp['valid_url_query_ending_chars'] = '[a-z0-9_&=#\/\-]';
-
-        $re['valid_url'] = '/(?:'                           # $1 Complete match (preg_match() already matches everything.)
-            . '(' . $tmp['valid_url_preceding_chars'] . ')' # $2 Preceding characters
-            . '('                                           # $3 Complete URL
-            . '(https?:\/\/)?'                              # $4 Protocol (optional)
-            . '(' . $tmp['valid_domain'] . ')'              # $5 Domain(s)
-            . '(?::(' . $tmp['valid_port_number'] . '))?'   # $6 Port number (optional)
-            . '(\/' . $tmp['valid_url_path'] . '*)?'        # $7 URL Path
-            . '(\?' . $tmp['valid_url_query_chars'] . '*' . $tmp['valid_url_query_ending_chars'] . ')?' # $8 Query String
-            . ')'
-            . ')/iux';
+    /**
+     * Get valid tco url matcher
+     *
+     * Used by the extractor for stricter t.co URL extraction
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidTcoUrlMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/^https?:\/\/t\.co\/[a-z0-9]+/iu';
+        }
 
-        # Flag that initialization is complete:
-        $initialized = true;
+        return $regexp;
     }
 
     /**
-     * Get invalid characters matcher
+     * Get invalid short domain matcher
      *
      * @staticvar string $regexp
      * @return string
      */
-    public static function getInvalidCharactersMatcher()
+    public static function getInvalidShortDomainMatcher()
     {
         static $regexp = null;
 
         if ($regexp === null) {
-            $regexp = '/[' . static::$invalidCharacters . ']/u';
+            $regexp = '/\A' . static::getValidDomainName() . TldLists::getValidCcTLD() . '\Z/iu';
         }
 
         return $regexp;
     }
 
     /**
-     * Get RTL characters matcher
+     * Get valid special short domain matcher
      *
      * @staticvar string $regexp
      * @return string
      */
-    public static function getRtlCharsMatcher()
+    public static function getValidSpecialShortDomainMatcher()
     {
         static $regexp = null;
 
         if ($regexp === null) {
-            $regexp = '/[' . static::$rtlChars . ']/iu';
+            $regexp = '/\A' . static::getValidDomainName() . static::$validSpecialCcTLD . '\Z/iu';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get invalid url without protocol preceding chars matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getInvalidUrlWithoutProtocolPrecedingCharsMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $regexp = '/[\-_.\/]\z/iu';
         }
 
         return $regexp;
     }
 
+    /**
+     * Get valid url matcher
+     *
+     * @staticvar string $regexp
+     * @return string
+     */
+    public static function getValidUrlMatcher()
+    {
+        static $regexp = null;
+
+        if ($regexp === null) {
+            $validUrlPrecedingChars = '(?:[^A-Z0-9_@＠\$#＃' . static::$invalidCharacters . ']|^)';
+            $validUrlQueryChars = '[a-z0-9!?\*\'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]';
+            $validUrlQueryEndingChars = '[a-z0-9_&=#\/\-]';
+            $validPortNumber = '[0-9]+';
+
+            $regexp = '/(?:'                                      # $1 Complete match (preg_match() already matches everything.)
+                . '(' . $validUrlPrecedingChars . ')' # $2 Preceding characters
+                . '('                                             # $3 Complete URL
+                . '(https?:\/\/)?'                                # $4 Protocol (optional)
+                . '(' . static::getValidDomain() . ')'            # $5 Domain(s)
+                . '(?::(' . $validPortNumber . '))?'      # $6 Port number (optional)
+                . '(\/' . static::getValidUrlPath() . '*)?'       # $7 URL Path
+                . '(\?' . $validUrlQueryChars . '*' . $validUrlQueryEndingChars . ')?' # $8 Query String
+                . ')'
+                . ')/iux';
+        }
+
+        return $regexp;
+    }
+
+    /**
+     * Get domain valid chars
+     *
+     * @return string
+     */
+    private static function getDomainValidChars()
+    {
+        return '0-9a-z' . static::$latinAccents;
+    }
+
+    /**
+     * Get valid subdomain
+     *
+     * @return string
+     */
+    private static function getValidSubdomain()
+    {
+        $domainValidChars = static::getDomainValidChars();
+
+        return '(?>(?:[' . $domainValidChars . '][' . $domainValidChars . '\-_]*)?[' . $domainValidChars . ']\.)';
+    }
+
+    /**
+     * Get valid domain name
+     *
+     * @return string
+     */
+    private static function getValidDomainName()
+    {
+        $domainValidChars = static::getDomainValidChars();
+
+        return '(?:(?:[' . $domainValidChars . '][' . $domainValidChars . '\-]*)?[' . $domainValidChars . ']\.)';
+    }
+
+    /**
+     * Get domain valid unicode chars
+     *
+     * @return string
+     */
+    private static function getDomainValidUnicodeChars()
+    {
+        return '[^\p{P}\p{Z}\p{C}' . static::$invalidCharacters . static::$spaces . ']';
+    }
+
+    /**
+     * Get valid domain
+     *
+     * @return string
+     */
+    private static function getValidDomain()
+    {
+        $validSubdomain = static::getValidSubdomain();
+        $validDomainName = static::getValidDomainName();
+        $domainValidUnicodeChars = static::getDomainValidUnicodeChars();
+        $validGTLD = TldLists::getValidGTLD();
+        $validCcTLD = TldLists::getValidCcTLD();
+
+        return ''
+            // subdomains + domain + TLD
+            // e.g. www.twitter.com, foo.co.jp, bar.co.uk
+            . '(?:' . $validSubdomain . '+' . $validDomainName
+            . '(?:' . $validGTLD . '|' . $validCcTLD . '|' . static::$validPunycode . '))'
+            // domain + gTLD | protocol + unicode domain + gTLD
+            . '|(?:'
+            . '(?:'
+            . $validDomainName . '|(?:(?<=http:\/\/|https:\/\/)' . $domainValidUnicodeChars . '+\.)'
+            . ')'
+            . $validGTLD
+            . ')'
+            // domain + gTLD | some ccTLD
+            // e.g. twitter.com
+            . '|(?:' . $validDomainName . static::$validPunycode . ')'
+            . '|(?:' . $validDomainName . static::$validSpecialCcTLD . ')'
+            // protocol + domain + ccTLD | protocol + unicode domain + ccTLD
+            . '|(?:(?<=http:\/\/|https:\/\/)'
+            . '(?:' . $validDomainName . '|' . $domainValidUnicodeChars . '+\.)'
+            . $validCcTLD . ')'
+            // domain + ccTLD + '/'
+            // e.g. t.co/
+            . '|(?:' . $validDomainName . $validCcTLD . '(?=\/))';
+    }
+
+    /**
+     * Get valid url path
+     *
+     * @return string
+     */
+    private static function getValidUrlPath()
+    {
+        $validGeneralUrlPathChars = '[a-z\p{Cyrillic}0-9!\*;:=\+\,\.\$\/%#\[\]\-_~&|@' . static::$latinAccents . ']';
+
+        # Allow URL paths to contain up to two nested levels of balanced parentheses:
+        # 1. Used in Wikipedia URLs, e.g. /Primer_(film)
+        # 2. Used in IIS sessions, e.g. /S(dfd346)/
+        # 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
+        $validUrlBalancedParens = '(?:\('
+            . '(?:' . $validGeneralUrlPathChars . '+'
+            . '|'
+            // allow one nested level of balanced parentheses
+            . '(?:'
+            . $validGeneralUrlPathChars . '*'
+            . '\(' . $validGeneralUrlPathChars . '+' . '\)'
+            . $validGeneralUrlPathChars . '*'
+            . ')'
+            . ')'
+            . '\))';
+        # Valid end-of-path characters (so /foo. does not gobble the period).
+        # 1. Allow =&# for empty URL parameters and other URL-join artifacts.
+        $validUrlPathEndingChars = '[a-z\p{Cyrillic}0-9=_#\/\+\-' . static::$latinAccents . ']|(?:' . $validUrlBalancedParens . ')';
+
+        return '(?:(?:'
+            . $validGeneralUrlPathChars . '*(?:'
+            . $validUrlBalancedParens . ' '
+            . $validGeneralUrlPathChars . '*)*'
+            . $validUrlPathEndingChars . ')|(?:@'
+            . $validGeneralUrlPathChars . '+\/))';
+    }
+
     // =================================================================================================================
 
     # NOTE: PHP doesn't have Ruby's $' (dollar apostrophe) so we have to capture
@@ -681,6 +816,3 @@ public static function getValidateUrlFragmentMatcher()
         return $regexp;
     }
 }
-
-# Cause regular expressions to be initialized as soon as this file is loaded:
-Regex::__static();

From 320d905c6fabaa9ed2bc0a5592d21b9280cae303 Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Thu, 28 Dec 2017 19:03:18 +0900
Subject: [PATCH 09/10] drop Regex extends

---
 lib/Twitter/Text/Autolink.php       | 16 ++++++++++++----
 lib/Twitter/Text/Extractor.php      | 11 +++++++++--
 lib/Twitter/Text/HitHighlighter.php | 15 +++++++++++----
 lib/Twitter/Text/Validator.php      | 11 +++++++++--
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/lib/Twitter/Text/Autolink.php b/lib/Twitter/Text/Autolink.php
index b25fe49..6584718 100644
--- a/lib/Twitter/Text/Autolink.php
+++ b/lib/Twitter/Text/Autolink.php
@@ -30,7 +30,7 @@
  * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
  * @package    Twitter.Text
  */
-class Autolink extends Regex
+class Autolink
 {
 
     /**
@@ -139,6 +139,13 @@ class Autolink extends Regex
      */
     protected $extractor = null;
 
+    /**
+     * The tweet to be used in parsing.
+     *
+     * @var  string
+     */
+    protected $tweet = '';
+
     /**
      * Provides fluent method chaining.
      *
@@ -170,13 +177,14 @@ public function __construct($tweet = null, $escape = true, $full_encode = false)
     {
         if ($escape && !empty($tweet)) {
             if ($full_encode) {
-                parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false));
+                $this->tweet = htmlentities($tweet, ENT_QUOTES, 'UTF-8', false);
             } else {
-                parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false));
+                $this->tweet = htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
             }
         } else {
-            parent::__construct($tweet);
+            $this->tweet = $tweet;
         }
+
         $this->extractor = Extractor::create();
     }
 
diff --git a/lib/Twitter/Text/Extractor.php b/lib/Twitter/Text/Extractor.php
index 963d7d2..1c6da07 100644
--- a/lib/Twitter/Text/Extractor.php
+++ b/lib/Twitter/Text/Extractor.php
@@ -29,7 +29,7 @@
  * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
  * @package    Twitter.Text
  */
-class Extractor extends Regex
+class Extractor
 {
 
     /**
@@ -37,6 +37,13 @@ class Extractor extends Regex
      */
     protected $extractURLWithoutProtocol = true;
 
+    /**
+     * The tweet to be used in parsing.
+     *
+     * @var  string
+     */
+    protected $tweet = '';
+
     /**
      * Provides fluent method chaining.
      *
@@ -60,7 +67,7 @@ public static function create($tweet = null)
      */
     public function __construct($tweet = null)
     {
-        parent::__construct($tweet);
+        $this->tweet = $tweet;
     }
 
     /**
diff --git a/lib/Twitter/Text/HitHighlighter.php b/lib/Twitter/Text/HitHighlighter.php
index 0ed0387..2697592 100644
--- a/lib/Twitter/Text/HitHighlighter.php
+++ b/lib/Twitter/Text/HitHighlighter.php
@@ -27,7 +27,7 @@
  * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
  * @package    Twitter.Text
  */
-class HitHighlighter extends Regex
+class HitHighlighter
 {
 
     /**
@@ -37,6 +37,13 @@ class HitHighlighter extends Regex
      */
     protected $tag = 'em';
 
+    /**
+     * The tweet to be used in parsing.
+     *
+     * @var  string
+     */
+    protected $tweet = '';
+
     /**
      * Provides fluent method chaining.
      *
@@ -67,12 +74,12 @@ public function __construct($tweet = null, $escape = true, $full_encode = false)
     {
         if (!empty($tweet) && $escape) {
             if ($full_encode) {
-                parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false));
+                $this->tweet = htmlentities($tweet, ENT_QUOTES, 'UTF-8', false);
             } else {
-                parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false));
+                $this->tweet = htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
             }
         } else {
-            parent::__construct($tweet);
+            $this->tweet = $tweet;
         }
     }
 
diff --git a/lib/Twitter/Text/Validator.php b/lib/Twitter/Text/Validator.php
index dbe79ee..c53ae9c 100644
--- a/lib/Twitter/Text/Validator.php
+++ b/lib/Twitter/Text/Validator.php
@@ -27,7 +27,7 @@
  * @license    http://www.apache.org/licenses/LICENSE-2.0  Apache License v2.0
  * @package    Twitter.Text
  */
-class Validator extends Regex
+class Validator
 {
 
     /**
@@ -57,6 +57,13 @@ class Validator extends Regex
      */
     protected $extractor = null;
 
+    /**
+     * The tweet to be used in parsing.
+     *
+     * @var  string|null
+     */
+    protected $tweet = '';
+
     /**
      * Provides fluent method chaining.
      *
@@ -79,10 +86,10 @@ public static function create($tweet = null, $config = null)
      */
     public function __construct($tweet = null, $config = null)
     {
-        parent::__construct($tweet);
         if (!empty($config)) {
             $this->setConfiguration($config);
         }
+        $this->tweet = $tweet;
         $this->extractor = Extractor::create();
     }
 

From c763596ee45123cba643c4d31221ce0c87ce921c Mon Sep 17 00:00:00 2001
From: nojimage <nojimage@gmail.com>
Date: Thu, 28 Dec 2017 19:08:27 +0900
Subject: [PATCH 10/10] add test PHP7.2 on travis

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 7acd194..2330481 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,6 +6,7 @@ php:
   - 5.6
   - 7.0
   - 7.1
+  - 7.2
 
 dist: trusty
 sudo: false