Skip to content

Commit 744f9fb

Browse files
committed
update user agent parser regexes
1 parent 56fae52 commit 744f9fb

File tree

2 files changed

+125
-31
lines changed

2 files changed

+125
-31
lines changed

pkg/embedded/uap/regexes.patch

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
diff --git a/pkg/embedded/uap/regexes.yml b/pkg/embedded/uap/regexes.yml
2-
index 302e4b5..34ab79c 100644
2+
index 26a64f1..826c764 100644
33
--- a/pkg/embedded/uap/regexes.yml
44
+++ b/pkg/embedded/uap/regexes.yml
55
@@ -1,3 +1,4 @@
@@ -10,7 +10,7 @@ index 302e4b5..34ab79c 100644
1010
@@ -149,6 +150,7 @@ user_agent_parsers:
1111

1212
# Bots
13-
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
13+
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|GoogleOther|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|GPTBot)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
1414
+ family_replacement: 'bot'
1515

1616
# AWS S3 Clients
@@ -29,7 +29,7 @@ index 302e4b5..34ab79c 100644
2929

3030
# HbbTV standard defines what features the browser should understand.
3131
# but it's like targeting "HTML5 browsers", effective browser support depends on the model
32-
@@ -247,30 +252,30 @@ user_agent_parsers:
32+
@@ -250,30 +255,30 @@ user_agent_parsers:
3333

3434
# Firefox
3535
- regex: '(Fennec)/(\d+)\.(\d+)\.?([ab]?\d+[a-z]*)'
@@ -71,7 +71,7 @@ index 302e4b5..34ab79c 100644
7171

7272
# e.g.: Flock/2.0b2
7373
- regex: '(Flock)/(\d+)\.(\d+)(b\d+?)'
74-
@@ -301,18 +306,18 @@ user_agent_parsers:
74+
@@ -304,18 +309,18 @@ user_agent_parsers:
7575
- regex: '(Opera Tablet).{0,200}Version/(\d+)\.(\d+)(?:\.(\d+)|)'
7676
- regex: '(Opera Mini)(?:/att|)/?(\d+|)(?:\.(\d+)|)(?:\.(\d+)|)'
7777
- regex: '(Opera)/.{1,100}Opera Mobi.{1,100}Version/(\d+)\.(\d+)'
@@ -95,7 +95,7 @@ index 302e4b5..34ab79c 100644
9595

9696
# Opera >=15 for Desktop is similar to Chrome but includes an "OPR" Version string.
9797
- regex: '(?:Chrome).{1,300}(OPR)/(\d+)\.(\d+)\.(\d+)'
98-
@@ -320,15 +325,15 @@ user_agent_parsers:
98+
@@ -323,15 +328,15 @@ user_agent_parsers:
9999

100100
# Opera Coast
101101
- regex: '(Coast)/(\d+).(\d+).(\d+)'
@@ -114,7 +114,7 @@ index 302e4b5..34ab79c 100644
114114

115115
# Palm WebOS looks a lot like Safari.
116116
- regex: '(hpw|web)OS/(\d+)\.(\d+)(?:\.(\d+)|)'
117-
@@ -420,9 +425,9 @@ user_agent_parsers:
117+
@@ -423,9 +428,9 @@ user_agent_parsers:
118118

119119
# Edge Mobile
120120
- regex: 'Windows Phone .{0,200}(Edge)/(\d+)\.(\d+)'
@@ -125,8 +125,17 @@ index 302e4b5..34ab79c 100644
125125
+ family_replacement: 'Edge'
126126

127127
# Oculus Browser, should go before Samsung Internet
128-
- regex: '(OculusBrowser)/(\d+)\.(\d+).0.0(?:\.([0-9\-]+)|)'
129-
@@ -472,7 +477,7 @@ user_agent_parsers:
128+
- regex: '(OculusBrowser)/(\d+)\.(\d+)(?:\.([0-9\-]+)|)'
129+
@@ -447,7 +452,7 @@ user_agent_parsers:
130+
- regex: '(baidubrowser)[/\s](\d+)(?:\.(\d+)|)(?:\.(\d+)|)'
131+
family_replacement: 'Baidu Browser'
132+
- regex: '(FlyFlow)/(\d+)\.(\d+)'
133+
- family_replacement: 'Baidu Explorer'
134+
+ family_replacement: 'Baidu Browser'
135+
136+
# MxBrowser is Maxthon. Must go before Mobile Chrome for Android
137+
- regex: '(MxBrowser)/(\d+)\.(\d+)(?:\.(\d+)|)'
138+
@@ -475,7 +480,7 @@ user_agent_parsers:
130139

131140
# Google Search App on Android, eg:
132141
- regex: 'Mozilla.{1,200}Android.{1,200}(GSA)/(\d+)\.(\d+)\.(\d+)'
@@ -135,7 +144,7 @@ index 302e4b5..34ab79c 100644
135144

136145
# QQ Browsers
137146
- regex: '(MQQBrowser/Mini)(?:(\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)'
138-
@@ -484,11 +489,11 @@ user_agent_parsers:
147+
@@ -487,11 +492,11 @@ user_agent_parsers:
139148

140149
# DuckDuckGo
141150
- regex: 'Mozilla.{1,200}Mobile.{1,100}(DuckDuckGo)/(\d+)'
@@ -149,7 +158,7 @@ index 302e4b5..34ab79c 100644
149158
- regex: 'Mozilla.{1,200}(Ddg)/(\d+)(?:\.(\d+)|)'
150159
family_replacement: 'DuckDuckGo'
151160

152-
@@ -498,9 +503,9 @@ user_agent_parsers:
161+
@@ -501,9 +506,9 @@ user_agent_parsers:
153162

154163
# Ecosia on iOS / Android
155164
- regex: '(Ecosia) ios@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
@@ -161,7 +170,7 @@ index 302e4b5..34ab79c 100644
161170

162171
# VivoBrowser
163172
- regex: '(VivoBrowser)\/(\d+)\.(\d+)\.(\d+)\.(\d+)'
164-
@@ -510,17 +515,17 @@ user_agent_parsers:
173+
@@ -513,17 +518,17 @@ user_agent_parsers:
165174

166175
# Chrome Mobile
167176
- regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
@@ -185,15 +194,15 @@ index 302e4b5..34ab79c 100644
185194

186195
# Chrome Frame must come before MSIE.
187196
- regex: '(chromeframe)/(\d+)\.(\d+)\.(\d+)'
188-
@@ -684,6 +689,7 @@ user_agent_parsers:
197+
@@ -766,6 +771,7 @@ user_agent_parsers:
189198

190199
# Chrome/Chromium/major_version.minor_version
191200
- regex: '(Chromium|Chrome)/(\d+)\.(\d+)(?:\.(\d+)|)(?:\.(\d+)|)'
192201
+ family_replacement: 'Chrome'
193202

194203
##########
195204
# IE Mobile needs to happen before Android to catch cases such as:
196-
@@ -695,7 +701,7 @@ user_agent_parsers:
205+
@@ -777,7 +783,7 @@ user_agent_parsers:
197206

198207
# IE Mobile
199208
- regex: '(IEMobile)[ /](\d+)\.(\d+)'
@@ -202,7 +211,7 @@ index 302e4b5..34ab79c 100644
202211

203212
# Baca Berita App News Reader
204213
- regex: '(BacaBerita App)\/(\d+)\.(\d+)\.(\d+)'
205-
@@ -845,15 +851,15 @@ user_agent_parsers:
214+
@@ -927,15 +933,15 @@ user_agent_parsers:
206215
- regex: '(iPod|iPhone|iPad).{1,200}GSA/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|) Mobile'
207216
family_replacement: 'Google'
208217
- regex: '(iPod|iPhone|iPad).{1,200}Version/(\d+)\.(\d+)(?:\.(\d+)|).{1,200}[ +]Safari'
@@ -223,7 +232,7 @@ index 302e4b5..34ab79c 100644
223232
- regex: '(Watch)(\d+),(\d+)'
224233
family_replacement: 'Apple $1 App'
225234

226-
@@ -928,7 +934,7 @@ user_agent_parsers:
235+
@@ -1010,7 +1016,7 @@ user_agent_parsers:
227236

228237
# WebKit Nightly
229238
- regex: '(AppleWebKit)/(\d+)(?:\.(\d+)|)\+ .{0,200} Safari'
@@ -232,7 +241,7 @@ index 302e4b5..34ab79c 100644
232241

233242
# Safari
234243
- regex: '(Version)/(\d+)\.(\d+)(?:\.(\d+)|).{0,100}Safari/'
235-
@@ -1173,7 +1179,7 @@ os_parsers:
244+
@@ -1255,7 +1261,7 @@ os_parsers:
236245
# Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000
237246
# These are some intermediate "Nest Hub" Chromecast devices running Fuchsia.
238247
- regex: 'Fuchsia.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
@@ -241,7 +250,7 @@ index 302e4b5..34ab79c 100644
241250

242251
# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker
243252
- regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|).*DeviceType/SmartSpeaker'
244-
@@ -1182,7 +1188,7 @@ os_parsers:
253+
@@ -1264,7 +1270,7 @@ os_parsers:
245254
# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast
246255
# These are the oldest Chromecast devices that ran Linux.
247256
- regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
@@ -250,7 +259,7 @@ index 302e4b5..34ab79c 100644
250259

251260
##########
252261
# Android
253-
@@ -1357,35 +1363,35 @@ os_parsers:
262+
@@ -1439,35 +1445,35 @@ os_parsers:
254263
# @ref: http://www.puredarwin.org/curious/versions
255264
##########
256265
- regex: '((?:Mac[ +]?|; )OS[ +]X)[\s+/](?:(\d+)[_.](\d+)(?:[_.](\d+)|)|Mach-O)'
@@ -293,7 +302,7 @@ index 302e4b5..34ab79c 100644
293302
os_v1_replacement: '10'
294303
os_v2_replacement: '9'
295304
# Yosemite is Darwin/14.x but patch versions are inconsistent in the Darwin string;
296-
@@ -1393,7 +1399,7 @@ os_parsers:
305+
@@ -1475,7 +1481,7 @@ os_parsers:
297306

298307
# IE on Mac doesn't specify version number
299308
- regex: 'Mac_PowerPC'
@@ -302,7 +311,7 @@ index 302e4b5..34ab79c 100644
302311

303312
# builds before tiger don't seem to specify version?
304313

305-
@@ -1402,14 +1408,14 @@ os_parsers:
314+
@@ -1484,14 +1490,14 @@ os_parsers:
306315

307316
# Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
308317
- regex: '^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
@@ -319,7 +328,7 @@ index 302e4b5..34ab79c 100644
319328

320329
##########
321330
# iOS
322-
@@ -1463,11 +1469,11 @@ os_parsers:
331+
@@ -1545,11 +1551,11 @@ os_parsers:
323332
os_replacement: 'iOS'
324333
os_v1_replacement: '8'
325334
- regex: '(CF)(Network)/(720)\.(\d)'
@@ -333,7 +342,7 @@ index 302e4b5..34ab79c 100644
333342
os_v1_replacement: '10'
334343
os_v2_replacement: '11'
335344
- regex: 'CFNetwork/7.{0,100} Darwin/15\.4\.\d+'
336-
@@ -1502,15 +1508,15 @@ os_parsers:
345+
@@ -1584,15 +1590,15 @@ os_parsers:
337346
# @ref: https://en.wikipedia.org/wiki/Darwin_(operating_system)#Release_history
338347
##########
339348
- regex: 'CFNetwork/.{0,100} Darwin/17\.\d+.{0,100}\(x86_64\)'
@@ -352,7 +361,7 @@ index 302e4b5..34ab79c 100644
352361
os_v1_replacement: '10'
353362
os_v2_replacement: '11'
354363
##########
355-
@@ -1881,7 +1887,8 @@ os_parsers:
364+
@@ -1963,7 +1969,8 @@ os_parsers:
356365
# Generic patterns
357366
# since the majority of os cases are very specific, these go last
358367
##########
@@ -362,7 +371,7 @@ index 302e4b5..34ab79c 100644
362371

363372
# Gentoo Linux + Kernel Version
364373
- regex: '(Linux)[ /](\d+)\.(\d+)(?:\.(\d+)|).{0,100}gentoo'
365-
@@ -1892,7 +1899,9 @@ os_parsers:
374+
@@ -1974,7 +1981,9 @@ os_parsers:
366375

367376
# just os
368377
- regex: '(Windows|Android|WeTab|Maemo|Web0S)'
@@ -373,7 +382,7 @@ index 302e4b5..34ab79c 100644
373382
# Linux + Kernel Version
374383
- regex: '(Linux)(?:[ /](\d+)\.(\d+)(?:\.(\d+)|)|)'
375384
- regex: 'SunOS'
376-
@@ -1901,7 +1910,7 @@ os_parsers:
385+
@@ -1983,7 +1992,7 @@ os_parsers:
377386
- regex: '\(linux-gnu\)'
378387
os_replacement: 'Linux'
379388
- regex: '\(x86_64-redhat-linux-gnu\)'
@@ -382,7 +391,7 @@ index 302e4b5..34ab79c 100644
382391
- regex: '\((freebsd)(\d+)\.(\d+)\)'
383392
os_replacement: 'FreeBSD'
384393
- regex: 'linux'
385-
@@ -1917,7 +1926,7 @@ os_parsers:
394+
@@ -1999,7 +2008,7 @@ os_parsers:
386395
# APN/1.0 HashiCorp/1.0 Terraform/1.8.1 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go-v2/1.18.0 os/macos lang/go/1.19.8 md/GOOS/darwin md/GOARCH/arm64 api/identitystore/1.16.11
387396
##########
388397
- regex: 'os\/macos[#]?(\d*)[.]?(\d*)[.]?(\d*)'

0 commit comments

Comments
 (0)