-
Notifications
You must be signed in to change notification settings - Fork 9
/
regexp.go
163 lines (133 loc) · 7.02 KB
/
regexp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
package reference
import (
"regexp"
"strings"
)
// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
var DigestRegexp = regexp.MustCompile(digestPat)
// DomainRegexp matches hostname or IP-addresses, optionally including a port
// number. It defines the structure of potential domain components that may be
// part of image names. This is purposely a subset of what is allowed by DNS to
// ensure backwards compatibility with Docker image names. It may be a subset of
// DNS domain name, an IPv4 address in decimal format, or an IPv6 address between
// square brackets (excluding zone identifiers as defined by [RFC 6874] or special
// addresses such as IPv4-Mapped).
//
// [RFC 6874]: https://www.rfc-editor.org/rfc/rfc6874.
var DomainRegexp = regexp.MustCompile(domainAndPort)
// IdentifierRegexp is the format for string identifier used as a
// content addressable identifier using sha256. These identifiers
// are like digests without the algorithm, since sha256 is used.
var IdentifierRegexp = regexp.MustCompile(identifier)
// NameRegexp is the format for the name component of references, including
// an optional domain and port, but without tag or digest suffix.
var NameRegexp = regexp.MustCompile(namePat)
// ReferenceRegexp is the full supported format of a reference. The regexp
// is anchored and has capturing groups for name, tag, and digest
// components.
var ReferenceRegexp = regexp.MustCompile(referencePat)
// TagRegexp matches valid tag names. From [docker/docker:graph/tags.go].
//
// [docker/docker:graph/tags.go]: https://github.com/moby/moby/blob/v1.6.0/graph/tags.go#L26-L28
var TagRegexp = regexp.MustCompile(tag)
const (
// alphanumeric defines the alphanumeric atom, typically a
// component of names. This only allows lower case characters and digits.
alphanumeric = `[a-z0-9]+`
// separator defines the separators allowed to be embedded in name
// components. This allows one period, one or two underscore and multiple
// dashes. Repeated dashes and underscores are intentionally treated
// differently. In order to support valid hostnames as name components,
// supporting repeated dash was added. Additionally double underscore is
// now allowed as a separator to loosen the restriction for previously
// supported names.
separator = `(?:[._]|__|[-]+)`
// localhost is treated as a special value for domain-name. Any other
// domain-name without a "." or a ":port" are considered a path component.
localhost = `localhost`
// domainNameComponent restricts the registry domain component of a
// repository name to start with a component as defined by DomainRegexp.
domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`
// optionalPort matches an optional port-number including the port separator
// (e.g. ":80").
optionalPort = `(?::[0-9]+)?`
// tag matches valid tag names. From docker/docker:graph/tags.go.
tag = `[\w][\w.-]{0,127}`
// digestPat matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
//
// TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/[email protected]#DigestRegexp
// so that go-digest defines the canonical format. Note that the go-digest is
// more relaxed:
// - it allows multiple algorithms (e.g. "sha256+b64:<encoded>") to allow
// future expansion of supported algorithms.
// - it allows the "<encoded>" value to use urlsafe base64 encoding as defined
// in [rfc4648, section 5].
//
// [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5.
digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`
// identifier is the format for a content addressable identifier using sha256.
// These identifiers are like digests without the algorithm, since sha256 is used.
identifier = `([a-f0-9]{64})`
// ipv6address are enclosed between square brackets and may be represented
// in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format
// are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as
// IPv4-Mapped are deliberately excluded.
ipv6address = `\[(?:[a-fA-F0-9:]+)\]`
)
var (
// domainName defines the structure of potential domain components
// that may be part of image names. This is purposely a subset of what is
// allowed by DNS to ensure backwards compatibility with Docker image
// names. This includes IPv4 addresses on decimal format.
domainName = domainNameComponent + anyTimes(`\.`+domainNameComponent)
// host defines the structure of potential domains based on the URI
// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
// or an IPv4 address in decimal format, or an IPv6 address between square
// brackets (excluding zone identifiers as defined by rfc6874 or special
// addresses such as IPv4-Mapped).
host = `(?:` + domainName + `|` + ipv6address + `)`
// allowed by the URI Host subcomponent on rfc3986 to ensure backwards
// compatibility with Docker image names.
domainAndPort = host + optionalPort
// anchoredTagRegexp matches valid tag names, anchored at the start and
// end of the matched string.
anchoredTagRegexp = regexp.MustCompile(anchored(tag))
// anchoredDigestRegexp matches valid digests, anchored at the start and
// end of the matched string.
anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
// pathComponent restricts path-components to start with an alphanumeric
// character, with following parts able to be separated by a separator
// (one period, one or two underscore and multiple dashes).
pathComponent = alphanumeric + anyTimes(separator+alphanumeric)
// remoteName matches the remote-name of a repository. It consists of one
// or more forward slash (/) delimited path-components:
//
// pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
remoteName = pathComponent + anyTimes(`/`+pathComponent)
namePat = optional(domainAndPort+`/`) + remoteName
// anchoredNameRegexp is used to parse a name value, capturing the
// domain and trailing components.
anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
// anchoredIdentifierRegexp is used to check or match an
// identifier value, anchored at start and end of string.
anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
)
// optional wraps the expression in a non-capturing group and makes the
// production optional.
func optional(res ...string) string {
return `(?:` + strings.Join(res, "") + `)?`
}
// anyTimes wraps the expression in a non-capturing group that can occur
// any number of times.
func anyTimes(res ...string) string {
return `(?:` + strings.Join(res, "") + `)*`
}
// capture wraps the expression in a capturing group.
func capture(res ...string) string {
return `(` + strings.Join(res, "") + `)`
}
// anchored anchors the regular expression by adding start and end delimiters.
func anchored(res ...string) string {
return `^` + strings.Join(res, "") + `$`
}