@@ -201,36 +201,112 @@ extension String {
201201 }
202202
203203 @available ( SwiftStdlib 6 . 2 , * )
204- public var utf8Span : UTF8Span {
204+ private var _span : Span < UTF8 . CodeUnit > { // Span over this string's UTF-8 code units; the public `utf8Span` getter wraps it in a `UTF8Span`.
205205 @lifetime ( borrow self)
206206 borrowing get {
207- let isKnownASCII = _guts. isASCII
208- let utf8 = self . utf8
209- let span = utf8. span
210- let result = unsafe UTF8 Span(
211- unchecked: span,
212- isKnownASCII: isKnownASCII)
213- return unsafe _overrideLifetime ( result, borrowing: self )
207+ #if _runtime(_ObjC)
208+ // Bridged Objective-C object that is not fast UTF-8: build the span over the associated storage obtained from `_getOrAllocateAssociatedStorage()`.
209+ if !_guts. isFastUTF8, _guts. _object. hasObjCBridgeableObject {
210+ let storage = _guts. _getOrAllocateAssociatedStorage ( )
211+ let ( start, count) = unsafe ( storage. start, storage. count)
212+ let span = unsafe Span( _unsafeStart: start, count: count)
213+ return unsafe _override Lifetime ( span, borrowing: self ) // Re-tie the unsafely created span to the borrow of `self`.
214+ }
215+ #endif
216+ let count = _guts. count
217+ if _guts. isSmall { // Small form: the span starts at the address of the borrowed `self` (presumably the bytes are stored inline in the value -- TODO confirm).
218+ let a = Builtin . addressOfBorrow ( self )
219+ let address = unsafe UnsafePointer< UTF8 . CodeUnit > ( a)
220+ let span = unsafe Span( _unsafeStart: address, count: count)
221+ return unsafe _override Lifetime ( span, borrowing: self )
222+ }
223+ let isFastUTF8 = _guts. isFastUTF8 // Every remaining representation is required to be fast UTF-8; the precondition below enforces it.
224+ _precondition ( isFastUTF8, " String must be contiguous UTF8 " )
225+ let buffer = unsafe _guts. _object . fastUTF8
226+ let span = unsafe Span( _unsafeElements: buffer)
227+ return unsafe _override Lifetime ( span, borrowing: self )
214228 }
215229 }
216- }
217230
218- extension Substring {
231+ /// A `UTF8Span` over the code units that make up this string.
232+ ///
233+ /// - Note: In the case of bridged UTF16 String instances (on Apple
234+ /// platforms), this property transcodes the code units the first time
235+ /// it is called. The transcoded buffer is cached, and subsequent calls
236+ /// to `utf8Span` can reuse the buffer.
237+ ///
238+ /// - Returns: a `UTF8Span` over the code units of this String.
239+ ///
240+ /// - Complexity: O(1) for native UTF8 Strings,
241+ /// amortized O(1) for bridged UTF16 Strings.
219242 @available ( SwiftStdlib 6 . 2 , * )
220243 public var utf8Span : UTF8Span {
221244 @lifetime ( borrow self)
222245 borrowing get {
223- let isKnownASCII = base. _guts. isASCII
224- let utf8 = self . utf8
225- let span = utf8. span
226- let result = unsafe UTF8 Span(
227- unchecked: span,
228- isKnownASCII: isKnownASCII)
229- return unsafe _overrideLifetime ( result, borrowing: self )
246+ unsafe UTF8Span( unchecked: _span, isKnownASCII: _guts. isASCII) // Wraps the private `_span`; the `unchecked:` initializer presumably skips UTF-8 validation -- TODO confirm.
230247 }
231248 }
232249}
233250
251+ extension Substring {
234252
253+ @available ( SwiftStdlib 6 . 2 , * )
254+ private var _span : Span < UTF8 . CodeUnit > { // Span over this substring's UTF-8 code units; the public `utf8Span` getter wraps it in a `UTF8Span`.
255+ @lifetime ( borrow self)
256+ borrowing get {
257+ #if _runtime(_ObjC)
258+ // Bridged Objective-C base that is not fast UTF-8: measure this slice's offset and length with `_foreignDistance`, then extract that range from the base UTF8View's span.
259+ if !_wholeGuts. isFastUTF8, _wholeGuts. _object. hasObjCBridgeableObject {
260+ let base : String . UTF8View = _slice. _base. utf8
261+ let first = base. _foreignDistance ( from: base. startIndex, to: startIndex)
262+ let count = base. _foreignDistance ( from: startIndex, to: endIndex)
263+ let span = base. span. _extracting ( first..< ( first &+ count) )
264+ return unsafe _override Lifetime ( span, borrowing: self ) // Re-tie the extracted span to the borrow of `self`.
265+ }
266+ #endif
267+ let first = _slice. _startIndex. _encodedOffset // Encoded offsets of the slice bounds, used below as offsets into the base string's code units.
268+ let end = _slice. _endIndex. _encodedOffset
269+ if _wholeGuts. isSmall { // Small form: the span is addressed relative to the borrowed `self`.
270+ let a = Builtin . addressOfBorrow ( self )
271+ let offset = first &+ ( 2 &* MemoryLayout< String . Index> . stride) // NOTE(review): skips two String.Index strides -- presumably the slice's start/end indices precede the base string in Substring's layout; TODO confirm.
272+ let start = unsafe UnsafePointer< UTF8 . CodeUnit > ( a) . advanced ( by: offset)
273+ let span = unsafe Span( _unsafeStart: start, count: end &- first)
274+ return unsafe _override Lifetime ( span, borrowing: self )
275+ }
276+ let isFastUTF8 = _wholeGuts. isFastUTF8 // Every remaining representation is required to be fast UTF-8; the precondition below enforces it.
277+ _precondition ( isFastUTF8, " Substring must be contiguous UTF8 " )
278+ var span = unsafe Span( _unsafeElements: _wholeGuts. _object. fastUTF8)
279+ span = span. _extracting ( first..< end) // Narrow the whole-string span to this slice's range.
280+ return unsafe _override Lifetime ( span, borrowing: self )
281+ }
282+ }
235283
236-
284+ /// A `UTF8Span` over the code units that make up this substring.
285+ ///
286+ /// - Note: In the case of bridged UTF16 String instances (on Apple
287+ /// platforms), this property needs to transcode the code units every time
288+ /// it is called.
289+ /// For example, if `string` has the bridged UTF16 representation,
290+ /// for word in string.split(separator: " ") {
291+ /// useSpan(word.utf8Span)
292+ /// }
293+ /// is accidentally quadratic because of this issue. A workaround is to
294+ /// explicitly convert the string into its native UTF8 representation:
295+ /// var nativeString = consume string
296+ /// nativeString.makeContiguousUTF8()
297+ /// for word in nativeString.split(separator: " ") {
298+ /// useSpan(word.utf8Span)
299+ /// }
300+ /// This second option has linear time complexity, as expected.
301+ ///
302+ /// - Returns: a `UTF8Span` over the code units of this Substring.
303+ ///
304+ /// - Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings.
305+ @available ( SwiftStdlib 6 . 2 , * )
306+ public var utf8Span : UTF8Span {
307+ @lifetime ( borrow self)
308+ borrowing get {
309+ unsafe UTF8Span( unchecked: _span, isKnownASCII: base. _guts. isASCII) // Wraps the private `_span`; the ASCII flag is read from the whole base string's guts, not just this slice.
310+ }
311+ }
312+ }
0 commit comments