Skip to content

Commit

Permalink
Add encoding tests to os-string
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Dec 12, 2023
1 parent 935e13f commit f9b1285
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 0 deletions.
19 changes: 19 additions & 0 deletions os-string.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,25 @@ test-suite bytestring-tests
, os-string
, QuickCheck >=2.7 && <2.15

test-suite encoding-tests
default-language: Haskell2010
ghc-options: -Wall
type: exitcode-stdio-1.0
main-is: Main.hs
hs-source-dirs: tests tests/encoding
other-modules:
Arbitrary
EncodingSpec
TestUtil

build-depends:
, base
, bytestring >=0.11.3.0
, deepseq
, os-string
, QuickCheck >=2.7 && <2.15
, quickcheck-classes-base ^>=0.6.2

benchmark bench
main-is: Bench.hs
other-modules: BenchOsString
Expand Down
69 changes: 69 additions & 0 deletions tests/Arbitrary.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{-# OPTIONS_GHC -Wno-orphans #-}

module Arbitrary where

import Data.Char
import Data.Maybe
import System.OsString
import System.OsString.Internal.Types
import qualified System.OsString.Posix as Posix
import qualified System.OsString.Windows as Windows
import Data.ByteString ( ByteString )
import qualified Data.ByteString as ByteString
import Test.QuickCheck


instance Arbitrary OsString where
arbitrary = fmap fromJust $ encodeUtf <$> listOf filepathChar

instance Arbitrary PosixString where
arbitrary = fmap fromJust $ Posix.encodeUtf <$> listOf filepathChar

instance Arbitrary WindowsString where
arbitrary = fmap fromJust $ Windows.encodeUtf <$> listOf filepathChar


newtype NonNullString = NonNullString { nonNullString :: String }
deriving Show

instance Arbitrary NonNullString where
arbitrary = NonNullString <$> listOf filepathChar

filepathChar :: Gen Char
filepathChar = arbitraryUnicodeChar `suchThat` (\c -> not (isNull c) && isValidUnicode c)
where
isNull = (== '\NUL')
isValidUnicode c = case generalCategory c of
Surrogate -> False
NotAssigned -> False
_ -> True


newtype NonNullAsciiString = NonNullAsciiString { nonNullAsciiString :: String }
deriving Show

instance Arbitrary NonNullAsciiString where
arbitrary = NonNullAsciiString <$> listOf filepathAsciiChar

filepathAsciiChar :: Gen Char
filepathAsciiChar = arbitraryASCIIChar `suchThat` (\c -> not (isNull c))
where
isNull = (== '\NUL')

newtype NonNullSurrogateString = NonNullSurrogateString { nonNullSurrogateString :: String }
deriving Show

instance Arbitrary NonNullSurrogateString where
arbitrary = NonNullSurrogateString <$> listOf filepathWithSurrogates

filepathWithSurrogates :: Gen Char
filepathWithSurrogates =
frequency
[(3, arbitraryASCIIChar),
(1, arbitraryUnicodeChar),
(1, arbitraryBoundedEnum)
]


instance Arbitrary ByteString where arbitrary = ByteString.pack <$> arbitrary
instance CoArbitrary ByteString where coarbitrary = coarbitrary . ByteString.unpack
170 changes: 170 additions & 0 deletions tests/encoding/EncodingSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE TypeApplications #-}

module EncodingSpec where

import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS

import Arbitrary
import Test.QuickCheck

import Data.Either ( isRight )
import qualified System.OsString.Data.ByteString.Short as BS8
import qualified System.OsString.Data.ByteString.Short.Word16 as BS16
import System.OsString.Encoding.Internal
import GHC.IO (unsafePerformIO)
import GHC.IO.Encoding ( setFileSystemEncoding )
import System.IO
( utf16le )
import Control.Exception
import Control.DeepSeq
import Data.Bifunctor ( first )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )


tests :: [(String, Property)]
tests =
[ ("ucs2le_decode . ucs2le_encode == id",
property $ \(padEven -> ba) ->
let decoded = decodeWithTE ucs2le (BS8.toShort ba)
encoded = encodeWithTE ucs2le =<< decoded
in (BS8.fromShort <$> encoded) === Right ba)
, ("utf16 doesn't handle invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encodeWithTE utf16le str
decoded = decodeWithTE utf16le =<< encoded
#if __GLASGOW_HASKELL__ >= 904
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif
, ("ucs2 handles invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encodeWithTE ucs2le str
decoded = decodeWithTE ucs2le =<< encoded
in decoded === Right str)
, ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded
in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))

, ("can decode arbitrary strings through utf-8 (with RoundtripFailure)",
property $
\(NonNullSurrogateString str) ->
let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str))

, ("utf-8 roundtrip encode cannot deal with some surrogates",
property $
let str = [toEnum 0xDFF0, toEnum 0xDFF2]
encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
#if __GLASGOW_HASKELL__ >= 904
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif

, ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded
in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeW' (BS8.toShort bs)
encoded = encodeW' =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)",
property $
\bs -> ioProperty $ do
setFileSystemEncoding (mkUTF8 TransliterateCodingFailure)
let decoded = decodeP' (BS8.toShort bs)
encoded = encodeP' =<< decoded
pure $ (isRight encoded, isRight decoded) === (True, True))

, ("decodeWithBaseWindows == utf16le_b",
property $
\(BS8.toShort . padEven -> bs) ->
let decoded = decodeW' bs
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs
in decoded === decoded')

, ("encodeWithBaseWindows == utf16le_b",
property $
\(NonNullSurrogateString str) ->
let decoded = encodeW' str
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str
in decoded === decoded')

, ("encodeWithTE/decodeWithTE never fails (utf16le_b)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
]


padEven :: ByteString -> ByteString
padEven bs
| even (BS.length bs) = bs
| otherwise = bs `BS.append` BS.pack [70]


decodeP' :: BS8.ShortByteString -> Either String String
decodeP' ba = unsafePerformIO $ do
r <- try @SomeException $ decodeWithBasePosix ba
evaluate $ force $ first displayException r

encodeP' :: String -> Either String BS8.ShortByteString
encodeP' str = unsafePerformIO $ do
r <- try @SomeException $ encodeWithBasePosix str
evaluate $ force $ first displayException r

decodeW' :: BS16.ShortByteString -> Either String String
decodeW' ba = unsafePerformIO $ do
r <- try @SomeException $ decodeWithBaseWindows ba
evaluate $ force $ first displayException r

encodeW' :: String -> Either String BS8.ShortByteString
encodeW' str = unsafePerformIO $ do
r <- try @SomeException $ encodeWithBaseWindows str
evaluate $ force $ first displayException r

10 changes: 10 additions & 0 deletions tests/encoding/Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{-# LANGUAGE TypeApplications #-}

module Main (main) where

import qualified EncodingSpec as Spec
import TestUtil

main :: IO ()
main = runTests (Spec.tests)

0 comments on commit f9b1285

Please sign in to comment.