-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
268 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
{-# OPTIONS_GHC -Wno-orphans #-} | ||
|
||
module Arbitrary where | ||
|
||
import Data.Char | ||
import Data.Maybe | ||
import System.OsString | ||
import System.OsString.Internal.Types | ||
import qualified System.OsString.Posix as Posix | ||
import qualified System.OsString.Windows as Windows | ||
import Data.ByteString ( ByteString ) | ||
import qualified Data.ByteString as ByteString | ||
import Test.QuickCheck | ||
|
||
|
||
instance Arbitrary OsString where | ||
arbitrary = fmap fromJust $ encodeUtf <$> listOf filepathChar | ||
|
||
instance Arbitrary PosixString where | ||
arbitrary = fmap fromJust $ Posix.encodeUtf <$> listOf filepathChar | ||
|
||
instance Arbitrary WindowsString where | ||
arbitrary = fmap fromJust $ Windows.encodeUtf <$> listOf filepathChar | ||
|
||
|
||
newtype NonNullString = NonNullString { nonNullString :: String } | ||
deriving Show | ||
|
||
instance Arbitrary NonNullString where | ||
arbitrary = NonNullString <$> listOf filepathChar | ||
|
||
filepathChar :: Gen Char | ||
filepathChar = arbitraryUnicodeChar `suchThat` (\c -> not (isNull c) && isValidUnicode c) | ||
where | ||
isNull = (== '\NUL') | ||
isValidUnicode c = case generalCategory c of | ||
Surrogate -> False | ||
NotAssigned -> False | ||
_ -> True | ||
|
||
|
||
newtype NonNullAsciiString = NonNullAsciiString { nonNullAsciiString :: String } | ||
deriving Show | ||
|
||
instance Arbitrary NonNullAsciiString where | ||
arbitrary = NonNullAsciiString <$> listOf filepathAsciiChar | ||
|
||
filepathAsciiChar :: Gen Char | ||
filepathAsciiChar = arbitraryASCIIChar `suchThat` (\c -> not (isNull c)) | ||
where | ||
isNull = (== '\NUL') | ||
|
||
newtype NonNullSurrogateString = NonNullSurrogateString { nonNullSurrogateString :: String } | ||
deriving Show | ||
|
||
instance Arbitrary NonNullSurrogateString where | ||
arbitrary = NonNullSurrogateString <$> listOf filepathWithSurrogates | ||
|
||
filepathWithSurrogates :: Gen Char | ||
filepathWithSurrogates = | ||
frequency | ||
[(3, arbitraryASCIIChar), | ||
(1, arbitraryUnicodeChar), | ||
(1, arbitraryBoundedEnum) | ||
] | ||
|
||
|
||
instance Arbitrary ByteString where arbitrary = ByteString.pack <$> arbitrary | ||
instance CoArbitrary ByteString where coarbitrary = coarbitrary . ByteString.unpack |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
{-# LANGUAGE CPP #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE ViewPatterns #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
|
||
module EncodingSpec where | ||
|
||
import Data.ByteString ( ByteString ) | ||
import qualified Data.ByteString as BS | ||
|
||
import Arbitrary | ||
import Test.QuickCheck | ||
|
||
import Data.Either ( isRight ) | ||
import qualified System.OsString.Data.ByteString.Short as BS8 | ||
import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 | ||
import System.OsString.Encoding.Internal | ||
import GHC.IO (unsafePerformIO) | ||
import GHC.IO.Encoding ( setFileSystemEncoding ) | ||
import System.IO | ||
( utf16le ) | ||
import Control.Exception | ||
import Control.DeepSeq | ||
import Data.Bifunctor ( first ) | ||
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) | ||
import GHC.IO.Encoding.UTF16 ( mkUTF16le ) | ||
import GHC.IO.Encoding.UTF8 ( mkUTF8 ) | ||
|
||
|
||
tests :: [(String, Property)] | ||
tests = | ||
[ ("ucs2le_decode . ucs2le_encode == id", | ||
property $ \(padEven -> ba) -> | ||
let decoded = decodeWithTE ucs2le (BS8.toShort ba) | ||
encoded = encodeWithTE ucs2le =<< decoded | ||
in (BS8.fromShort <$> encoded) === Right ba) | ||
, ("utf16 doesn't handle invalid surrogate pairs", | ||
property $ | ||
let str = [toEnum 55296, toEnum 55297] | ||
encoded = encodeWithTE utf16le str | ||
decoded = decodeWithTE utf16le =<< encoded | ||
#if __GLASGOW_HASKELL__ >= 904 | ||
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) | ||
#else | ||
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) | ||
#endif | ||
, ("ucs2 handles invalid surrogate pairs", | ||
property $ | ||
let str = [toEnum 55296, toEnum 55297] | ||
encoded = encodeWithTE ucs2le str | ||
decoded = decodeWithTE ucs2le =<< encoded | ||
in decoded === Right str) | ||
, ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)", | ||
property $ | ||
\bs -> | ||
let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded | ||
in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) | ||
|
||
, ("can decode arbitrary strings through utf-8 (with RoundtripFailure)", | ||
property $ | ||
\(NonNullSurrogateString str) -> | ||
let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str | ||
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded | ||
in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str)) | ||
|
||
, ("utf-8 roundtrip encode cannot deal with some surrogates", | ||
property $ | ||
let str = [toEnum 0xDFF0, toEnum 0xDFF2] | ||
encoded = encodeWithTE (mkUTF8 RoundtripFailure) str | ||
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded | ||
#if __GLASGOW_HASKELL__ >= 904 | ||
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) | ||
#else | ||
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) | ||
#endif | ||
|
||
, ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)", | ||
property $ | ||
\(padEven -> bs) -> | ||
let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded | ||
in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) | ||
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)", | ||
property $ | ||
\(padEven -> bs) -> | ||
let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded | ||
in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) | ||
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)", | ||
property $ | ||
\bs -> | ||
let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded | ||
in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) | ||
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)", | ||
property $ | ||
\(padEven -> bs) -> | ||
let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded | ||
in (isRight encoded, isRight decoded) === (True, True)) | ||
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)", | ||
property $ | ||
\bs -> | ||
let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded | ||
in (isRight encoded, isRight decoded) === (True, True)) | ||
, ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)", | ||
property $ | ||
\(padEven -> bs) -> | ||
let decoded = decodeW' (BS8.toShort bs) | ||
encoded = encodeW' =<< decoded | ||
in (isRight encoded, isRight decoded) === (True, True)) | ||
, ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)", | ||
property $ | ||
\bs -> ioProperty $ do | ||
setFileSystemEncoding (mkUTF8 TransliterateCodingFailure) | ||
let decoded = decodeP' (BS8.toShort bs) | ||
encoded = encodeP' =<< decoded | ||
pure $ (isRight encoded, isRight decoded) === (True, True)) | ||
|
||
, ("decodeWithBaseWindows == utf16le_b", | ||
property $ | ||
\(BS8.toShort . padEven -> bs) -> | ||
let decoded = decodeW' bs | ||
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs | ||
in decoded === decoded') | ||
|
||
, ("encodeWithBaseWindows == utf16le_b", | ||
property $ | ||
\(NonNullSurrogateString str) -> | ||
let decoded = encodeW' str | ||
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str | ||
in decoded === decoded') | ||
|
||
, ("encodeWithTE/decodeWithTE never fails (utf16le_b)", | ||
property $ | ||
\(padEven -> bs) -> | ||
let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs) | ||
encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded | ||
in (isRight encoded, isRight decoded) === (True, True)) | ||
] | ||
|
||
|
||
padEven :: ByteString -> ByteString | ||
padEven bs | ||
| even (BS.length bs) = bs | ||
| otherwise = bs `BS.append` BS.pack [70] | ||
|
||
|
||
decodeP' :: BS8.ShortByteString -> Either String String | ||
decodeP' ba = unsafePerformIO $ do | ||
r <- try @SomeException $ decodeWithBasePosix ba | ||
evaluate $ force $ first displayException r | ||
|
||
encodeP' :: String -> Either String BS8.ShortByteString | ||
encodeP' str = unsafePerformIO $ do | ||
r <- try @SomeException $ encodeWithBasePosix str | ||
evaluate $ force $ first displayException r | ||
|
||
decodeW' :: BS16.ShortByteString -> Either String String | ||
decodeW' ba = unsafePerformIO $ do | ||
r <- try @SomeException $ decodeWithBaseWindows ba | ||
evaluate $ force $ first displayException r | ||
|
||
encodeW' :: String -> Either String BS8.ShortByteString | ||
encodeW' str = unsafePerformIO $ do | ||
r <- try @SomeException $ encodeWithBaseWindows str | ||
evaluate $ force $ first displayException r | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{-# LANGUAGE TypeApplications #-} | ||
|
||
module Main (main) where | ||
|
||
import qualified EncodingSpec as Spec | ||
import TestUtil | ||
|
||
main :: IO () | ||
main = runTests (Spec.tests) | ||
|