|  | 
|  | 1 | +# frozen_string_literal: true | 
|  | 2 | + | 
|  | 3 | +require 'rchardet' | 
|  | 4 | + | 
module RubyGit
  module CommandLine
    # Utility to normalize string encoding
    # @api public
    module EncodingNormalizer
      # Detects the character encoding used to create a string or binary data
      #
      # Asks CharDet for the encoding of the given bytes and returns the name of
      # the binary encoding when no encoding is reported.
      #
      # @example
      #   EncodingNormalizer.detect_encoding("Hello, world!") #=> "ascii"
      #   EncodingNormalizer.detect_encoding("\xCB\xEF\xF1\xE5\xEC") #=> "ISO-8859-7"
      #   EncodingNormalizer.detect_encoding("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "EUC-KR"
      #
      # @param str [String] the string to detect the encoding of
      # @return [String] the detected encoding name, or the name of
      #   Encoding::BINARY when detection yields nothing
      #
      def self.detect_encoding(str)
        CharDet.detect(str)&.dig('encoding') || Encoding::BINARY.name
      end

      # Normalizes the encoding of str to normalize_to
      #
      # The string is returned untouched only when it is valid, is already
      # tagged with the target encoding, and the detected encoding agrees with
      # the target. Otherwise it is transcoded from the detected encoding, with
      # invalid and undefined characters replaced rather than raising.
      #
      # @example
      #   EncodingNormalizer.normalize("Hello, world!") #=> "Hello, world!"
      #   EncodingNormalizer.normalize("\xCB\xEF\xF1\xE5\xEC") #=> "Λορεμ"
      #   EncodingNormalizer.normalize("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "이것은"
      #
      # @param str [String] the string to normalize
      # @param normalize_to [String] the name of the encoding to normalize to
      #
      # @return [String] the string with encoding converted to normalize_to
      #
      # @raise [Encoding::ConverterNotFoundError] if Ruby has no converter from the
      #   detected encoding to normalize_to
      #
      def self.normalize(str, normalize_to: Encoding::UTF_8.name)
        detected_encoding = detect_encoding(str)

        return str if normalized?(str, detected_encoding, normalize_to.to_s)

        str.encode(normalize_to, detected_encoding, invalid: :replace, undef: :replace)
      end

      # Reports whether str can be returned as-is for the given target encoding
      #
      # Encoding names are compared case-insensitively because the detector's
      # spelling (e.g. "utf-8") need not match Ruby's (e.g. "UTF-8"). The string's
      # own encoding tag is checked as well so that bytes tagged with a different
      # encoding are still re-tagged by the caller.
      #
      # @param str [String] the candidate string
      # @param detected_encoding [String] the encoding name reported for str
      # @param target [String] the name of the encoding being normalized to
      # @return [Boolean] true when no conversion is needed
      # @api private
      #
      def self.normalized?(str, detected_encoding, target)
        str.valid_encoding? &&
          str.encoding.name.casecmp?(target) &&
          detected_encoding.casecmp?(target)
      end
      private_class_method :normalized?
    end
  end
end
0 commit comments