From 6a51eed2f988849134aa6f47ad6d9a4a75bf902e Mon Sep 17 00:00:00 2001
From: Alexander Fisher <alex@linfratech.co.uk>
Date: Wed, 14 Feb 2024 12:42:32 +0000
Subject: [PATCH] PoC/RFC: Support sensitive values in `to_json_pretty`

The idea is to be able to do something like this (where only the call
to `to_json_pretty` is explicitly `Deferred` - needed because
`node_encrypt` returns a `Deferred`):

```puppet
file { '/etc/my_secret.json':
  content => Deferred('to_json_pretty',[{
    username => 'myuser',
    password => lookup('my_eyaml_secret').node_encrypt::secret,
  }]),
}
```

... instead of also having to explicitly defer `unwrap` and wrap the result
in `Sensitive`, ending up with a huge mess similar to...

```puppet
file { '/etc/my_secret.json':
  content => Sensitive(
    Deferred('to_json_pretty',[{
      username => 'myuser',
      password => Deferred('unwrap', [lookup('my_eyaml_secret').node_encrypt::secret]),
    }])
  ),
}
```

The thought behind `rewrap_sensitive_data` is that it makes it easy to
extend this functionality to other similar functions (`to_yaml`,
`to_toml`, etc.).

Later, we might consider adding `deferrable_to_XXX` functions to
further simplify this sort of use case.
---
 .../functions/stdlib/rewrap_sensitive_data.rb |  60 ++++++++++
 lib/puppet/functions/stdlib/to_json_pretty.rb |   8 +-
 spec/functions/rewrap_sensitive_data_spec.rb  | 111 ++++++++++++++++++
 spec/functions/to_json_pretty_spec.rb         |   4 +
 4 files changed, 180 insertions(+), 3 deletions(-)
 create mode 100644 lib/puppet/functions/stdlib/rewrap_sensitive_data.rb
 create mode 100644 spec/functions/rewrap_sensitive_data_spec.rb

diff --git a/lib/puppet/functions/stdlib/rewrap_sensitive_data.rb b/lib/puppet/functions/stdlib/rewrap_sensitive_data.rb
new file mode 100644
index 000000000..4e8b56bdd
--- /dev/null
+++ b/lib/puppet/functions/stdlib/rewrap_sensitive_data.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+# @summary Unwraps any sensitives in data and returns a sensitive
+#
+# It's not uncommon to have Sensitive strings as values within a hash or array.
+# Before passing the data to a type property or another function, it's useful
+# to be able to `unwrap` these values first. This function does this. If
+# sensitive data was included in the data, the whole result is then rewrapped
+# as Sensitive.
+#
+# Optionally, this function can be passed a block. When a block is given, it will
+# be run with the unwrapped data, but before the final rewrapping.  This is useful
+# to provide transparent rewrapping to other functions in stdlib especially.
+#
+# This is analogous to the way `epp` transparently handles sensitive parameters.
+Puppet::Functions.create_function(:'stdlib::rewrap_sensitive_data') do
+  # @param data The data
+  # @param block A lambda that will be run after the data has been unwrapped, but before it is rewrapped, (if it contained sensitives)
+  # @return Returns the rewrapped data
+  dispatch :rewrap_sensitive_data do
+    param 'Any', :data
+    optional_block_param 'Callable[Any]', :block
+    return_type 'Any'
+  end
+
+  def rewrap_sensitive_data(data)
+    @contains_sensitive = false
+
+    unwrapped = deep_unwrap(data)
+
+    result = block_given? ? yield(unwrapped) : unwrapped
+
+    if @contains_sensitive
+      Puppet::Pops::Types::PSensitiveType::Sensitive.new(result)
+    else
+      result
+    end
+  end
+
+  def deep_unwrap(obj)
+    case obj
+    when Hash
+      obj.each_with_object({}) do |(key, value), result|
+        if key.is_a?(Puppet::Pops::Types::PSensitiveType::Sensitive)
+          # This situation is probably fairly unlikely in reality, but easy enough to support
+          @contains_sensitive = true
+          key = key.unwrap
+        end
+        result[key] = deep_unwrap(value)
+      end
+    when Array
+      obj.map { |element| deep_unwrap(element) }
+    when Puppet::Pops::Types::PSensitiveType::Sensitive
+      @contains_sensitive = true
+      deep_unwrap(obj.unwrap)
+    else
+      obj
+    end
+  end
+end
diff --git a/lib/puppet/functions/stdlib/to_json_pretty.rb b/lib/puppet/functions/stdlib/to_json_pretty.rb
index 6fc748575..25ffacd6a 100644
--- a/lib/puppet/functions/stdlib/to_json_pretty.rb
+++ b/lib/puppet/functions/stdlib/to_json_pretty.rb
@@ -67,8 +67,10 @@ def to_json_pretty(data, skip_undef = false, opts = nil)
     end
 
     data = data.compact if skip_undef && (data.is_a?(Array) || Hash)
-    # Call ::JSON to ensure it references the JSON library from Ruby's standard library
-    # instead of a random JSON namespace that might be in scope due to user code.
-    JSON.pretty_generate(data, opts) << "\n"
+    call_function('stdlib::rewrap_sensitive_data', data) do |unwrapped_data|
+      # Call ::JSON to ensure it references the JSON library from Ruby's standard library
+      # instead of a random JSON namespace that might be in scope due to user code.
+      ::JSON.pretty_generate(unwrapped_data, opts) << "\n"
+    end
   end
 end
diff --git a/spec/functions/rewrap_sensitive_data_spec.rb b/spec/functions/rewrap_sensitive_data_spec.rb
new file mode 100644
index 000000000..314fa7336
--- /dev/null
+++ b/spec/functions/rewrap_sensitive_data_spec.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe 'stdlib::rewrap_sensitive_data' do
+  it { is_expected.not_to be_nil }
+
+  context 'when called with data containing no sensitive elements' do
+    it { is_expected.to run.with_params({}).and_return({}) }
+    it { is_expected.to run.with_params([]).and_return([]) }
+    it { is_expected.to run.with_params('a_string').and_return('a_string') }
+    it { is_expected.to run.with_params(42).and_return(42) }
+    it { is_expected.to run.with_params(true).and_return(true) }
+    it { is_expected.to run.with_params(false).and_return(false) }
+
+    it { is_expected.to run.with_params({ 'foo' => 'bar' }).and_return({ 'foo' => 'bar' }) }
+  end
+
+  context 'when called with a hash containing a sensitive string' do
+    it 'unwraps the sensitive string and returns a sensitive hash' do
+      is_expected.to run.with_params(
+        {
+          'username' => 'my_user',
+          'password' => sensitive('hunter2')
+        },
+      ).and_return(sensitive(
+        {
+          'username' => 'my_user',
+          'password' => 'hunter2'
+        },
+      ))
+    end
+  end
+
+  context 'when called with data containing lots of sensitive elements (including nested in arrays, and sensitive hashes etc)' do
+    it 'recursively unwraps everything and marks the whole result as sensitive' do
+      is_expected.to run.with_params(
+        {
+          'a' => sensitive('bar'),
+          'b' => [
+            1,
+            2,
+            :undef,
+            true,
+            false,
+            {
+              'password'      => sensitive('secret'),
+              'weird_example' => sensitive({ 'foo' => sensitive(42) }) # A sensitive hash containing a sensitive Int as the value to a hash contained in an array which is the value of a hash key...
+            },
+          ],
+          'c' => :undef,
+          'd' => [],
+          'e' => true,
+          'f' => false,
+        },
+      ).and_return(sensitive(
+        {
+          'a' => 'bar',
+          'b' => [
+            1,
+            2,
+            :undef,
+            true,
+            false,
+            {
+              'password'      => 'secret',
+              'weird_example' => { 'foo' => 42 }
+            },
+          ],
+          'c' => :undef,
+          'd' => [],
+          'e' => true,
+          'f' => false,
+        },
+      ))
+    end
+  end
+
+  context 'when a hash _key_ is sensitive' do
+    it 'unwraps the key' do
+      is_expected.to run.with_params(
+        {
+          sensitive('key') => 'value',
+        },
+      ).and_return(sensitive(
+        {
+          'key' => 'value',
+        },
+      ))
+    end
+  end
+
+  context 'when called with a block' do
+    context 'that upcases hash values' do
+      it do
+        is_expected.to run
+          .with_params({ 'secret' => sensitive('hunter2') })
+          .with_lambda { |data| data.transform_values { |value| value.upcase } }
+          .and_return(sensitive({ 'secret' => 'HUNTER2' }))
+      end
+    end
+    context 'that converts data to yaml' do
+      it do
+        is_expected.to run
+          .with_params({ 'secret' => sensitive('hunter2') })
+          .with_lambda { |data| data.to_yaml }
+          .and_return(sensitive("---\nsecret: hunter2\n"))
+      end
+    end
+  end
+end
diff --git a/spec/functions/to_json_pretty_spec.rb b/spec/functions/to_json_pretty_spec.rb
index f1a54e54b..605b818b3 100644
--- a/spec/functions/to_json_pretty_spec.rb
+++ b/spec/functions/to_json_pretty_spec.rb
@@ -28,4 +28,8 @@
     pending('Current implementation only elides nil values for hashes of depth=1')
     expect(subject).to run.with_params({ 'omg' => { 'lol' => nil }, 'what' => nil }, true).and_return("{\n}\n")
   }
+
+  context 'with data containing sensitive' do
+    it { is_expected.to run.with_params('key' => sensitive('value')).and_return(sensitive("{\n  \"key\": \"value\"\n}\n")) }
+  end
 end