diff --git a/lib/log_struct/builders/active_job.rb b/lib/log_struct/builders/active_job.rb index 5691bf8..d7c8fa0 100644 --- a/lib/log_struct/builders/active_job.rb +++ b/lib/log_struct/builders/active_job.rb @@ -21,9 +21,12 @@ def self.safe_executions(job) job.respond_to?(:executions) ? job.executions : nil end + # Respect log_arguments? setting on job classes. + # Arguments are logged by default but can be opted-out per job class. + # When logged, sensitive keys are filtered by Formatter.process_values. sig { params(job: T.untyped).returns(T.nilable(T::Array[T.untyped])) } def self.safe_arguments(job) - return nil unless job.class.respond_to?(:log_arguments?) + return job.arguments unless job.class.respond_to?(:log_arguments?) job.class.log_arguments? ? job.arguments : nil end diff --git a/lib/log_struct/config_struct/filters.rb b/lib/log_struct/config_struct/filters.rb index 09fe14f..56600f4 100644 --- a/lib/log_struct/config_struct/filters.rb +++ b/lib/log_struct/config_struct/filters.rb @@ -88,11 +88,32 @@ class Filters < T::Struct # Default: false prop :mac_addresses, T::Boolean, default: false + # Default regex pattern for matching sensitive keys. + # Matches keys containing: password, token, secret, auth, cred + # Also matches specific key patterns: api_key, secret_key, private_key, access_key, encryption_key + # Examples: access_token, api_key, auth_header, credentials + # Uses start/end of string or underscore/hyphen boundaries to prevent + # false positives like "keyboard" or "turkey" (which contain "key" mid-word) + # Note: "key" alone is too broad (matches cron_key, primary_key), so we only match + # specific sensitive key patterns + DEFAULT_SENSITIVE_KEY_PATTERN = T.let( + /(^|[_-])(password|token|secret|auth|cred)([_-]|$)|(^|[_-])(api|secret|private|access|encryption)_key([_-]|$)/i, + Regexp + ) + # Additional filter matchers built from Rails filter_parameters entries that aren't simple symbols. # Each matcher receives the key (String) and optional value, returning true when the pair should be filtered. + # By default, includes a regex matcher for common sensitive key patterns. prop :filter_matchers, T::Array[FilterMatcher], - factory: -> { [] } + factory: -> { + [ + FilterMatcher.new( + callable: ->(key, _value) { DEFAULT_SENSITIVE_KEY_PATTERN.match?(key) }, + label: "default_sensitive_pattern" + ) + ] + } end end end diff --git a/lib/log_struct/integrations/active_record.rb b/lib/log_struct/integrations/active_record.rb index 45eeed0..2e0e03d 100644 --- a/lib/log_struct/integrations/active_record.rb +++ b/lib/log_struct/integrations/active_record.rb @@ -281,6 +281,12 @@ def self.looks_sensitive?(value) return true if value.match?(/\A[A-Za-z0-9+\/]{20,}={0,2}\z/) # Base64 return true if value.match?(/(password|secret|token|key|auth)/i) + # Filter JWT tokens (header.payload.signature format, starts with "ey") + return true if value.match?(/\Aey[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\z/) + + # Filter Bearer tokens + return true if value.match?(/\ABearer\s+/i) + false end end diff --git a/lib/log_struct/integrations/good_job/log_subscriber.rb b/lib/log_struct/integrations/good_job/log_subscriber.rb index 5dad56c..e63fa26 100644 --- a/lib/log_struct/integrations/good_job/log_subscriber.rb +++ b/lib/log_struct/integrations/good_job/log_subscriber.rb @@ -148,11 +148,21 @@ def build_base_fields(job, payload) job_id: job&.job_id, job_class: job&.job_class, queue_name: job&.queue_name&.to_sym, - arguments: job&.arguments, + arguments: safe_arguments(job), executions: execution&.executions ) end + # Respect log_arguments? setting on job classes (consistent with ActiveJob behavior). + # Arguments are logged by default but can be opted-out per job class. + # When logged, sensitive keys are filtered by Formatter.process_values. + sig { params(job: T.untyped).returns(T.nilable(T::Array[T.untyped])) } + def safe_arguments(job) + return nil unless job + return job.arguments unless job.class.respond_to?(:log_arguments?) + job.class.log_arguments? ? job.arguments : nil + end + # Calculate wait time from job creation to execution start sig { params(execution: T.untyped).returns(T.nilable(Float)) } def calculate_wait_time(execution) diff --git a/lib/log_struct/shared/merge_additional_data_fields.rb b/lib/log_struct/shared/merge_additional_data_fields.rb index 186fbf4..e865cef 100644 --- a/lib/log_struct/shared/merge_additional_data_fields.rb +++ b/lib/log_struct/shared/merge_additional_data_fields.rb @@ -2,6 +2,7 @@ # frozen_string_literal: true require_relative "interfaces/additional_data_field" +require_relative "../enums/source" module LogStruct module Log @@ -13,12 +14,24 @@ module MergeAdditionalDataFields requires_ancestor { T::Struct } requires_ancestor { Interfaces::AdditionalDataField } + # Reserved keys that cannot be overwritten by additional_data. + # These are the core log structure fields that must not be modified. + RESERVED_KEYS = T.let(%i[src evt lvl ts].freeze, T::Array[Symbol]) + sig { params(hash: T::Hash[Symbol, T.untyped]).void } def merge_additional_data_fields(hash) ad = additional_data return unless ad ad.each do |key, value| - hash[key.to_sym] = value + sym_key = key.to_sym + if RESERVED_KEYS.include?(sym_key) + LogStruct.handle_exception( + ArgumentError.new("additional_data attempted to overwrite reserved key: #{sym_key}"), + source: Source::Internal + ) + next + end + hash[sym_key] = value end end end diff --git a/rails_test_app/create_app.rb b/rails_test_app/create_app.rb index e133fd3..482e8cc 100755 --- a/rails_test_app/create_app.rb +++ b/rails_test_app/create_app.rb @@ -132,6 +132,9 @@ def rails_supports_skip_kamal?(clean_env) "BUNDLER_SETUP" => nil, "GEM_HOME" => nil, "GEM_PATH" => nil, + # Install gems locally to avoid issues with world-writable system gem directories on CI + # (Ruby 4.0 bundler rejects reinstalling gems in world-writable directories for security) + "BUNDLE_PATH" => File.join(RAILS_APP_DIR, "vendor", "bundle"), # Silence noisy warnings from duplicate gems during generator execution "RUBYOPT" => "-W0" } diff --git a/scripts/merge_coverage.sh b/scripts/merge_coverage.sh index ed5626d..8aa9f26 100755 --- a/scripts/merge_coverage.sh +++ b/scripts/merge_coverage.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -rake coverage:merge +bundle exec rake coverage:merge diff --git a/scripts/rails_tests.sh b/scripts/rails_tests.sh index 0642fe0..7044b8f 100755 --- a/scripts/rails_tests.sh +++ b/scripts/rails_tests.sh @@ -75,6 +75,10 @@ export RUBYOPT="-W0" # Set CI=true so LogStruct is enabled for integration tests export CI=true +# Install gems locally to avoid issues with world-writable system gem directories on CI +# (Ruby 4.0 bundler rejects reinstalling gems in world-writable directories for security) +export BUNDLE_PATH="$TEST_APP_DIR/vendor/bundle" + # Install gems for the test app under the selected Ruby bundle install diff --git a/test/log_struct/integrations/active_record_test.rb b/test/log_struct/integrations/active_record_test.rb index 3278a26..c433b3a 100644 --- a/test/log_struct/integrations/active_record_test.rb +++ b/test/log_struct/integrations/active_record_test.rb @@ -293,6 +293,58 @@ class ActiveRecordTest < ActiveSupport::TestCase assert_includes T.must(log).bind_params, 123 end + test "filters JWT tokens in bind parameters" do + LogStruct.config.integrations.sql_log_bind_params = true + setup_activerecord_for_test + + # Real-looking JWT token (header.payload.signature format, starts with "ey") + # cspell:disable-next-line + jwt_token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c" + + logged_events = [] + LogStruct.stub(:info, ->(log) { logged_events << log }) do + simulate_sql_notification( + binds: [jwt_token, "normal_value"], + type_casted_binds: [jwt_token, "normal_value"] + ) + end + + assert_equal 1, logged_events.size + log = T.let(logged_events.first, T.nilable(LogStruct::Log::SQL)) + + assert_not_nil log + + # JWT token should be filtered + assert_includes T.must(log).bind_params, "[FILTERED]" + # Normal value should pass through + assert_includes T.must(log).bind_params, "normal_value" + end + + test "filters Bearer tokens in bind parameters" do + LogStruct.config.integrations.sql_log_bind_params = true + setup_activerecord_for_test + + bearer_token = "Bearer abc123def456" + + logged_events = [] + LogStruct.stub(:info, ->(log) { logged_events << log }) do + simulate_sql_notification( + binds: [bearer_token, "safe_value"], + type_casted_binds: [bearer_token, "safe_value"] + ) + end + + assert_equal 1, logged_events.size + log = T.let(logged_events.first, T.nilable(LogStruct::Log::SQL)) + + assert_not_nil log + + # Bearer token should be filtered + assert_includes T.must(log).bind_params, "[FILTERED]" + # Safe value should pass through + assert_includes T.must(log).bind_params, "safe_value" + end + test "excludes bind parameters when disabled" do LogStruct.config.integrations.sql_log_bind_params = false setup_activerecord_for_test diff --git a/test/log_struct/integrations/good_job/log_subscriber_test.rb b/test/log_struct/integrations/good_job/log_subscriber_test.rb index 99ece8a..b0b3145 100644 --- a/test/log_struct/integrations/good_job/log_subscriber_test.rb +++ b/test/log_struct/integrations/good_job/log_subscriber_test.rb @@ -266,6 +266,67 @@ class LogSubscriberTest < ActiveSupport::TestCase assert_nil wait_time end + test "respects log_arguments? when job class opts out" do + clear_log_buffer(@log_output) + + # Create a job class that opts out of logging arguments + job_class = Class.new do + def self.log_arguments? + false + end + end + Object.const_set(:NoLogArgsJob, job_class) + + begin + log = run_enqueue_with_job_class(:NoLogArgsJob) + + refute log.key?("arguments") + ensure + Object.send(:remove_const, :NoLogArgsJob) + end + end + + test "logs arguments by default when job class does not define log_arguments?" do + clear_log_buffer(@log_output) + + # Standard job without log_arguments? defined - should log arguments + job = create_mock_job("StandardJob", "job_123", "default") + event_data = create_test_event({ + job: job, + duration: 0.1 + }) + + @subscriber.enqueue(event_data) + ::SemanticLogger.flush + + output = @log_output.string + log = JSON.parse(output.lines.first.strip) + + # Arguments should be present by default + assert log.key?("arguments") + assert_equal ["arg1", "arg2"], log["arguments"] + end + + test "logs arguments when job class log_arguments? returns true" do + clear_log_buffer(@log_output) + + job_class = Class.new do + def self.log_arguments? + true + end + end + Object.const_set(:LogArgsJob, job_class) + + begin + log = run_enqueue_with_job_class(:LogArgsJob) + + assert log.key?("arguments") + assert_equal ["arg1", "arg2"], log["arguments"] + ensure + Object.send(:remove_const, :LogArgsJob) + end + end + private def create_test_event(payload_data, start_time: nil, finish_time: nil) @@ -293,6 +354,35 @@ def create_mock_job(job_class, job_id, queue_name, extra_attributes = {}) }.merge(extra_attributes)) end + # Create a mock job where job.class returns the actual job class (for testing log_arguments?) + def create_mock_job_with_class(klass, job_id, queue_name, extra_attributes = {}) + mock = Object.new + mock.define_singleton_method(:class) { klass } + mock.define_singleton_method(:job_class) { klass.name } + mock.define_singleton_method(:job_id) { job_id } + mock.define_singleton_method(:queue_name) { queue_name } + mock.define_singleton_method(:arguments) { ["arg1", "arg2"] } + mock.define_singleton_method(:priority) { 0 } + mock.define_singleton_method(:scheduled_at) { nil } + mock.define_singleton_method(:enqueue_caller_location) { nil } + extra_attributes.each do |key, value| + mock.define_singleton_method(key) { value } + end + mock + end + + # Run an enqueue event with a job class that has log_arguments? method and return the parsed log + def run_enqueue_with_job_class(job_class) + # rubocop:disable Sorbet/ConstantsFromStrings + klass = Object.const_get(job_class) + # rubocop:enable Sorbet/ConstantsFromStrings + job = create_mock_job_with_class(klass, "job_123", "default") + event_data = create_test_event({job: job, duration: 0.1}) + @subscriber.enqueue(event_data) + ::SemanticLogger.flush + JSON.parse(@log_output.string.lines.first.strip) + end + def create_mock_execution(attributes = {}) OpenStruct.new({ executions: 1, diff --git a/test/log_struct/integrations/lograge_formatter_test.rb b/test/log_struct/integrations/lograge_formatter_test.rb index c529ea4..51cce60 100644 --- a/test/log_struct/integrations/lograge_formatter_test.rb +++ b/test/log_struct/integrations/lograge_formatter_test.rb @@ -180,6 +180,41 @@ def test_prefers_payload_request_id_over_header assert_equal "payload-request-id", options[:request_id] end + + def test_additional_data_cannot_overwrite_reserved_keys + # Reserved keys are: src, evt, lvl, ts + data = { + method: "GET", + path: "/users", + status: 200, + duration: 12.34, + # These should be in additional_data but NOT overwrite the real values + src: "attacker", + evt: "spoofed", + lvl: "fake", + ts: "2000-01-01T00:00:00Z" + } + + exceptions_reported = [] + LogStruct.stub(:handle_exception, ->(exc, source:) { exceptions_reported << [exc, source] }) do + log = @formatter.call(data) + json_hash = log.serialize + + # Reserved keys should retain their real values + assert_equal "rails", json_hash[:src] + assert_equal "request", json_hash[:evt] + assert_equal "info", json_hash[:lvl] + refute_equal "2000-01-01T00:00:00Z", json_hash[:ts] + end + + # Should have reported 4 exceptions (one for each reserved key attempted) + assert_equal 4, exceptions_reported.size + exceptions_reported.each do |exc, source| + assert_kind_of ArgumentError, exc + assert_match(/additional_data attempted to overwrite reserved key/, exc.message) + assert_equal LogStruct::Source::Internal, source + end + end end end end diff --git a/test/log_struct/param_filters_test.rb b/test/log_struct/param_filters_test.rb index 323bda6..eef1a74 100644 --- a/test/log_struct/param_filters_test.rb +++ b/test/log_struct/param_filters_test.rb @@ -103,5 +103,51 @@ class ParamFiltersTest < ActiveSupport::TestCase assert_equal Integer, s5[:_class] end + + test "default regex filter matcher catches common sensitive key patterns" do + # Reset config to use fresh defaults (with default filter_matchers) + LogStruct.configuration = LogStruct::Configuration.new + + # Should filter keys containing sensitive patterns with word boundaries + assert ParamFilters.should_filter_key?(:access_token) + assert ParamFilters.should_filter_key?(:refresh_token) + assert ParamFilters.should_filter_key?(:api_key) + assert ParamFilters.should_filter_key?(:private_key) + assert ParamFilters.should_filter_key?(:secret_key) + assert ParamFilters.should_filter_key?(:access_key) + assert ParamFilters.should_filter_key?(:encryption_key) + assert ParamFilters.should_filter_key?(:client_secret) + assert ParamFilters.should_filter_key?(:auth_header) + assert ParamFilters.should_filter_key?(:auth_token) + assert ParamFilters.should_filter_key?(:credentials) + assert ParamFilters.should_filter_key?(:password_hash) + + # Case insensitive + assert ParamFilters.should_filter_key?(:ACCESS_TOKEN) + assert ParamFilters.should_filter_key?(:Api_Key) + end + + test "default regex filter matcher does not match false positives" do + # Reset config to use fresh defaults (with default filter_matchers) + LogStruct.configuration = LogStruct::Configuration.new + + # Word boundaries should prevent these false positives + refute ParamFilters.should_filter_key?(:keyboard) + refute ParamFilters.should_filter_key?(:turkey) + refute ParamFilters.should_filter_key?(:monkey) + refute ParamFilters.should_filter_key?(:hockey) + refute ParamFilters.should_filter_key?(:credenza) + + # "key" alone is too broad - these should NOT be filtered + refute ParamFilters.should_filter_key?(:cron_key) + refute ParamFilters.should_filter_key?(:primary_key) + refute ParamFilters.should_filter_key?(:foreign_key) + refute ParamFilters.should_filter_key?(:unique_key) + + # Normal fields should not be filtered + refute ParamFilters.should_filter_key?(:username) + refute ParamFilters.should_filter_key?(:email_address) + refute ParamFilters.should_filter_key?(:first_name) + end end end