diff --git a/Gemfile b/Gemfile index d947bb1ad..515d11574 100644 --- a/Gemfile +++ b/Gemfile @@ -15,6 +15,7 @@ gem "govuk_app_config" gem "govuk_document_types" gem "govuk_publishing_components" gem "plek" +gem "rack-utf8_sanitizer" gem "rails-i18n" gem "rails_translation_manager" gem "rinku", require: "rails_rinku" diff --git a/Gemfile.lock b/Gemfile.lock index 9b09089b7..49c5e59f6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -572,6 +572,8 @@ GEM rack (>= 3.0.0) rack-test (2.1.0) rack (>= 1.3) + rack-utf8_sanitizer (1.9.1) + rack (>= 1.0, < 4.0) rackup (2.1.0) rack (>= 3) webrick (~> 1.8) @@ -795,6 +797,7 @@ DEPENDENCIES pact_broker-client plek pry-byebug + rack-utf8_sanitizer rails (= 7.2.1.2) rails-controller-testing rails-i18n diff --git a/config/application.rb b/config/application.rb index 8b0c14e32..157326cb8 100644 --- a/config/application.rb +++ b/config/application.rb @@ -13,6 +13,7 @@ # require "action_view/railtie" # require "action_cable/engine" require "rails/test_unit/railtie" +require_relative "../lib/sanitiser/strategy" # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. @@ -117,12 +118,21 @@ class Application < Rails::Application # Please, add to the `ignore` list any other `lib` subdirectories that do # not contain `.rb` files, or that should not be reloaded or eager loaded. # Common ones are `templates`, `generators`, or `middleware`, for example. - config.autoload_lib(ignore: %w[assets tasks]) + config.autoload_lib(ignore: %w[assets tasks sanitiser]) # Configuration for the application, engines, and railties goes here. # # These settings can be overridden in specific environments using the files # in config/environments, which are processed later. - # + + # Protect from "invalid byte sequence in UTF-8" errors, + # when a query or a cookie is a string with incorrect UTF-8 encoding. + config.middleware.insert_before( + 0, + Rack::UTF8Sanitizer, + sanitizable_content_types: [], + only: %w[QUERY_STRING], + strategy: Sanitiser::Strategy, + ) end end diff --git a/lib/sanitiser/strategy.rb b/lib/sanitiser/strategy.rb new file mode 100644 index 000000000..fc2414343 --- /dev/null +++ b/lib/sanitiser/strategy.rb @@ -0,0 +1,18 @@ +module Sanitiser + class Strategy + class SanitisingError < StandardError; end + + class << self + def call(input, sanitize_null_bytes: false) + input + .force_encoding(Encoding::ASCII_8BIT) + .encode!(Encoding::UTF_8) + if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input) + raise NullByteInString + end + rescue StandardError + raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie" + end + end + end +end diff --git a/spec/requests/sanitiser_spec.rb b/spec/requests/sanitiser_spec.rb new file mode 100644 index 000000000..8655fb73f --- /dev/null +++ b/spec/requests/sanitiser_spec.rb @@ -0,0 +1,86 @@ +require "slimmer/test" + +RSpec.describe "Sanitiser" do + context "with query being correct percent-encoded UTF-8 string" do + it "does not raise exception" do + get "/?%41" + expect(response).to have_http_status(:ok) + end + end + + context "with query being incorrect percent-encoded UTF-8 string" do + it "raises SanitisingError" do + expect { get "/?%AD" }.to raise_error(Sanitiser::Strategy::SanitisingError) + end + end + + context "with cookie key being correct UTF-8" do + it "does not raise exception" do + get "/", headers: { Cookie: "\x41=value" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie key being incorrect UTF-8" do + it "raises exception" do + expect { get "/", headers: { Cookie: "\xAD=value" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie value being correct UTF-8" do + it "does not raise exception" do + get "/", headers: { Cookie: "key=\x41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie value being incorrect UTF-8" do + it "raises exception" do + expect { get "/", headers: { Cookie: "key=\xAD" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie path being correct UTF-8" do + it "does not raise exception" do + get "/", headers: { Cookie: "key=value; Path=/\x41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie path being incorrect UTF-8" do + it "raises exception" do + expect { get "/", headers: { Cookie: "key=value; Path=/\xAD" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie path being correct percent-encoded UTF-8" do + it "does not raise exception" do + get "/", headers: { Cookie: "key=value; Path=/%41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie path being incorrect percent-encoded UTF-8" do + it "raises SanitisingError" do + expect { get "/", headers: { Cookie: "key=value; Path=/%AD" } } + .to raise_error(Sanitiser::Strategy::SanitisingError) + end + end + + context "with referrer header being correct percent-encoded UTF-8" do + it "does not raise exception" do + get "/", headers: { Referer: "http://example.com/?%41" } + expect(response).to have_http_status(:ok) + end + end + + context "with referrer header being incorrect percent-encoded UTF-8" do + it "does not raise exception" do + get "/", headers: { Referer: "http://example.com/?%AD" } + expect(response).to have_http_status(:ok) + end + end +end