diff --git a/cloudinit/config/cc_apt_configure.py b/cloudinit/config/cc_apt_configure.py index 35445711e60..72a81f9c7b4 100644 --- a/cloudinit/config/cc_apt_configure.py +++ b/cloudinit/config/cc_apt_configure.py @@ -1111,7 +1111,7 @@ def apt_key_add(gpg_context): ) return file_name - def apt_key_list(gpg_context): + def apt_key_list(gpg_context: GPG): """apt-key list returns string of all trusted keys (in /etc/apt/trusted.gpg and diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index 8ade82185f2..496da753fe2 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -9,6 +9,7 @@ import logging import os +import pathlib import re import signal import time @@ -22,6 +23,10 @@ HOME = "GNUPGHOME" +class GpgVerificationError(Exception): + """GpgVerificationError is raised when a signature verification fails.""" + + class GPG: def __init__(self): self.gpg_started = False @@ -68,6 +73,75 @@ def export_armour(self, key: str) -> Optional[str]: LOG.debug('Failed to export armoured key "%s": %s', key, error) return None + def import_key(self, key: pathlib.Path) -> None: + """Import gpg key from a file to the temporary keyring. + + :param key: path to the key file + """ + try: + subp.subp( + [ + "gpg", + "--batch", + "--import", + str(key), + ], + update_env=self.env, + ) + except subp.ProcessExecutionError as error: + LOG.warning("Failed to import key %s: %s", key, error) + + def decrypt(self, data: str, *, require_signature=False) -> str: + """Process data using gpg. + + This can be used to decrypt encrypted data, verify signed data, + or both depending on the data provided. + + :param data: ASCII-armored GPG message to process + :return: decrypted data + :raises: ProcessExecutionError if gpg fails to decrypt data + """ + if require_signature: + try: + subp.subp( + ["gpg", "--verify"], + data=data, + update_env=self.env, + ) + except subp.ProcessExecutionError: + # If the message is signed then encrypted (the default), + # the message can't be verified until it's decrypted + try: + stdout, _ = subp.subp( + ["gpg", "--unwrap"], + data=data, + update_env=self.env, + decode=False, + ) + except subp.ProcessExecutionError as e: + raise GpgVerificationError( + "Signature verification failed. Could not unwrap." + ) from e + try: + subp.subp( + ["gpg", "--verify"], + data=stdout, + update_env=self.env, + ) + except subp.ProcessExecutionError as e: + raise GpgVerificationError( + "Signature verification failed. Could not verify." + ) from e + result = subp.subp( + [ + "gpg", + "--decrypt", + ], + data=data, + update_env=self.env, + ) + return result.stdout + def dearmor(self, key: str) -> str: """Dearmor gpg key, dearmored key gets returned diff --git a/cloudinit/settings.py b/cloudinit/settings.py index a73f25118d7..ff4831db4fa 100644 --- a/cloudinit/settings.py +++ b/cloudinit/settings.py @@ -79,3 +79,4 @@ FREQUENCIES = [PER_INSTANCE, PER_ALWAYS, PER_ONCE] HOTPLUG_ENABLED_FILE = "/var/lib/cloud/hotplug.enabled" +KEY_DIR = "/etc/cloud/keys" diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 973d140ff36..07fd1ab7705 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -647,19 +647,34 @@ def get_url_params(self): def get_userdata(self, apply_filter=False): if self.userdata is None: - self.userdata = self.ud_proc.process(self.get_userdata_raw()) + self.userdata = self.ud_proc.process( + self.get_userdata_raw(), + require_signature=self.sys_cfg.get("user_data", {}).get( + "require_signature", False + ), + ) if apply_filter: return self._filter_xdata(self.userdata) return self.userdata def get_vendordata(self): if self.vendordata is None: - self.vendordata = self.ud_proc.process(self.get_vendordata_raw()) + self.vendordata = self.ud_proc.process( + self.get_vendordata_raw(), + require_signature=self.sys_cfg.get("vendor_data", {}).get( + "require_signature", False + ), + ) return self.vendordata def get_vendordata2(self): if self.vendordata2 is None: - self.vendordata2 = self.ud_proc.process(self.get_vendordata2_raw()) + self.vendordata2 = self.ud_proc.process( + self.get_vendordata2_raw(), + require_signature=self.sys_cfg.get("vendor_data2", {}).get( + "require_signature", False + ), + ) return self.vendordata2 @property diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 84bc0ce07a0..33325bb678c 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -793,6 +793,41 @@ def finalize_handlers(): finally: finalize_handlers() + def _consume_userdata_if_enabled(self, frequency: str) -> None: + """Consume userdata if not disabled in base config. + + Base config can have a definition like: + user_data: + enabled: false + require_signature: true + or a deprecated `allow_userdata` key. + + Parse them and maybe consume userdata accordingly. + """ + user_data_cfg = self.cfg.get("user_data", {}) + enabled = user_data_cfg.get("enabled", True) + + if "allow_userdata" in self.cfg: + lifecycle.deprecate( + deprecated="Key 'allow_userdata'", + deprecated_version="24.3", + extra_message="Use 'user_data.enabled' instead.", + ) + if "enabled" in user_data_cfg: + LOG.warning( + "Both 'allow_userdata' and 'user_data.enabled' are set." + " 'allow_userdata' will be ignored." + ) + else: + enabled = util.get_cfg_option_bool(self.cfg, "allow_userdata") + + if enabled: + self._consume_userdata(frequency) + else: + LOG.debug( + "User data disabled in base config: discarding user-data" + ) + def consume_data(self, frequency=PER_INSTANCE): # Consume the userdata first, because we need want to let the part # handlers run first (for merging stuff) @@ -801,10 +836,7 @@ def consume_data(self, frequency=PER_INSTANCE): "reading and applying user-data", parent=self.reporter, ): - if util.get_cfg_option_bool(self.cfg, "allow_userdata", True): - self._consume_userdata(frequency) - else: - LOG.debug("allow_userdata = False: discarding user-data") + self._consume_userdata_if_enabled(frequency) with events.ReportEventStack( "consume-vendor-data", diff --git a/cloudinit/subp.py b/cloudinit/subp.py index eebf009d4f0..6cc5cf88f15 100644 --- a/cloudinit/subp.py +++ b/cloudinit/subp.py @@ -7,7 +7,7 @@ import subprocess from errno import ENOEXEC from io import TextIOWrapper -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union from cloudinit import performance @@ -170,7 +170,7 @@ def subp( capture=True, shell=False, logstring=False, - decode="replace", + decode: Literal[False, "strict", "ignore", "replace"] = "replace", update_env=None, cwd=None, timeout=None, diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index e99ad9a183a..e8dd031d893 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -8,14 +8,19 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import email import logging import os +import pathlib +from email.message import Message from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText +from typing import Union, cast -from cloudinit import features, handlers, util +from cloudinit import features, gpg, handlers, subp, util +from cloudinit.settings import KEY_DIR from cloudinit.url_helper import UrlError, read_file_or_url LOG = logging.getLogger(__name__) @@ -78,13 +83,23 @@ def __init__(self, paths): self.paths = paths self.ssl_details = util.fetch_ssl_details(paths) - def process(self, blob): + def process(self, blob, require_signature=False): accumulating_msg = MIMEMultipart() if isinstance(blob, list): for b in blob: - self._process_msg(convert_string(b), accumulating_msg) + self._process_msg( + convert_string( + b, is_part=True, require_signature=require_signature + ), + accumulating_msg, + ) else: - self._process_msg(convert_string(blob), accumulating_msg) + self._process_msg( + convert_string( + blob, is_part=False, require_signature=require_signature + ), + accumulating_msg, + ) return accumulating_msg def _process_msg(self, base_msg, append_msg): @@ -361,27 +376,87 @@ def is_skippable(part): return False -# Coverts a raw string into a mime message -def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE): - """convert a string (more likely bytes) or a message into - a mime message.""" +def decrypt_payload(payload: str, require_signature: bool) -> str: + """Decrypt/Verify a PGP message. + + :param payload: ASCII-armored GPG message to process + :param require_signature: Whether to require a signature + :return: decrypted data + """ + with gpg.GPG() as gpg_context: + # Import all keys from the /etc/cloud/keys directory + keys_dir = pathlib.Path(KEY_DIR) + if keys_dir.is_dir(): + for key_path in keys_dir.iterdir(): + gpg_context.import_key(key_path) + try: + return gpg_context.decrypt( + payload, require_signature=require_signature + ) + except subp.ProcessExecutionError as e: + raise RuntimeError( + "Failed decrypting user data payload. " + f"Ensure any necessary keys are present in {KEY_DIR}." + ) from e + + +def handle_encrypted( + data: bytes, is_part: bool, require_signature: bool +) -> bytes: + # Decrypt/verify a PGP message. We do this here because a signed + # MIME part could be thwarted by other user data parts + if data[:27] == b"-----BEGIN PGP MESSAGE-----": + if is_part: + raise RuntimeError( + "PGP message must encompass entire user data or vendor data." + ) + return decrypt_payload(data.decode("utf-8"), require_signature).encode( + "utf-8" + ) + elif require_signature: + raise RuntimeError( + "'require_signature' was set true in cloud-init's base " + "configuration, but content is not signed." + ) + return data + + +def _create_binmsg(data): + maintype, subtype = NOT_MULTIPART_TYPE.split("/", 1) + msg = MIMEBase(maintype, subtype) + msg.set_payload(data) + return msg + + +def convert_string( + raw_data: Union[str, bytes], + *, + is_part: bool = False, + require_signature: bool = False, +) -> Message: + """Convert the raw data into a mime message. + + 'raw_data' is the data as it was received from the user-data source. + It could be a string, bytes, or a gzip compressed version of either. + """ if not raw_data: raw_data = b"" - def create_binmsg(data, content_type): - maintype, subtype = content_type.split("/", 1) - msg = MIMEBase(maintype, subtype) - msg.set_payload(data) - return msg - if isinstance(raw_data, str): bdata = raw_data.encode("utf-8") else: bdata = raw_data - bdata = util.decomp_gzip(bdata, decode=False) - if b"mime-version:" in bdata[0:4096].lower(): - msg = util.message_from_string(bdata.decode("utf-8")) + # cast here because decode=False means return type is bytes + bdata = cast(bytes, util.decomp_gzip(bdata, decode=False)) + + bdata = handle_encrypted(bdata, is_part, require_signature) + + # Now ensure we have a MIME message + if b"mime-version:" in bdata[:4096].lower(): + # If we have a pre-existing MIME, use it + msg = email.message_from_string(bdata.decode("utf-8")) else: - msg = create_binmsg(bdata, content_type) + # Otherwise, convert to MIME + msg = _create_binmsg(bdata) return msg diff --git a/cloudinit/util.py b/cloudinit/util.py index 8025f4d51c4..214b0ced26d 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -11,7 +11,6 @@ import binascii import contextlib import copy as obj_copy -import email import glob import grp import gzip @@ -36,6 +35,7 @@ from base64 import b64decode from collections import deque, namedtuple from contextlib import contextmanager, suppress +from email.message import Message from errno import ENOENT from functools import lru_cache from pathlib import Path @@ -164,7 +164,7 @@ def maybe_b64decode(data: bytes) -> bytes: return data -def fully_decoded_payload(part): +def fully_decoded_payload(part: Message): # In Python 3, decoding the payload will ironically hand us a bytes object. # 'decode' means to decode according to Content-Transfer-Encoding, not # according to any charset in the Content-Type. So, if we end up with @@ -175,8 +175,16 @@ def fully_decoded_payload(part): cte_payload, bytes ): charset = part.get_charset() - if charset and charset.input_codec: - encoding = charset.input_codec + + # TODO: Mypy doesn't like the following code because `input_codec` + # is part of a legacy API. See first line of: + # https://docs.python.org/3/library/email.charset.html + # However, as of this writing, it is still available: + # https://github.com/python/cpython/blob/aab18f4d925528c2cbe4625211bf904db2a28317/Lib/email/charset.py#L234 # noqa: E501 + # That said, we shouldn't continue using legacy APIs, so we should + # update this code at some point. + if charset and charset.input_codec: # type: ignore + encoding = charset.input_codec # type: ignore else: encoding = "utf-8" return cte_payload.decode(encoding, "surrogateescape") @@ -2899,10 +2907,6 @@ def is_x86(uname_arch=None): return x86_arch_match -def message_from_string(string): - return email.message_from_string(string) - - def get_installed_packages(): out = subp.subp(["dpkg-query", "--list"], capture=True) diff --git a/doc/rtd/explanation/format.rst b/doc/rtd/explanation/format.rst index 7d14acb6318..8c4eb35ba7b 100644 --- a/doc/rtd/explanation/format.rst +++ b/doc/rtd/explanation/format.rst @@ -24,6 +24,7 @@ Formats that directly configure the instance: Formats that deal with other user data formats: +- `PGP Message`_ - `Include file`_ - `Jinja template`_ - `MIME multi-part archive`_ @@ -141,6 +142,64 @@ The boothook is different in that: Use :ref:`jinja templates` with :ref:`v1.instance_id` instead. +.. _user_data_formats-pgp: + +PGP Message +=========== + +Example +------- + +.. code-block:: text + + -----BEGIN PGP MESSAGE----- + + hF4DoKS6K1guBqsSAQdAE8bD7gGuKcGN91zGzW7SR+u6nnq9pyqjR8qJZl2sJRow + MgfxFaFaIw232BJvqwuoa0HGsmCaVyXmj6PFVJceBn2X7s8hXgHutlQxQf1Mju63 + 1OkBCQIQxJJj1X4dpQs44tcCjkGOWOO9XrSyAPhcR5sHIUYcsrd3cW9OUp6PUMb2 + Ci2Lmz8gG+Z1pEAxJD6TdCIFy/+Emf6UaDPWbg6CRuWmu3BtagUmVVK8+yXZBa6j + hdy0a9FZDpZsemWwsGxHJg0HRcClE/AebMtjczftIosPFfn68/fUgwGcuQ28wWGn + HG9h09fgXoO1tlOjpz/pqDou465pDMk43gxOUoCtdBaOSgkHWUiD2s9WxoAi83OY + 9uTdQCZ1EmaUQE7W/VFlcjapCtoiph/sBmQyumcMfUjJd5CCTK8q3iNcFnkulXl+ + yRMFhm1w1SBogfKeo8bv0wO9SJZ4dBq0cDSXYGzFkWOzm1k0o7rJ4pghuU3EUFk+ + Sr3QDTBlOz83BI2NPHLO+MqAwCwtao29YXA9iDYD3Cyz8fRLsWL0gLGE887s0W0Y + dh2fIhLR+5uufQWF6xj7Am3UBTbidgMmEch9FdSYZYpPamiWojMFg45jLyBBETIq + C2AAfHUYOkwZzf55yxwLEW4qyj9q+9rHF/M/XfI7t+RgBqODmNWoww4SOixEun1X + tP3W8DSp9p+PMqdfztf8Uo7HzijsolLlt4cxsUx5tN0Bz9UYtQ9VMog5IN1Oe5Vu + Wec6bO9qWdZUNV5Ty32DFycXvgmzI6w/9v4R7YKgKE/2AWOito4E3WpZZlj2mVJW + M9ZoSf3O9I3jqyCpI/pZ/F1egrydwLskM71FCx/3kkAHRRB4436TpQe0OpMFcfOp + 6qflwEWygx9EzqEsfZJ4vYgXf2ivgsSpxGVE+5Tcg90Te+0of3E= + =T2Fh + -----END PGP MESSAGE----- + +Explanation +----------- + +A PGP message is a message that has been encrypted and/or signed with one or +multiple `PGP keys `_, and then +`ASCII armored `_. The message +is decrypted and/or verified with keys contained in the image. +The decrypted message is then processed as any other user data format. + +To support such data, cloud-init requires the decryption and signing keys +to be present in the `/etc/cloud/keys` directory. To decrypt encrypted +data, the private key corresponding to the public key that encrypted the data +must be present. To verify signed data, the public key that corresponds +to the private key that signed the data must be present. + +To ensure authenticity, cloud-init can restrict user data to be a PGP +message only. To do so, the following +:ref:`base configuration` can be used: + +.. code-block:: yaml + + user_data: + require_signature: true + + +For a detailed guide to creating the PGP message and image necessary for this +user data format, see the :ref:`Use encrypted or signed user data` page. + Include file ============ @@ -384,6 +443,8 @@ as binary data and so may be processed automatically. +--------------------+-----------------------------+-------------------------+ |Cloud boothook |#cloud-boothook |text/cloud-boothook | +--------------------+-----------------------------+-------------------------+ +| PGP Message |-----BEGIN PGP MESSAGE----- |text/x-pgp-armored | ++--------------------+-----------------------------+-------------------------+ |MIME multi-part |Content-Type: multipart/mixed|multipart/mixed | +--------------------+-----------------------------+-------------------------+ |Cloud config archive|#cloud-config-archive |text/cloud-config-archive| diff --git a/doc/rtd/howto/index.rst b/doc/rtd/howto/index.rst index b6811fa3133..940f5138666 100644 --- a/doc/rtd/howto/index.rst +++ b/doc/rtd/howto/index.rst @@ -22,6 +22,7 @@ How do I...? Re-run cloud-init Change how often a module runs Validate my user data + Use encrypted or signed user data Debug cloud-init Check the status of cloud-init Report a bug diff --git a/doc/rtd/howto/pgp.rst b/doc/rtd/howto/pgp.rst new file mode 100644 index 00000000000..3bcf17c6054 --- /dev/null +++ b/doc/rtd/howto/pgp.rst @@ -0,0 +1,221 @@ +.. _pgp: + +Use encrypted or signed user data +********************************* + +Overview +======== + +This guide will show you how to use PGP encryption to secure your +:ref:`user data` +when using cloud-init. This will be accomplished with the following steps: + +1. Launch a cloud instance +2. Generate PGP key pairs +3. Encrypt and sign the user data +4. Export the keys +5. Restrict user data to require PGP message +6. Retrieve our encrypted and signed user data +7. Create a custom image containing the keys +8. Launch an instance with the encrypted user data + +We will be using the `gpg` command to for all PGP-related operations. This +guide will add new keys to the default key ring while later providing +instructions for removing them. These keys are generated for instructive +purposes only and are not intended for production use. + +.. note:: + This guide is NOT intended to be a comprehensive guide to PGP encryption or + best practices when using the `gpg` command or gpg key rings. Please + consult the `GnuPG documentation `_ for documentation + and best practices. + +Prerequisites +============= + +This guide also assumes you have the ability to launch cloud instances +with root permissions and can create snapshots or custom images from +those instances. This guide will demonstrate this using LXD, but +commands to achieve the same result will vary by cloud provider. + +The launched instance must contain the `GNU Privacy Guard` software +which provides the `gpg` command. + +Launch a cloud instance +======================= + +We will use LXD to launch an Ubuntu instance, but you can use any cloud +provider and OS that supports `gpg` and cloud-init. + +Launch the instance and connect to it: + +.. code-block:: bash + + $ lxc launch ubuntu:noble pgp-demo + $ lxc shell pgp-demo + +You should now be inside the LXD instance in the "/root" directory. +The remaining steps will be performed inside this instance in the "/root" +directory until otherwise noted. + +Generate PGP key pairs +====================== + +.. note:: + + If you already have PGP key pairs you would like to use, you can skip this + step. However, we do NOT recommend reusing or sharing any personal + private keys. + +First, generate a new key pair to be used for signing: + +.. code-block:: bash + + $ gpg --quick-generate-key --batch --passphrase "" signing_user + +Next, generate a new key pair to be used for encryption: + +.. code-block:: bash + + $ gpg --quick-generate-key --batch --passphrase "" encrypting_user + +Encrypt and sign the user data +============================== + +Create a file with your user data. For this example, we will use a simple +cloud-config file: + +.. code-block:: yaml + + #cloud-config + runcmd: + - echo 'Hello, World!' > /var/tmp/hello-world.txt + +Save this file to your working directory as `cloud-config.yaml`. + +Encrypt the user data using the public key of the encrypting user and +sign it using the private key of the signing user: + +.. code-block:: bash + + $ gpg --batch --output cloud-config.yaml.asc --sign --local-user signing_user --encrypt --recipient encrypting_user --armor cloud-config.yaml + +Our encrypted and signed user data is now saved in `cloud-config.yaml.asc`. + +Export the keys +=============== + +In order to use this user data, we will need to create a custom image +containing the public key of the encrypting user and the private key +of the signing user. + +Create the key directory: + +.. code-block:: bash + + $ mkdir /etc/cloud/keys + +Export the public key of the signing user: + +.. code-block:: bash + + $ gpg --export signing_user > /etc/cloud/keys/signing_user.gpg + +Export the private key of the encrypting user: + +.. code-block:: bash + + $ gpg --export-secret-keys encrypting_user > /etc/cloud/keys/encrypting_user.gpg + +Remove key ring +--------------- + +.. note:: + This step is optional but recommended for a clean image. + +Remove the keyring that was generated upon creating our first key: + +.. code-block:: bash + + $ rm -r ~/.gnupg/ + +Why export keys? +---------------- + +While it is more steps to export the keys in this way as opposed to +using the existing key ring in the snapshot, we do this for a few reasons: + +* Users may not want these keys in any key ring by default on a new instance +* Exporting keys is easier than copying key rings + +Note that on launch, cloud-init will import these keys into a temporary +key ring that is removed after the user data is processed. The default +key ring will not be read or modified. + +Restrict user data to require PGP message +========================================= + +To ensure that our message hasn't been replaced or tampered with, we can +require that cloud-init only process PGP messages. To do so, create a file +`/etc/cloud/cloud.cfg.d/99_pgp.cfg` with the following contents: + +.. code-block:: text + + user_data: + require_signature: true + +Clean cloud-init +================ + +This is to ensure that cloud-init runs as if it were first boot: + +.. code-block:: bash + + $ cloud-init clean --logs + +Retrieve our encrypted and signed user data +=========================================== + +Before running +these commands, copy the encrypted and signed user data +that we created earlier to the host system. + +From the host system, run: + +.. code-block:: bash + + $ lxc file pull pgp-demo/root/cloud-config.yaml.asc . + + +Create a custom image containing the keys +========================================= + +.. note:: + Before creating the image, you may want to remove the original user data + and created key ring from the instance. This is not strictly necessary + but is recommended for a clean image. + +Now that we have our instance configured, we can create a custom image from +it. This step will vary depending on your cloud provider. + +Using LXD, from the host system, run: + +.. code-block:: bash + + $ lxc stop pgp-demo + $ lxc publish pgp-demo --alias pgp-demo-image + +Launch an instance with the encrypted user data +=============================================== + +Now that we have our custom image with the keys, we can launch a +new instance with the encrypted user data. With your encrypted and signed +user data in the current working directory, run: + +.. code-block:: bash + + $ lxc launch pgp-demo-image pgp-demo-encrypted \ + --config user.user-data="$(cat cloud-config.yaml.asc)" + +On the launched system, you should see the file `/var/tmp/hello-world.txt` +containing the text `Hello, World!`. diff --git a/doc/rtd/reference/base_config_reference.rst b/doc/rtd/reference/base_config_reference.rst index 2d13675e68c..7834c6965b9 100644 --- a/doc/rtd/reference/base_config_reference.rst +++ b/doc/rtd/reference/base_config_reference.rst @@ -255,25 +255,38 @@ There are a few reasons to modify the ``datasource_list``: that the key and its contents, a list, must share a single line - no newlines. +.. _base_config_user_data: + +``user_data`` +^^^^^^^^^^^^^ + +Allows the user to set options for processing user data. + +Format is a dict with the following keys: + +* ``enabled``: A boolean value to enable or disable the use of user data. + One use case is to prevent users from accidentally breaking closed + appliances. Default: ``true``. +* ``require_signature``: A boolean indicating whether to require a PGP + signed message for user data. Default: ``false``. + This should be true if + using :ref:`signed user data`. + ``vendor_data``/``vendor_data2`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Allows the user to disable ``vendor_data`` or ``vendor_data2`` along with providing a prefix for any executed scripts. -Format is a dict with ``enabled`` and ``prefix`` keys: - -* ``enabled``: A boolean indicating whether to enable or disable the - ``vendor_data``. -* ``prefix``: A path to prepend to any ``vendor_data``-provided script. - -``allow_userdata`` -^^^^^^^^^^^^^^^^^^ +Format is a dict with the following keys: -A boolean value to disable the use of user data. -This allows custom images to prevent users from accidentally breaking closed -appliances. Setting ``allow_userdata: false`` in the configuration will disable -``cloud-init`` from processing user data. +* ``enabled``: A boolean indicating whether to enable or disable the vendor + data. Default: ``true``. +* ``require_signature``: A boolean indicating whether to require a PGP + signed message for vendor data. Default: ``false``. +* ``prefix``: A path to a binary to prefix to any executed scripts. + An example of usage would be to prefix a script with ``strace`` to + debug a script. ``manual_cache_clean`` ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/rtd/spelling_word_list.txt b/doc/rtd/spelling_word_list.txt index 5f4783af65b..a19cc46b3fe 100644 --- a/doc/rtd/spelling_word_list.txt +++ b/doc/rtd/spelling_word_list.txt @@ -167,6 +167,7 @@ passno passthrough passw pem +pgp pid pipelining pki diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py index a5ce8f1e14f..3e88aa43b96 100644 --- a/tests/integration_tests/instances.py +++ b/tests/integration_tests/instances.py @@ -83,10 +83,10 @@ def restart(self): log.info("Restarting instance and waiting for boot") self.instance.restart() - def execute(self, command, *, use_sudo=True) -> Result: + def execute(self, command, *, use_sudo=True, **kwargs) -> Result: if self.instance.username == "root" and use_sudo is False: raise RuntimeError("Root user cannot run unprivileged") - return self.instance.execute(command, use_sudo=use_sudo) + return self.instance.execute(command, use_sudo=use_sudo, **kwargs) def pull_file( self, diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py new file mode 100644 index 00000000000..57df0293ccb --- /dev/null +++ b/tests/integration_tests/userdata/test_pgp.py @@ -0,0 +1,365 @@ +"""Test PGP signed and encrypted userdata.""" + +from pathlib import Path + +import pytest + +from cloudinit import subp +from tests.integration_tests.clouds import IntegrationCloud +from tests.integration_tests.instances import IntegrationInstance +from tests.integration_tests.integration_settings import PLATFORM +from tests.integration_tests.util import verify_clean_boot + +USER_DATA = """\ +#cloud-config +bootcmd: + - touch /var/tmp/{0} +""" + + +@pytest.fixture(scope="module") +def gpg_dir(tmp_path_factory: pytest.TempPathFactory): + yield tmp_path_factory.mktemp("gpg_dir") + + +@pytest.fixture(scope="module") +def public_key(gpg_dir: Path): + subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--quick-generate-key", + "--batch", + "--passphrase", + "", + "signing_user", + ] + ) + yield subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--export", + "--armor", + "signing_user", + ] + ).stdout + + +@pytest.fixture(scope="module") +def private_key(gpg_dir): + subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--quick-generate-key", + "--batch", + "--passphrase", + "", + "encrypting_user", + ] + ) + yield subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--export-secret-keys", + "--armor", + "encrypting_user", + ] + ).stdout + + +@pytest.fixture(scope="module") +def signed_and_encrypted_userdata(gpg_dir, public_key, private_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--sign", + "--local-user", + "signing_user", + "--encrypt", + "--recipient", + "encrypting_user", + "--armor", + ], + data=USER_DATA.format("signed_and_encrypted"), + ).stdout + + +@pytest.fixture(scope="module") +def signed_userdata(gpg_dir, public_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--sign", + "--local-user", + "signing_user", + "--armor", + ], + data=USER_DATA.format("signed"), + ).stdout + + +@pytest.fixture(scope="module") +def encrypted_userdata(gpg_dir, private_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--encrypt", + "--recipient", + "encrypting_user", + "--armor", + ], + data=USER_DATA.format("encrypted"), + ).stdout + + +@pytest.fixture(scope="module") +def valid_keys_image( + public_key, + private_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/pub_key", public_key) + client.write_to_file("/etc/cloud/keys/priv_key", private_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture(scope="module") +def valid_pub_key_image( + public_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/pub_key", public_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture(scope="module") +def valid_priv_key_image( + private_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/priv_key", private_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture +def pgp_client(session_cloud: IntegrationCloud, request): + user_data_fixture, image_id_fixture = request.param + user_data = request.getfixturevalue(user_data_fixture) + launch_kwargs = {"image_id": request.getfixturevalue(image_id_fixture)} + with session_cloud.launch( + user_data=user_data, launch_kwargs=launch_kwargs + ) as client: + yield client + + +def _invalidate_key(client, key_path): + pub_key = client.read_from_file(key_path) + midde_index = len(pub_key) // 2 + bad_pub_key = f"{pub_key[:midde_index]}a{pub_key[midde_index:]}" + client.write_to_file(key_path, bad_pub_key) + + +@pytest.mark.parametrize( + "pgp_client", + [("signed_and_encrypted_userdata", "valid_keys_image")], + indirect=True, +) +def test_signed_and_encrypted(pgp_client: IntegrationInstance): + client = pgp_client + + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", + ) + client.execute("cloud-init clean --logs") + client.restart() + + assert client.execute("test -f /var/tmp/signed_and_encrypted") + verify_clean_boot(client) + + # Invalidate our public key and ensure we fail + client.execute("cp /etc/cloud/keys/pub_key /var/tmp/pub_key") + _invalidate_key(client, "/etc/cloud/keys/pub_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed_and_encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/signed_and_encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Signature verification failed. Could not verify." in result.stdout + + # Restore the public key, invalidate the private key, and ensure we fail + client.execute("cp /var/tmp/pub_key /etc/cloud/keys/pub_key") + client.execute("cp /etc/cloud/keys/priv_key /var/tmp/priv_key") + _invalidate_key(client, "/etc/cloud/keys/priv_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed_and_encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/signed_and_encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Signature verification failed. Could not unwrap." in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", + [("encrypted_userdata", "valid_priv_key_image")], + indirect=True, +) +def test_encrypted(pgp_client: IntegrationInstance): + client = pgp_client + assert client.execute("test -f /var/tmp/encrypted") + verify_clean_boot(client) + + # Invalidate our private key and ensure we fail + _invalidate_key(client, "/etc/cloud/keys/priv_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", + [("signed_userdata", "valid_pub_key_image")], + indirect=True, +) +def test_signed(pgp_client: IntegrationInstance): + client = pgp_client + assert client.execute("test -f /var/tmp/signed") + verify_clean_boot(client) + + # Invalidate our public key and ensure we fail + _invalidate_key(client, "/etc/cloud/keys/pub_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed") + client.restart() + assert not client.execute("test -f /var/tmp/signed") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.fixture +def lxd_pgp_client(session_cloud: IntegrationCloud, request): + user_data_fixture, image_id_fixture = request.param + user_data = request.getfixturevalue(user_data_fixture) + launch_kwargs = { + "execute_via_ssh": False, + "username": "root", + } + if image_id_fixture: + launch_kwargs["image_id"] = request.getfixturevalue(image_id_fixture) + + with session_cloud.launch( + user_data=user_data, launch_kwargs=launch_kwargs + ) as client: + yield client + + +@pytest.mark.skipif( + PLATFORM not in ["lxd_container", "lxd_vm"], + reason=( + "failed user data means no ssh or 'ubuntu' user creation, " + "so we need special LXD options" + ), +) +@pytest.mark.parametrize( + "lxd_pgp_client", + [ + pytest.param( + ("encrypted_userdata", "valid_pub_key_image"), + id="encrypted_with_no_priv_key", + ), + pytest.param( + ("signed_userdata", "valid_priv_key_image"), + id="signed_with_no_pub_key", + ), + pytest.param( + ("signed_and_encrypted_userdata", None), + id="signed_and_encrypted_with_no_keys", + ), + ], + indirect=True, +) +def test_unparseable_userdata( + lxd_pgp_client: IntegrationInstance, +): + result = lxd_pgp_client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.mark.user_data(USER_DATA.format("no")) +def test_signature_required(client: IntegrationInstance): + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", + ) + client.execute("cloud-init clean --logs") + client.restart() + + result = client.execute("cloud-init status --format=json") + assert result.failed + assert ( + "'require_signature' was set true in cloud-init's base configuration, " + "but content is not signed" + ) in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", [("encrypted_userdata", "valid_keys_image")], indirect=True +) +def test_encrypted_message_but_required_signature( + pgp_client: IntegrationInstance, +): + """Ensure fail if we require signature but only have encrypted message.""" + client = pgp_client + assert client.execute("test -f /var/tmp/encrypted") + verify_clean_boot(client) + + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", + ) + client.execute("cloud-init clean --logs") + client.restart() + + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Signature verification failed" in result.stdout diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py new file mode 100644 index 00000000000..3f6a1c4b684 --- /dev/null +++ b/tests/unittests/cloudinit/test_user_data.py @@ -0,0 +1,137 @@ +"""These tests are for code in the cloudinit.user_data module. + +These are NOT general tests for user data processing. +""" + +import email +from email.mime.base import MIMEBase + +import pytest + +from cloudinit import subp, user_data +from tests.unittests.util import gzip_text + + +def count_messages(root): + return sum(not user_data.is_skippable(m) for m in root.walk()) + + +class TestMessageFromString: + def test_unicode_not_messed_up(self): + roundtripped = email.message_from_string("\n").as_string() + assert "\x00" not in roundtripped + + +class TestUDProcess: + @pytest.mark.parametrize( + "msg", + [ + pytest.param("#cloud-config\napt_update: True\n", id="str"), + pytest.param(b"#cloud-config\napt_update: True\n", id="bytes"), + pytest.param( + gzip_text("#cloud-config\napt_update: True\n"), + id="compressed", + ), + ], + ) + def test_type_in_userdata(self, msg): + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(msg) + assert count_messages(message) + + +class TestConvertString: + @pytest.mark.parametrize( + "blob,decode", + [ + pytest.param("hi mom", False, id="str"), + pytest.param(b"#!/bin/bash\necho \xc3\x84\n", True, id="bytes"), + pytest.param(b"\x32\x32", True, id="binary"), + ], + ) + def test_mime_conversion_preserves_content(self, blob, decode): + """Ensure for each type of input, the content is preserved.""" + assert ( + user_data.convert_string(blob).get_payload(decode=decode) == blob + ) + + def test_handle_mime_parts(self): + """Mime parts are properly returned as a mime message.""" + message = MIMEBase("text", "plain") + message.set_payload("Just text") + msg = user_data.convert_string(str(message)) + assert "Just text" == msg.get_payload(decode=False) + + +GOOD_MESSAGE = """\ +-----BEGIN PGP MESSAGE----- + +Pass:AMockWillDecryptMeIntoACloudConfig +-----END PGP MESSAGE----- +""" + +CLOUD_CONFIG = """\ +#cloud-config +password: password +""" + +BAD_MESSAGE = """\ +-----BEGIN PGP MESSAGE----- + +Fail:AMockWillFailToDecryptMe +-----END PGP MESSAGE----- +""" + + +def my_subp(*args, **kwargs): + if args[0][0] == "gpg": + if "Pass:" in kwargs["data"]: + return subp.SubpResult(CLOUD_CONFIG, "gpg: Good signature") + elif "Fail:" in kwargs["data"]: + raise subp.ProcessExecutionError( + "", "gpg: public key decryption failed", 2, args[0] + ) + return subp.SubpResult("", "") + + +class TestPgpData: + def test_pgp_decryption(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(GOOD_MESSAGE) + parts = [p for p in message.walk() if not user_data.is_skippable(p)] + assert len(parts) == 1 + part = parts[0] + assert part.get_payload() == CLOUD_CONFIG + + def test_pgp_decryption_failure(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises( + RuntimeError, match="Failed decrypting user data payload" + ): + ud_proc.process(BAD_MESSAGE) + + def test_pgp_required(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(GOOD_MESSAGE, require_signature=True) + parts = [p for p in message.walk() if not user_data.is_skippable(p)] + assert len(parts) == 1 + part = parts[0] + assert part.get_payload() == CLOUD_CONFIG + + def test_pgp_required_with_no_pgp_message(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises(RuntimeError, match="content is not signed"): + ud_proc.process(CLOUD_CONFIG, require_signature=True) + + def test_pgp_in_list_disallowed(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises( + RuntimeError, + match="PGP message must encompass entire user data or vendor data", + ): + ud_proc.process([GOOD_MESSAGE, GOOD_MESSAGE]) diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py index 7621c5f6c80..be6721d7416 100644 --- a/tests/unittests/test_data.py +++ b/tests/unittests/test_data.py @@ -2,14 +2,12 @@ """Tests for handling of userdata within cloud init.""" -import gzip import logging import os from email import encoders from email.mime.application import MIMEApplication from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart -from io import BytesIO from pathlib import Path from unittest import mock @@ -19,35 +17,14 @@ from cloudinit import handlers from cloudinit import helpers as c_helpers -from cloudinit import safeyaml, stages -from cloudinit import user_data as ud -from cloudinit import util +from cloudinit import safeyaml, stages, util from cloudinit.config.modules import Modules from cloudinit.settings import DEFAULT_RUN_DIR, PER_INSTANCE -from tests.unittests import helpers -from tests.unittests.util import FakeDataSource +from tests.unittests.util import FakeDataSource, gzip_text MPATH = "cloudinit.stages" -def count_messages(root): - am = 0 - for m in root.walk(): - if ud.is_skippable(m): - continue - am += 1 - return am - - -def gzip_text(text): - contents = BytesIO() - f = gzip.GzipFile(fileobj=contents, mode="wb") - f.write(util.encode_text(text)) - f.flush() - f.close() - return contents.getvalue() - - @pytest.fixture(scope="function") def init_tmp(request, tmpdir): ci = stages.Init() @@ -772,53 +749,6 @@ def test_include_bad_url_no_fail( assert cc.get("included") is True -class TestUDProcess(helpers.ResourceUsingTestCase): - def test_bytes_in_userdata(self): - msg = b"#cloud-config\napt_update: True\n" - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - def test_string_in_userdata(self): - msg = "#cloud-config\napt_update: True\n" - - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - def test_compressed_in_userdata(self): - msg = gzip_text("#cloud-config\napt_update: True\n") - - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - -class TestConvertString(helpers.TestCase): - def test_handles_binary_non_utf8_decodable(self): - """Printable unicode (not utf8-decodable) is safely converted.""" - blob = b"#!/bin/bash\necho \xc3\x84\n" - msg = ud.convert_string(blob) - self.assertEqual(blob, msg.get_payload(decode=True)) - - def test_handles_binary_utf8_decodable(self): - blob = b"\x32\x32" - msg = ud.convert_string(blob) - self.assertEqual(blob, msg.get_payload(decode=True)) - - def test_handle_headers(self): - text = "hi mom" - msg = ud.convert_string(text) - self.assertEqual(text, msg.get_payload(decode=False)) - - def test_handle_mime_parts(self): - """Mime parts are properly returned as a mime message.""" - message = MIMEBase("text", "plain") - message.set_payload("Just text") - msg = ud.convert_string(str(message)) - self.assertEqual("Just text", msg.get_payload(decode=False)) - - class TestFetchBaseConfig: @pytest.fixture(autouse=True) def mocks(self, mocker): diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 221e21de5c2..3b80e7a0e2e 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -2451,12 +2451,6 @@ def test_given_log_level_used(self): self.assertEqual((log_level, mock.ANY), logger.log.call_args[0]) -class TestMessageFromString(helpers.TestCase): - def test_unicode_not_messed_up(self): - roundtripped = util.message_from_string("\n").as_string() - self.assertNotIn("\x00", roundtripped) - - class TestReadOptionalSeed: @pytest.mark.parametrize( "seed_dir,expected_fill,retval", diff --git a/tests/unittests/util.py b/tests/unittests/util.py index 02aa6b1ac97..c7ce1fe407c 100644 --- a/tests/unittests/util.py +++ b/tests/unittests/util.py @@ -1,8 +1,10 @@ # This file is part of cloud-init. See LICENSE file for license information. +import gzip +from io import BytesIO from typing import Optional, Type from unittest import mock -from cloudinit import cloud, distros, helpers +from cloudinit import cloud, distros, helpers, util from cloudinit.config import Config from cloudinit.net.dhcp import IscDhclient from cloudinit.sources import DataSource, DataSourceHostname @@ -71,6 +73,15 @@ class concreteCls(abclass): return type("DummyConcrete" + abclass.__name__, (concreteCls,), {}) +def gzip_text(text): + contents = BytesIO() + f = gzip.GzipFile(fileobj=contents, mode="wb") + f.write(util.encode_text(text)) + f.flush() + f.close() + return contents.getvalue() + + class MockDistro(distros.Distro): # MockDistro is here to test base Distro class implementations def __init__(self, name="testingdistro", cfg=None, paths=None):