Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support encrypted and signed user data #5599

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion cloudinit/config/cc_apt_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ def apt_key_add(gpg_context):
)
return file_name

def apt_key_list(gpg_context):
def apt_key_list(gpg_context: GPG):
"""apt-key list

returns string of all trusted keys (in /etc/apt/trusted.gpg and
Expand Down
56 changes: 56 additions & 0 deletions cloudinit/gpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import logging
import os
import pathlib
import re
import signal
import time
Expand All @@ -22,6 +23,10 @@
HOME = "GNUPGHOME"


class GpgVerificationError(Exception):
    """Signal that gpg could not verify the signature of a message."""


class GPG:
def __init__(self):
self.gpg_started = False
Expand Down Expand Up @@ -68,6 +73,57 @@ def export_armour(self, key: str) -> Optional[str]:
LOG.debug('Failed to export armoured key "%s": %s', key, error)
return None

def import_key(self, key: pathlib.Path) -> None:
    """Import the gpg key stored in a file.

    gpg is run with this object's environment (``self.env``). A failed
    import is logged as a warning and is not raised to the caller.

    :param key: path to the key file
    """
    import_cmd = ["gpg", "--batch", "--import", str(key)]
    try:
        subp.subp(import_cmd, update_env=self.env)
    except subp.ProcessExecutionError as error:
        LOG.warning("Failed to import key %s: %s", key, error)

def decrypt(self, data: str, *, require_signature: bool = False) -> str:
    """Process data using gpg.

    This can be used to decrypt encrypted data, verify signed data,
    or both depending on the data provided.

    :param data: ASCII-armored GPG message to process
    :param require_signature: when true, ``gpg --verify`` is run on the
        message first and must succeed before ``gpg --decrypt`` is run
    :return: decrypted data (stdout of ``gpg --decrypt``)
    :raises GpgVerificationError: if ``require_signature`` is set and
        ``gpg --verify`` exits with status 1 or 2
    :raises ProcessExecutionError: if gpg fails in any other way
    """
    if require_signature:
        try:
            subp.subp(
                ["gpg", "--verify"],
                data=data,
                update_env=self.env,
            )
        except subp.ProcessExecutionError as e:
            # gpg documents exit status 1 as "at least one signature was
            # bad"; status 2 was already treated as a verification
            # failure here. Report both the same way so a tampered
            # message is not surfaced as a generic process error.
            if e.exit_code in (1, 2):
                raise GpgVerificationError(
                    "Signature verification failed"
                ) from e
            raise
    result = subp.subp(
        ["gpg", "--decrypt"],
        data=data,
        update_env=self.env,
    )
    return result.stdout

def dearmor(self, key: str) -> str:
"""Dearmor gpg key, dearmored key gets returned

Expand Down
1 change: 1 addition & 0 deletions cloudinit/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,4 @@
FREQUENCIES = [PER_INSTANCE, PER_ALWAYS, PER_ONCE]

HOTPLUG_ENABLED_FILE = "/var/lib/cloud/hotplug.enabled"
KEY_DIR = "/etc/cloud/keys"
21 changes: 18 additions & 3 deletions cloudinit/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,19 +647,34 @@ def get_url_params(self):

def get_userdata(self, apply_filter=False):
    """Return the processed user data, processing it on first use.

    The processed result is stored on ``self.userdata`` and reused by
    later calls.

    :param apply_filter: when true, return the processed data passed
        through ``self._filter_xdata`` instead of the data itself
    :return: the processed (and optionally filtered) user data
    """
    if self.userdata is None:
        # A "user_data" key that is present but null makes .get() return
        # None rather than a default, so fall back to {} explicitly.
        user_data_cfg = self.sys_cfg.get("user_data") or {}
        self.userdata = self.ud_proc.process(
            self.get_userdata_raw(),
            require_signature=user_data_cfg.get("require_signature", False),
        )
    if apply_filter:
        return self._filter_xdata(self.userdata)
    return self.userdata

def get_vendordata(self):
    """Return the processed vendor data, processing it on first use.

    The processed result is stored on ``self.vendordata`` and reused by
    later calls.

    :return: the processed vendor data
    """
    if self.vendordata is None:
        # A "vendor_data" key that is present but null makes .get() return
        # None rather than a default, so fall back to {} explicitly.
        vendor_data_cfg = self.sys_cfg.get("vendor_data") or {}
        self.vendordata = self.ud_proc.process(
            self.get_vendordata_raw(),
            require_signature=vendor_data_cfg.get(
                "require_signature", False
            ),
        )
    return self.vendordata

def get_vendordata2(self):
    """Return the processed vendor data 2, processing it on first use.

    The processed result is stored on ``self.vendordata2`` and reused by
    later calls.

    :return: the processed vendor data 2
    """
    if self.vendordata2 is None:
        # A "vendor_data2" key that is present but null makes .get()
        # return None rather than a default, so fall back to {}.
        vendor_data2_cfg = self.sys_cfg.get("vendor_data2") or {}
        self.vendordata2 = self.ud_proc.process(
            self.get_vendordata2_raw(),
            require_signature=vendor_data2_cfg.get(
                "require_signature", False
            ),
        )
    return self.vendordata2

@property
Expand Down
40 changes: 36 additions & 4 deletions cloudinit/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,41 @@ def finalize_handlers():
finally:
finalize_handlers()

def _consume_userdata_if_enabled(self, frequency: str) -> None:
    """Consume userdata if not disabled in base config.

    Base config can have a definition like:
      user_data:
        enabled: false
        require_signature: true
    or a deprecated `allow_userdata` key.

    When both are present, `user_data.enabled` wins and a warning is
    logged; `allow_userdata` alone is still honoured but reported as
    deprecated. With neither key set, user data is consumed.

    :param frequency: passed through to ``self._consume_userdata``
    """
    # A "user_data" key that is present but null makes .get() return None
    # rather than a default, so fall back to {} explicitly.
    user_data_cfg = self.cfg.get("user_data") or {}
    # Read the flag through the same helper as the legacy key so both
    # spellings are interpreted by the same rules.
    # NOTE(review): the helper is expected to coerce string values such
    # as "false" to a boolean -- confirm against util.
    enabled = util.get_cfg_option_bool(user_data_cfg, "enabled", True)

    if "allow_userdata" in self.cfg:
        lifecycle.deprecate(
            deprecated="Key 'allow_userdata'",
            deprecated_version="24.3",
            extra_message="Use 'user_data.enabled' instead.",
        )
        if "enabled" in user_data_cfg:
            LOG.warning(
                "Both 'allow_userdata' and 'user_data.enabled' are set."
                " 'allow_userdata' will be ignored."
            )
        else:
            enabled = util.get_cfg_option_bool(self.cfg, "allow_userdata")

    if not enabled:
        LOG.debug("User data disabled in base config: discarding user-data")
        return
    self._consume_userdata(frequency)

def consume_data(self, frequency=PER_INSTANCE):
# Consume the userdata first, because we want to let the part
# handlers run first (for merging stuff)
Expand All @@ -801,10 +836,7 @@ def consume_data(self, frequency=PER_INSTANCE):
"reading and applying user-data",
parent=self.reporter,
):
if util.get_cfg_option_bool(self.cfg, "allow_userdata", True):
self._consume_userdata(frequency)
else:
LOG.debug("allow_userdata = False: discarding user-data")
self._consume_userdata_if_enabled(frequency)

with events.ReportEventStack(
"consume-vendor-data",
Expand Down
111 changes: 93 additions & 18 deletions cloudinit/user_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@
#
# This file is part of cloud-init. See LICENSE file for license information.

import email
import logging
import os
import pathlib
from email.message import Message
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.nonmultipart import MIMENonMultipart
from email.mime.text import MIMEText
from typing import Union, cast

from cloudinit import features, handlers, util
from cloudinit import features, gpg, handlers, subp, util
from cloudinit.settings import KEY_DIR
from cloudinit.url_helper import UrlError, read_file_or_url

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -78,13 +83,23 @@ def __init__(self, paths):
self.paths = paths
self.ssl_details = util.fetch_ssl_details(paths)

def process(self, blob, require_signature=False):
    """Process raw data into one accumulated multipart MIME message.

    Each piece of data is converted with ``convert_string`` and handed to
    ``self._process_msg`` together with the accumulating message.

    :param blob: the raw data, or a list of raw data pieces
    :param require_signature: forwarded to ``convert_string``
    :return: the ``MIMEMultipart`` message everything was processed into
    """
    accumulating_msg = MIMEMultipart()
    # Entries of a list are flagged as parts (is_part=True); data passed
    # on its own is converted with is_part=False.
    if isinstance(blob, list):
        pieces, is_part = blob, True
    else:
        pieces, is_part = [blob], False
    for piece in pieces:
        self._process_msg(
            convert_string(
                piece, is_part=is_part, require_signature=require_signature
            ),
            accumulating_msg,
        )
    return accumulating_msg

def _process_msg(self, base_msg, append_msg):
Expand Down Expand Up @@ -361,27 +376,87 @@ def is_skippable(part):
return False


# Converts a raw string into a MIME message
def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
"""convert a string (more likely bytes) or a message into
a mime message."""
def decrypt_payload(payload: str, require_signature: bool) -> str:
    """Decrypt and/or verify a PGP message with the keys in KEY_DIR.

    Every entry found in the KEY_DIR directory (when it exists) is
    imported into a fresh gpg context before the payload is processed.

    :param payload: ASCII-armored GPG message to process
    :param require_signature: Whether to require a signature
    :return: decrypted data
    :raises RuntimeError: if gpg fails with a ProcessExecutionError
    """
    key_dir = pathlib.Path(KEY_DIR)
    with gpg.GPG() as gpg_context:
        if key_dir.is_dir():
            for entry in key_dir.iterdir():
                gpg_context.import_key(entry)
        try:
            decrypted = gpg_context.decrypt(
                payload, require_signature=require_signature
            )
        except subp.ProcessExecutionError as e:
            raise RuntimeError(
                "Failed decrypting user data payload. "
                f"Ensure any necessary keys are present in {KEY_DIR}."
            ) from e
    return decrypted


def handle_encrypted(
    data: bytes, is_part: bool, require_signature: bool
) -> bytes:
    """Decrypt and/or verify data when it is a PGP message.

    This happens on the data as a whole, before any MIME handling,
    because a signed MIME part could be thwarted by other user data
    parts.

    :param data: the bytes to inspect
    :param is_part: whether data is one piece among several rather than
        the entire user data or vendor data
    :param require_signature: whether content that is not a PGP message
        must be rejected
    :return: the decrypted payload for a PGP message, otherwise data
        unchanged
    :raises RuntimeError: if a PGP message is passed as a part, or if
        require_signature is set and data is not a PGP message
    """
    if not data.startswith(b"-----BEGIN PGP MESSAGE-----"):
        if require_signature:
            raise RuntimeError(
                "'require_signature' was set true in cloud-init's base "
                "configuration, but content is not signed."
            )
        return data

    if is_part:
        raise RuntimeError(
            "PGP message must encompass entire user data or vendor data."
        )
    decrypted = decrypt_payload(data.decode("utf-8"), require_signature)
    return decrypted.encode("utf-8")


def _create_binmsg(data):
    """Wrap data in a MIME message of type NOT_MULTIPART_TYPE.

    :param data: the payload to store in the message
    :return: a ``MIMEBase`` message carrying data as its payload
    """
    main_type, sub_type = NOT_MULTIPART_TYPE.split("/", 1)
    binmsg = MIMEBase(main_type, sub_type)
    binmsg.set_payload(data)
    return binmsg


def convert_string(
    raw_data: Union[str, bytes],
    *,
    is_part: bool = False,
    require_signature: bool = False,
) -> Message:
    """Convert the raw data into a mime message.

    'raw_data' is the data as it was received from the user-data source.
    It could be a string, bytes, or a gzip compressed version of either.

    :param raw_data: the data to convert; a falsy value is treated as
        empty bytes
    :param is_part: forwarded to ``handle_encrypted``
    :param require_signature: forwarded to ``handle_encrypted``
    :return: the parsed message when the data already contains a
        "mime-version:" header within its first 4096 bytes, otherwise a
        new message built by ``_create_binmsg``
    """
    if not raw_data:
        raw_data = b""

    if isinstance(raw_data, str):
        bdata = raw_data.encode("utf-8")
    else:
        bdata = raw_data
    # cast here because decode=False means return type is bytes
    bdata = cast(bytes, util.decomp_gzip(bdata, decode=False))

    # PGP handling happens before any MIME detection.
    bdata = handle_encrypted(bdata, is_part, require_signature)

    # Now ensure we have a MIME message
    if b"mime-version:" in bdata[:4096].lower():
        # If we have a pre-existing MIME, use it
        return email.message_from_string(bdata.decode("utf-8"))
    # Otherwise, convert to MIME
    return _create_binmsg(bdata)
20 changes: 12 additions & 8 deletions cloudinit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import binascii
import contextlib
import copy as obj_copy
import email
import glob
import grp
import gzip
Expand All @@ -36,6 +35,7 @@
from base64 import b64decode
from collections import deque, namedtuple
from contextlib import contextmanager, suppress
from email.message import Message
from errno import ENOENT
from functools import lru_cache
from pathlib import Path
Expand Down Expand Up @@ -164,7 +164,7 @@ def maybe_b64decode(data: bytes) -> bytes:
return data


def fully_decoded_payload(part):
def fully_decoded_payload(part: Message):
# In Python 3, decoding the payload will ironically hand us a bytes object.
# 'decode' means to decode according to Content-Transfer-Encoding, not
# according to any charset in the Content-Type. So, if we end up with
Expand All @@ -175,8 +175,16 @@ def fully_decoded_payload(part):
cte_payload, bytes
):
charset = part.get_charset()
if charset and charset.input_codec:
encoding = charset.input_codec

# TODO: Mypy doesn't like the following code because `input_codec`
# is part of a legacy API. See first line of:
# https://docs.python.org/3/library/email.charset.html
# However, as of this writing, it is still available:
# https://github.com/python/cpython/blob/aab18f4d925528c2cbe4625211bf904db2a28317/Lib/email/charset.py#L234 # noqa: E501
# That said, we shouldn't continue using legacy APIs, so we should
# update this code at some point.
if charset and charset.input_codec: # type: ignore
encoding = charset.input_codec # type: ignore
else:
encoding = "utf-8"
return cte_payload.decode(encoding, "surrogateescape")
Expand Down Expand Up @@ -2899,10 +2907,6 @@ def is_x86(uname_arch=None):
return x86_arch_match


def message_from_string(string):
    """Parse a string into an email message object.

    Thin wrapper around ``email.message_from_string``.
    """
    parsed = email.message_from_string(string)
    return parsed


def get_installed_packages():
out = subp.subp(["dpkg-query", "--list"], capture=True)

Expand Down
Loading