diff --git a/.dockerignore b/.dockerignore index 5af9064..e145e98 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,5 @@ ./.travis.yml ./.env ./docker-compose.yml +*.log +*.db diff --git a/.gitignore b/.gitignore index e45e603..fc0973b 100644 --- a/.gitignore +++ b/.gitignore @@ -106,4 +106,5 @@ ENV/ .mypy_cache/ # config -config.py +.secrets.yaml +settings.local.yaml.bak diff --git a/Dockerfile b/Dockerfile index 8ab8634..8c01982 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7 +FROM python:3.8 RUN apt-get update -y \ && apt-get upgrade -y \ @@ -15,10 +15,12 @@ WORKDIR /etc/aardvark ENV AARDVARK_DATA_DIR=/data \ AARDVARK_ROLE=Aardvark \ ARN_PARTITION=aws \ - AWS_DEFAULT_REGION=us-east-1 + AWS_DEFAULT_REGION=us-east-1 \ + FLASK_APP=aardvark EXPOSE 5000 +COPY ./settings.yaml . COPY ./entrypoint.sh /etc/aardvark/entrypoint.sh ENTRYPOINT [ "/etc/aardvark/entrypoint.sh" ] diff --git a/README.md b/README.md index fa0eb8f..5f9ae08 100644 --- a/README.md +++ b/README.md @@ -9,18 +9,27 @@ Aardvark is a multi-account AWS IAM Access Advisor API (and caching layer). ## New in `v1.0.0` -⚠️: Breaking change -✨: Enhancement +⚠️ Breaking change + +✨ Enhancement -- ✨ Pluggable persistence layer -- ✨ Pluggable retrievers - ⚠️ Upgrade to Python 3.8+ - ⚠️ New configuration format +- ✨ Pluggable persistence layer +- ✨ Pluggable retrievers -## Install: +## Install Ensure that you have Python 3.8 or later. +Use pip install Aardvark: + +```bash +pip install aardvark +``` + +Alternatively, clone the repository and install a development version: + ```bash git clone https://github.com/Netflix-Skunkworks/aardvark.git cd aardvark @@ -33,19 +42,21 @@ python setup.py develop The Aardvark config wizard will guide you through the setup. ```bash -% aardvark config +❯ aardvark config -Aardvark can use SWAG to look up accounts. https://github.com/Netflix-Skunkworks/swag-client -Do you use SWAG to track accounts? 
[yN]: no -ROLENAME: Aardvark -DATABASE [sqlite:////home/github/aardvark/aardvark.db]: -# Threads [5]: +Aardvark can use SWAG to look up accounts. See https://github.com/Netflix-Skunkworks/swag-client +Do you use SWAG to track accounts? [yN]: N +Role Name [Aardvark]: Aardvark +Database URI [sqlite:///aardvark.db]: +Worker Count [5]: 5 +Config file location [settings.yaml]: settings.local.yaml ->> Writing to config.py +writing config file to settings.local.yaml ``` - Whether to use [SWAG](https://github.com/Netflix-Skunkworks/swag-client) to enumerate your AWS accounts. (Optional, but useful when you have many accounts.) - The name of the IAM Role to assume into in each account. - The Database connection string. (Defaults to sqlite in the current working directory. Use RDS Postgres for production.) +- The number of workers to create. ## Create the DB tables @@ -57,27 +68,95 @@ aardvark create_db Aardvark needs an IAM Role in each account that will be queried. Additionally, Aardvark needs to be launched with a role or user which can `sts:AssumeRole` into the different account roles. -AardvarkInstanceProfile: +### Hub role (`AardvarkInstanceProfile`): + - Only create one. -- Needs the ability to call `sts:AssumeRole` into all of the AardvarkRole's +- Needs the ability to call `sts:AssumeRole` into all of the `AardvarkRole`s + +Inline policy example: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AssumeSpokeRoles", + "Effect": "Allow", + "Action": [ + "sts:assumerole" + ], + "Resource": [ + "arn:aws:iam::*:role/AardvarkRole" + ] + } + ] +} +``` + +### Spoke roles (`AardvarkRole`): -AardvarkRole: - Must exist in every account to be monitored. - Must have a trust policy allowing `AardvarkInstanceProfile`. 
- Has these permissions: + ``` iam:GenerateServiceLastAccessedDetails iam:GetServiceLastAccessedDetails -iam:listrolepolicies -iam:listroles +iam:ListRolePolicies +iam:ListRoles iam:ListUsers iam:ListPolicies iam:ListGroups ``` +Assume role policy document example (be sure to replace the account ID with a real one): + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowHubRoleAssume", + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam::111111111111:role/AardvarkInstanceProfile" + ] + }, + "Action": "sts:AssumeRole" + } + ] +} +``` -So if you are monitoring `n` accounts, you will always need `n+1` roles. (`n` AardvarkRoles and `1` AardvarkInstanceProfile). +Inline policy example: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "IAMAccess", + "Effect": "Allow", + "Action": [ + "iam:GenerateServiceLastAccessedDetails", + "iam:GetServiceLastAccessedDetails", + "iam:ListRolePolicies", + "iam:ListRoles", + "iam:ListUsers", + "iam:ListPolicies", + "iam:ListGroups" + ], + "Resource": [ + "*" + ] + } + ] +} +``` + +So if you are monitoring `n` accounts, you will always need `n+1` roles. (one `AardvarkInstanceProfile` and n `AardvarkRole`s). -Note: For locally running aardvark, you don't have to take care of the AardvarkInstanceProfile. Instead, just attach a policy which contains "sts:AssumeRole" to the user you are using on the AWS CLI to assume Aardvark Role. Also, the same user should be mentioned in the trust policy of Aardvark Role for proper assignment of the privileges. +Note: For locally running aardvark, you don't have to take care of the AardvarkInstanceProfile. Instead, just attach a policy which contains `sts:AssumeRole` to the user you are using on the AWS CLI to assume Aardvark Role. Also, the same user should be mentioned in the trust policy of Aardvark Role for proper assignment of the privileges. 
## Gather Access Advisor Data @@ -87,24 +166,30 @@ You'll likely want to refresh the Access Advisor data regularly. We recommend r If you don't have SWAG you can pass comma separated account numbers: - aardvark update -a 123456789012,210987654321 + aardvark update -a 123456789012 -a 210987654321 #### With SWAG: Aardvark can use [SWAG](https://github.com/Netflix-Skunkworks/swag-client) to look up accounts, so you can run against all with: - aardvark update +```bash +aardvark update +``` or by account name/tag with: - aardvark update -a dev,test,prod +```bash +aardvark update -a dev -a test -a prod +``` ## API ### Start the API - aardvark start_api -b 0.0.0.0:5000 +```bash +FLASK_APP=aardvark flask run --host 0.0.0.0 --port 5000 +``` In production, you'll likely want to have something like supervisor starting the API for you. diff --git a/aardvark/__init__.py b/aardvark/__init__.py index ad8cb2b..f7915fc 100644 --- a/aardvark/__init__.py +++ b/aardvark/__init__.py @@ -1,32 +1,29 @@ -# ensure absolute import for python3 -from __future__ import absolute_import - import logging import os.path from logging.config import dictConfig +from dynaconf.contrib import FlaskDynaconf from flasgger import Swagger from flask import Flask -from aardvark.configuration import CONFIG +from aardvark.config import settings from aardvark.persistence.sqlalchemy import SQLAlchemyPersistence -from aardvark.view import advisor_bp +from aardvark.advisors import advisor_bp BLUEPRINTS = [advisor_bp] API_VERSION = "1" -persistence = SQLAlchemyPersistence() -dictConfig(CONFIG["logging"].get()) log = logging.getLogger("aardvark") -def create_app(test_config=None): +def create_app(*args, **kwargs): + init_logging() app = Flask(__name__, static_url_path="/static") Swagger(app) + persistence = SQLAlchemyPersistence() - if test_config is not None: - app.config.update(test_config) + FlaskDynaconf(app, **kwargs) # For ELB and/or Eureka @app.route("/healthcheck") @@ -50,6 +47,43 @@ def healthcheck(): return app
+def init_logging(): + log_cfg = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'standard': { + 'format': '%(asctime)s %(levelname)s: %(message)s ' + '[in %(pathname)s:%(lineno)d]' + } + }, + 'handlers': { + 'file': { + 'class': 'logging.handlers.RotatingFileHandler', + 'level': 'DEBUG', + 'formatter': 'standard', + 'filename': 'aardvark.log', + 'maxBytes': 10485760, + 'backupCount': 100, + 'encoding': 'utf8' + }, + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'DEBUG', + 'formatter': 'standard', + 'stream': 'ext://sys.stdout' + } + }, + 'loggers': { + 'aardvark': { + 'handlers': ['file', 'console'], + 'level': 'DEBUG' + } + } + } + dictConfig(log_cfg) + + def _find_config(): """Search for config.py in order of preference and return path if it exists, else None""" CONFIG_PATHS = [ diff --git a/aardvark/_config.py b/aardvark/_config.py deleted file mode 100644 index e424bf2..0000000 --- a/aardvark/_config.py +++ /dev/null @@ -1,8 +0,0 @@ -SQLALCHEMY_DATABASE_URI = "sqlite:///:memory:" -SQLALCHEMY_TRACK_MODIFICATIONS = False - -# Use a set to store ARNs that are constantly failing. 
-# Aardvark will only log these errors at the INFO level -# instead of the ERROR level -FAILING_ARNS = set() -# FAILING_ARNS = {'ASDF', 'DEFG'} diff --git a/aardvark/view.py b/aardvark/advisors.py similarity index 87% rename from aardvark/view.py rename to aardvark/advisors.py index 1e2afd7..84fe93b 100644 --- a/aardvark/view.py +++ b/aardvark/advisors.py @@ -1,26 +1,11 @@ -# ensure absolute import for python3 -from __future__ import absolute_import - from flask import Blueprint, abort, jsonify, request from aardvark.persistence.sqlalchemy import SQLAlchemyPersistence advisor_bp = Blueprint("advisor", __name__) -session = SQLAlchemyPersistence()._create_session() - - -@advisor_bp.teardown_request -def shutdown_session(exception=None): - session.remove() - - -# undocumented convenience pass-through so we can query directly from browser -@advisor_bp.route("/advisors") -def get(): - return post() -@advisor_bp.route("/advisors") +@advisor_bp.route("/advisors", methods=["GET", "POST"]) def post(): """Get access advisor data for role(s) Returns access advisor information for role(s) that match filters @@ -120,7 +105,6 @@ def post(): phrase=phrase, arns=arns, regex=regex, - session=session, ) return jsonify(values) diff --git a/aardvark/config.py b/aardvark/config.py new file mode 100644 index 0000000..334f83b --- /dev/null +++ b/aardvark/config.py @@ -0,0 +1,166 @@ +import logging +import os + +from dynaconf import Dynaconf, Validator + +cwd_path = os.path.join(os.getcwd(), "settings.yaml") + +settings = Dynaconf( + envvar_prefix="AARDVARK", + settings_files=[ + "settings.yaml", + ".secrets.yaml", + cwd_path, + "/etc/aardvark/settings.yaml", + ], + env_switcher="AARDVARK_ENV", + environments=True, + validators=[ + Validator('AWS_ARN_PARTITION', default='aws'), + Validator('AWS_REGION', default='us-east-1'), + Validator('AWS_ARN_PARTITION', default='aws'), + Validator('SQLALCHEMY_DATABASE_URI', default='sqlite:///aardvark.db'), + Validator('UPDATER_NUM_THREADS', 
default=1), + ], +) + +log = logging.getLogger(__name__) + + +def create_config( + aardvark_role: str = "", + swag_bucket: str = "", + swag_filter: str = "", + swag_service_enabled_requirement: str = "", + arn_partition: str = "", + sqlalchemy_database_uri: str = "", + sqlalchemy_track_modifications: bool = False, + num_threads: int = 5, + region: str = "", + filename: str = "settings.yaml", + environment: str = "default", +): + if aardvark_role: + settings.set("aws_rolename", aardvark_role) + if arn_partition: + settings.set("aws_arn_partition", arn_partition) + if region: + settings.set("aws_region", region) + if swag_bucket: + settings.set("swag.bucket", swag_bucket) + if swag_filter: + settings.set("swag.filter", swag_filter) + if swag_service_enabled_requirement: + settings.set( + "swag.service_enabled_requirement", swag_service_enabled_requirement + ) + if sqlalchemy_database_uri: + settings.set("sqlalchemy_database_uri", sqlalchemy_database_uri) + if sqlalchemy_track_modifications: + settings.set("sqlalchemy_track_modifications", sqlalchemy_track_modifications) + if num_threads: + settings.set("updater_num_threads", num_threads) + write_config(filename, environment=environment) + + +def find_legacy_config(): + """Search for config.py in order of preference and return path if it exists, else None""" + CONFIG_PATHS = [ + os.path.join(os.getcwd(), "config.py"), + "/etc/aardvark/config.py", + "/apps/aardvark/config.py", + ] + for path in CONFIG_PATHS: + if os.path.exists(path): + return path + return None + + +def convert_config( + filename: str, + write: bool = False, + output_filename: str = "settings.yaml", + environment: str = "default", +): + """Convert a pre-1.0 config to a YAML config file""" + import importlib.util + + spec = importlib.util.spec_from_file_location("aardvark.config.legacy", filename) + old_config = importlib.util.module_from_spec(spec) + spec.loader.exec_module(old_config) + + try: + settings.set("aws_rolename", old_config.ROLENAME) + 
except AttributeError: + pass + + try: + settings.set("aws_region", old_config.REGION) + except AttributeError: + pass + + try: + settings.set("aws_arn_partition", old_config.ARN_PARTITION) + except AttributeError: + pass + + try: + settings.set("sqlalchemy_database_uri", old_config.SQLALCHEMY_DATABASE_URI) + except AttributeError: + pass + + try: + settings.set( + "sqlalchemy_track_modifications", old_config.SQLALCHEMY_TRACK_MODIFICATIONS + ) + except AttributeError: + pass + + try: + settings.set("swag.bucket", old_config.SWAG_BUCKET) + except AttributeError: + pass + + try: + settings.set("swag.opts", old_config.SWAG_OPTS) + except AttributeError: + pass + + try: + settings.set("swag.filter", old_config.SWAG_FILTER) + except AttributeError: + pass + + try: + settings.set( + "swag.service_enabled_requirement", + old_config.SWAG_SERVICE_ENABLED_REQUIREMENT, + ) + except AttributeError: + pass + + try: + settings.set("updater_failing_arns", old_config.FAILING_ARNS) + except AttributeError: + pass + + try: + settings.set("updater_num_threads", old_config.NUM_THREADS) + except AttributeError: + pass + + if write: + write_config(output_filename, environment=environment) + + +def open_config(filepath: str): + settings.load_file(filepath) + + +def write_config(filename: str = "settings.yaml", environment: str = "default"): + from dynaconf import loaders + from dynaconf.utils.boxing import DynaBox + + data = settings.as_dict() + log.info("writing config file to %s", filename) + loaders.write(filename, DynaBox(data).to_dict(), env=environment) diff --git a/aardvark/config_default.yaml b/aardvark/config_default.yaml deleted file mode 100644 index 3123e8a..0000000 --- a/aardvark/config_default.yaml +++ /dev/null @@ -1,53 +0,0 @@ -aws: - rolename: Aardvark - region: us-east-1 - arn_partition: aws -updater: - num_threads: 5 -sqlalchemy: - database_uri: sqlite:///./aardvark.db -swag: - bucket: swag-data - opts: - swag.schema_version: 2 - swag.type: s3 - swag.bucket_name: 
swag-data - swag.data_file: v2/accounts.json - swag.region: us-east-1 - filter: "" - service_enabled_requirement: "" -logging: - disable_existing_loggers: false - formatters: - standard: - format: '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]' - handlers: - console: - class: logging.StreamHandler - formatter: standard - level: DEBUG - stream: ext://sys.stdout - file: - backupCount: 100 - class: logging.handlers.RotatingFileHandler - encoding: utf8 - filename: aardvark.log - formatter: standard - level: DEBUG - maxBytes: 10485760 - loggers: - aardvark: - handlers: - - file - - console - level: DEBUG - gunicorn: - handlers: - - file - - console - level: DEBUG - asyncio: - handlers: - - console - level: DEBUG - version: 1 diff --git a/aardvark/configuration.py b/aardvark/configuration.py deleted file mode 100644 index 431ebc6..0000000 --- a/aardvark/configuration.py +++ /dev/null @@ -1,61 +0,0 @@ -import configparser - -import confuse - -CONFIG: confuse.Configuration = confuse.Configuration("aardvark", __name__) - - -def create_config( - aardvark_role: str = "", - swag_bucket: str = "", - swag_filter: str = "", - swag_service_enabled_requirement: str = "", - arn_partition: str = "", - sqlalchemy_database_uri: str = "", - sqlalchemy_track_modifications: bool = False, - num_threads: int = 5, - region: str = "", - filename: str = "generated.yaml", -): - if aardvark_role: - CONFIG["aws"]["rolename"] = aardvark_role - if arn_partition: - CONFIG["aws"]["arn_partition"] = arn_partition - if region: - CONFIG["aws"]["region"] = region - if swag_bucket: - CONFIG["swag"]["bucket"] = swag_bucket - if swag_filter: - CONFIG["swag"]["filter"] = swag_filter - if swag_service_enabled_requirement: - CONFIG["swag"]["service_enabled_requirement"] = swag_service_enabled_requirement - if sqlalchemy_database_uri: - CONFIG["sqlalchemy"]["database_uri"] = sqlalchemy_database_uri - if sqlalchemy_track_modifications: - CONFIG["sqlalchemy"]["track_modifications"] = 
sqlalchemy_track_modifications - if num_threads: - CONFIG["updater"]["num_threads"] = num_threads - with open(filename, "w") as f: - f.write(CONFIG.dump(full=False)) - - -def get_config(component: str) -> configparser.SectionProxy: - """ - Returns a different config set based on the provided environment. - This is used for testing. - """ - return CONFIG[component] - - -def get_config_from_file(filepath: str, component: str) -> configparser.SectionProxy: - """ - This returns an entirely different configuration value, and utilizes the environemnt provided - as an index into that configuration file. This is used via the CLI. - """ - alternative_config = configparser.ConfigParser() - alternative_config.read(filepath) - return alternative_config[component] - - -def open_config(filepath: str): - CONFIG.read(filepath) diff --git a/aardvark/manage.py b/aardvark/manage.py index 3994d88..8458d7a 100644 --- a/aardvark/manage.py +++ b/aardvark/manage.py @@ -1,18 +1,18 @@ import asyncio +import click import logging import os import queue import threading +from typing import List -from flask_script import Command, Manager, Option - -from aardvark import create_app +from aardvark import create_app, init_logging from aardvark.exceptions import AardvarkException -from aardvark.configuration import CONFIG, create_config +from aardvark.config import create_config, convert_config, find_legacy_config from aardvark.persistence.sqlalchemy import SQLAlchemyPersistence from aardvark.retrievers.runner import RetrieverRunner -manager = Manager(create_app) +APP = None log = logging.getLogger("aardvark") ACCOUNT_QUEUE = queue.Queue() @@ -30,17 +30,28 @@ DEFAULT_NUM_THREADS = 5 +def get_app(): + global APP + if not APP: + APP = create_app() + return APP + + +@click.group() +def cli(): + init_logging() + + # All of these default to None rather than the corresponding DEFAULT_* values # so we can tell whether they were passed or not. 
We don't prompt for any of # the options that were passed as parameters. -@manager.option("-a", "--aardvark-role", dest="aardvark_role_param", type=str) -@manager.option("-b", "--swag-bucket", dest="bucket_param", type=str) -@manager.option("-d", "--db-uri", dest="db_uri_param", type=str) -@manager.option("--num-threads", dest="num_threads_param", type=int) -@manager.option("--no-prompt", dest="no_prompt", action="store_true", default=False) -def config( - aardvark_role_param, bucket_param, db_uri_param, num_threads_param, no_prompt -): +@cli.command("config") +@click.option("--aardvark-role", "-a", type=str) +@click.option("--swag-bucket", "-b", type=str) +@click.option("--db-uri", "-d", type=str) +@click.option("--num-threads", type=int) +@click.option("--no-prompt", is_flag=True, default=False) +def config(aardvark_role, swag_bucket, db_uri, num_threads, no_prompt): """ Creates a config.py configuration file from user input or default values. @@ -69,41 +80,44 @@ def config( """ # We don't set these until runtime. default_db_uri = f"{LOCALDB}:///{os.getcwd()}/{DEFAULT_LOCALDB_FILENAME}" + default_save_file = "settings.local.yaml" if no_prompt: # Just take the parameters as currently constituted. - aardvark_role = aardvark_role_param or DEFAULT_AARDVARK_ROLE - num_threads = num_threads_param or DEFAULT_NUM_THREADS - db_uri = db_uri_param or default_db_uri + aardvark_role = aardvark_role or DEFAULT_AARDVARK_ROLE + num_threads = num_threads or DEFAULT_NUM_THREADS + db_uri = db_uri or default_db_uri # If a swag bucket was specified we set write_swag here so it gets # written out to the config file below. - bucket = bucket_param or DEFAULT_SWAG_BUCKET + bucket = swag_bucket or DEFAULT_SWAG_BUCKET else: # This is essentially the same "param, or input, or default" # structure as the additional parameters below. - if bucket_param: - bucket = bucket_param + if swag_bucket: + bucket = swag_bucket else: print(f"\nAardvark can use SWAG to look up accounts. 
See {SWAG_REPO_URL}") use_swag = input("Do you use SWAG to track accounts? [yN]: ") if len(use_swag) > 0 and "yes".startswith(use_swag.lower()): - bucket_prompt = f"SWAG_BUCKET [{DEFAULT_SWAG_BUCKET}]: " + bucket_prompt = f"SWAG bucket [{DEFAULT_SWAG_BUCKET}]: " bucket = input(bucket_prompt) or DEFAULT_SWAG_BUCKET else: bucket = "" - aardvark_role_prompt = f"ROLENAME [{DEFAULT_AARDVARK_ROLE}]: " - db_uri_prompt = f"DATABASE URI [{default_db_uri}]: " - num_threads_prompt = f"# THREADS [{DEFAULT_NUM_THREADS}]: " + aardvark_role_prompt = f"Role Name [{DEFAULT_AARDVARK_ROLE}]: " + db_uri_prompt = f"Database URI [{default_db_uri}]: " + num_threads_prompt = f"Worker Count [{DEFAULT_NUM_THREADS}]: " + save_file_prompt = f"Config file location [{default_save_file}]: " aardvark_role = ( - aardvark_role_param or input(aardvark_role_prompt) or DEFAULT_AARDVARK_ROLE + aardvark_role or input(aardvark_role_prompt) or DEFAULT_AARDVARK_ROLE ) - db_uri = db_uri_param or input(db_uri_prompt) or default_db_uri + db_uri = db_uri or input(db_uri_prompt) or default_db_uri num_threads = ( - num_threads_param or input(num_threads_prompt) or DEFAULT_NUM_THREADS + num_threads or input(num_threads_prompt) or DEFAULT_NUM_THREADS ) + save_file = input(save_file_prompt) or default_save_file create_config( aardvark_role=aardvark_role, @@ -114,19 +128,19 @@ def config( sqlalchemy_track_modifications=False, num_threads=num_threads, region="us-east-1", + filename=save_file, ) -@manager.option("-a", "--accounts", dest="accounts", type=str, default="all") -@manager.option("-r", "--arns", dest="arns", type=str, default="all") -def update(accounts, arns): +@cli.command("update") +@click.option("--account", "-a", type=str, default=[], multiple=True) +@click.option("--arn", "-r", type=str, default=[], multiple=True) +def update(account: List[str], arn: List[str]): """ Asks AWS for new Access Advisor information. 
""" - # The runner will default to all accounts and ARNs if None is passed in - accounts = None if accounts == "all" else accounts.split(",") - arns = None if arns == "all" else arns.split(",") - + accounts = list(account) + arns = list(arn) r = RetrieverRunner() try: asyncio.run(r.run(accounts=accounts, arns=arns)) @@ -137,69 +151,33 @@ def update(accounts, arns): exit(1) -class GunicornServer(Command): - """ - This is the main GunicornServer server, it runs the flask app with gunicorn and - uses any configuration options passed to it. - You can pass all standard gunicorn flags to this command as if you were - running gunicorn itself. - For example: - aardvark start_api -w 4 -b 127.0.0.0:8002 - Will start gunicorn with 4 workers bound to 127.0.0.0:8002 - """ - - description = "Run the app within Gunicorn" - - def get_options(self): - options = [] - try: - from gunicorn.config import make_settings - except ImportError: - # Gunicorn does not yet support Windows. - # See issue #524. https://github.com/benoitc/gunicorn/issues/524 - # For dev on Windows, make this an optional import. - print("Could not import gunicorn, skipping.") - return options - - settings = make_settings() - for setting, klass in settings.items(): - if klass.cli: - if klass.action: - if klass.action == "store_const": - options.append( - Option(*klass.cli, const=klass.const, action=klass.action) - ) - else: - options.append(Option(*klass.cli, action=klass.action)) - else: - options.append(Option(*klass.cli)) - return options - - def run(self, *args, **kwargs): - from gunicorn.app.wsgiapp import WSGIApplication - - app = WSGIApplication() - - app.app_uri = "aardvark:create_app()" - return app.run() - - -@manager.command +@cli.command("drop_db") def drop_db(): - """ Drops the database. """ + """Drops the database.""" SQLAlchemyPersistence().teardown_db() -@manager.command +@cli.command("create_db") def create_db(): - """ Creates the database. 
""" + """Creates the database.""" SQLAlchemyPersistence().init_db() -def main(): - manager.add_command("start_api", GunicornServer()) - manager.run() +@cli.command("migrate_config") +@click.option("--environment", "-e", type=str, default="default") +@click.option("--config-file", "-c", type=str) +@click.option("--write/--no-write", type=bool, default=True) +@click.option("--output-file", "-o", type=str, default="settings.yaml") +def migrate_config(environment, config_file, write, output_file): + if not config_file: + config_file = find_legacy_config() + convert_config( + config_file, + write=write, + output_filename=output_file, + environment=environment, + ) if __name__ == "__main__": - main() + cli() diff --git a/aardvark/persistence/__init__.py b/aardvark/persistence/__init__.py index ef07ced..701a414 100644 --- a/aardvark/persistence/__init__.py +++ b/aardvark/persistence/__init__.py @@ -1,12 +1,12 @@ from typing import Any, Dict, List, Optional -import confuse +from dynaconf import Dynaconf from aardvark.plugins import AardvarkPlugin class PersistencePlugin(AardvarkPlugin): - def __init__(self, alternative_config: confuse.Configuration = None): + def __init__(self, alternative_config: Dynaconf = None): super().__init__(alternative_config=alternative_config) def init_db(self): diff --git a/aardvark/persistence/sqlalchemy/__init__.py b/aardvark/persistence/sqlalchemy/__init__.py index e529e67..f70b87d 100644 --- a/aardvark/persistence/sqlalchemy/__init__.py +++ b/aardvark/persistence/sqlalchemy/__init__.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from typing import Any, Dict, List, Optional, Union -import confuse +from dynaconf import Dynaconf from sqlalchemy import create_engine, engine from sqlalchemy import func as sa_func from sqlalchemy.exc import SQLAlchemyError @@ -18,58 +18,67 @@ class SQLAlchemyPersistence(PersistencePlugin): - sa_engine: engine = None - session_factory: sessionmaker = None + sa_engine: engine + session_factory: 
sessionmaker + session: session_type - def __init__( - self, alternative_config: confuse.Configuration = None, initialize: bool = True - ): + def __init__(self, alternative_config: Dynaconf = None, initialize: bool = True): super().__init__(alternative_config=alternative_config) if initialize: self.init_db() def init_db(self): - self.sa_engine = create_engine(self.config["sqlalchemy"]["database_uri"].get()) + self.sa_engine = create_engine(self.config.get("sqlalchemy_database_uri")) self.session_factory = sessionmaker( autocommit=False, autoflush=False, bind=self.sa_engine, expire_on_commit=False, ) - session = self._create_session() - Base.query = session.query_property() + self.session = scoped_session(self.session_factory) + Base.query = self.session.query_property() Base.metadata.create_all(bind=self.sa_engine) def _create_session(self) -> scoped_session: - return scoped_session(self.session_factory) + return self.session() def teardown_db(self): Base.metadata.drop_all(bind=self.sa_engine) def create_iam_object( - self, arn: str, last_updated: datetime.datetime + self, arn: str, last_updated: datetime.datetime, session: session_type = None ) -> AWSIAMObject: - with self.session_scope() as session: + with self.session_scope(session) as session: item = AWSIAMObject(arn=arn, lastUpdated=last_updated) session.add(item) return item @contextmanager - def session_scope(self): + def session_scope(self, session: session_type = None): """Provide a transactional scope around a series of operations.""" - session: session_type = self._create_session() + if not session: + log.debug("creating new SQLAlchemy DB session") + session: session_type = self._create_session() + close_session = True + else: + log.debug("using provided SQLAlchemy DB session") + close_session = False try: yield session log.debug("committing SQLAlchemy DB session") session.commit() except Exception as e: - log.debug("exception caught, rolling back session: %s", e) + log.warning("exception caught, rolling 
back session: %s", e, exc_info=True) session.rollback() raise finally: - log.debug("closing SQLAlchemy DB session") - session.close() + if close_session: + log.debug("closing SQLAlchemy DB session") + session.close() + self.session.remove() + else: + log.debug("not closing SQLAlchemy DB session") def _combine_results(self, access_advisor_data: Dict[str, Any]) -> Dict[str, Any]: access_advisor_data.pop("page") @@ -109,8 +118,8 @@ def _combine_results(self, access_advisor_data: Dict[str, Any]) -> Dict[str, Any return usage - def store_role_data(self, access_advisor_data: Dict[str, Any]): - with self.session_scope() as session: + def store_role_data(self, access_advisor_data: Dict[str, Any], session: session_type = None): + with self.session_scope(session) as session: if not access_advisor_data: log.warning( "Cannot persist Access Advisor Data as no data was collected." @@ -122,7 +131,7 @@ def store_role_data(self, access_advisor_data: Dict[str, Any]): if arn in arn_cache: item = arn_cache[arn] else: - item = self.get_or_create_iam_object(arn) + item = self.get_or_create_iam_object(arn, session=session) arn_cache[arn] = item for service in data: self.create_or_update_advisor_data( @@ -147,59 +156,61 @@ def get_role_data( ) -> Dict[str, Any]: offset = (page - 1) * count if page else 0 limit = count - session = session or self._create_session() - # default unfiltered query - query = session.query(AWSIAMObject) - - try: - if phrase: - query = query.filter(AWSIAMObject.arn.ilike("%" + phrase + "%")) - - if arns: - query = query.filter( - sa_func.lower(AWSIAMObject.arn).in_([arn.lower() for arn in arns]) - ) - - if regex: - query = query.filter(AWSIAMObject.arn.regexp(regex)) - - total = query.count() + with self.session_scope(session) as session: + # default unfiltered query + query = session.query(AWSIAMObject) - if offset: - query = query.offset(offset) - - if limit: - query = query.limit(limit) + try: + if phrase: + query = query.filter(AWSIAMObject.arn.ilike("%" + 
phrase + "%")) + + if arns: + query = query.filter( + sa_func.lower(AWSIAMObject.arn).in_( + [arn.lower() for arn in arns] + ) + ) - items = query.all() - except Exception as e: - raise DatabaseException("Could not retrieve roles from database: %s", e) - - if not items: - items = session.query(AWSIAMObject).offset(offset).limit(limit).all() - - values = dict(page=page, total=total, count=len(items)) - for item in items: - item_values = [] - for advisor_data in item.usage: - item_values.append( - dict( - lastAuthenticated=advisor_data.lastAuthenticated, - serviceName=advisor_data.serviceName, - serviceNamespace=advisor_data.serviceNamespace, - lastAuthenticatedEntity=advisor_data.lastAuthenticatedEntity, - totalAuthenticatedEntities=advisor_data.totalAuthenticatedEntities, - lastUpdated=item.lastUpdated, + if regex: + query = query.filter(AWSIAMObject.arn.regexp(regex)) + + total = query.count() + + if offset: + query = query.offset(offset) + + if limit: + query = query.limit(limit) + + items = query.all() + except Exception as e: + raise DatabaseException("Could not retrieve roles from database: %s", e) + + if not items: + items = session.query(AWSIAMObject).offset(offset).limit(limit).all() + + values = dict(page=page, total=total, count=len(items)) + for item in items: + item_values = [] + for advisor_data in item.usage: + item_values.append( + dict( + lastAuthenticated=advisor_data.lastAuthenticated, + serviceName=advisor_data.serviceName, + serviceNamespace=advisor_data.serviceNamespace, + lastAuthenticatedEntity=advisor_data.lastAuthenticatedEntity, + totalAuthenticatedEntities=advisor_data.totalAuthenticatedEntities, + lastUpdated=item.lastUpdated, + ) ) - ) - values[item.arn] = item_values + values[item.arn] = item_values - if combine and total > len(items): - raise CombineException( - "Error: Please specify a count of at least {}.".format(total) - ) - elif combine: - return self._combine_results(values) + if combine and total > len(items): + raise 
CombineException( + "Error: Please specify a count of at least {}.".format(total) + ) + elif combine: + return self._combine_results(values) return values @@ -213,85 +224,86 @@ def create_or_update_advisor_data( total_authenticated_entities: int, session: session_type = None, ): - session = session or self._create_session() - service_name = service_name[:128] - service_namespace = service_namespace[:64] - item = None - try: - item = ( - session.query(AdvisorData) - .filter(AdvisorData.item_id == item_id) - .filter(AdvisorData.serviceNamespace == service_namespace) - .scalar() - ) - except SQLAlchemyError as e: - log.error( - f"Database error: {e} item_id: {item_id} serviceNamespace: {service_namespace}" - ) - - if not item: - item = AdvisorData( - item_id=item_id, - lastAuthenticated=last_authenticated, - serviceName=service_name, - serviceNamespace=service_namespace, - lastAuthenticatedEntity=last_authenticated_entity, - totalAuthenticatedEntities=total_authenticated_entities, - ) - try: - session.add(item) - except SQLAlchemyError as e: - log.error(f"failed to add AdvisorData item to session: {e}") - raise - return - - # sqlite will return a string for item.lastAuthenticated, so we parse that into a datetime - if isinstance(item.lastAuthenticated, str): - ts = datetime.datetime.strptime( - item.lastAuthenticated, "%Y-%m-%d %H:%M:%S.%f" - ) - else: - ts = item.lastAuthenticated - - if last_authenticated > ts: - item.lastAuthenticated = last_authenticated + with self.session_scope(session) as session: + service_name = service_name[:128] + service_namespace = service_namespace[:64] + item = None try: - session.add(item) + item = ( + session.query(AdvisorData) + .filter(AdvisorData.item_id == item_id) + .filter(AdvisorData.serviceNamespace == service_namespace) + .scalar() + ) except SQLAlchemyError as e: - log.error(f"failed to add AdvisorData item to session: {e}") + log.error( + f"Database error: {e} item_id: {item_id} serviceNamespace: {service_namespace}" + ) 
raise - elif last_authenticated < ts: - """ - lastAuthenticated is obtained by calling get_service_last_accessed_details() method of the boto3 iam client. - When there is no AA data about a service, the lastAuthenticated key is missing from the returned dictionary. - This is perfectly valid, either because the service in question was not accessed in the past 365 days or - the entity granting access to it was created recently enough that no AA data is available yet (it can take up to - 4 hours for this to happen). - When this happens, the AccountToUpdate._get_job_results() method will set lastAuthenticated to 0. - Usually we don't want to persist such an entity, with one exception: there's already a recorded, non-zero lastAuthenticated - timestamp persisted for this item. That means the service was accessed at some point in time, but now more than 365 passed since - the last access, so AA no longer returns a timestamp for it. - """ - if last_authenticated == 0: - log.warning( - "Previously seen object not accessed in the past 365 days " - "(got null lastAuthenticated from AA). Setting to 0. " - f"Object {item.item_id} service {item.serviceName} previous timestamp {item.lastAuthenticated}" + if not item: + item = AdvisorData( + item_id=item_id, + lastAuthenticated=last_authenticated, + serviceName=service_name, + serviceNamespace=service_namespace, + lastAuthenticatedEntity=last_authenticated_entity, + totalAuthenticatedEntities=total_authenticated_entities, ) - item.lastAuthenticated = 0 try: session.add(item) except SQLAlchemyError as e: log.error(f"failed to add AdvisorData item to session: {e}") raise - else: - log.error( - f"Received an older time than previously seen for object {item.item_id} service {item.serviceName} ({last_authenticated} < {item.lastAuthenticated})!" 
+ return + + # sqlite will return a string for item.lastAuthenticated, so we parse that into a datetime + if isinstance(item.lastAuthenticated, str): + ts = datetime.datetime.strptime( + item.lastAuthenticated, "%Y-%m-%d %H:%M:%S.%f" ) + else: + ts = item.lastAuthenticated + + if last_authenticated > ts: + item.lastAuthenticated = last_authenticated + try: + session.add(item) + except SQLAlchemyError as e: + log.error(f"failed to add AdvisorData item to session: {e}") + raise + + elif last_authenticated < ts: + """ + lastAuthenticated is obtained by calling get_service_last_accessed_details() method of the boto3 iam client. + When there is no AA data about a service, the lastAuthenticated key is missing from the returned dictionary. + This is perfectly valid, either because the service in question was not accessed in the past 365 days or + the entity granting access to it was created recently enough that no AA data is available yet (it can take up to + 4 hours for this to happen). + When this happens, the AccountToUpdate._get_job_results() method will set lastAuthenticated to 0. + Usually we don't want to persist such an entity, with one exception: there's already a recorded, non-zero lastAuthenticated + timestamp persisted for this item. That means the service was accessed at some point in time, but now more than 365 passed since + the last access, so AA no longer returns a timestamp for it. + """ + if last_authenticated == 0: + log.warning( + "Previously seen object not accessed in the past 365 days " + "(got null lastAuthenticated from AA). Setting to 0. 
" + f"Object {item.item_id} service {item.serviceName} previous timestamp {item.lastAuthenticated}" + ) + item.lastAuthenticated = 0 + try: + session.add(item) + except SQLAlchemyError as e: + log.error(f"failed to add AdvisorData item to session: {e}") + raise + else: + log.error( + f"Received an older time than previously seen for object {item.item_id} service {item.serviceName} ({last_authenticated} < {item.lastAuthenticated})!" + ) - def get_or_create_iam_object(self, arn: str): - with self.session_scope() as session: + def get_or_create_iam_object(self, arn: str, session: session_type = None): + with self.session_scope(session) as session: try: item = ( session.query(AWSIAMObject).filter(AWSIAMObject.arn == arn).scalar() diff --git a/aardvark/plugins/__init__.py b/aardvark/plugins/__init__.py index 6e30904..2616f50 100644 --- a/aardvark/plugins/__init__.py +++ b/aardvark/plugins/__init__.py @@ -1,11 +1,11 @@ -import confuse +from dynaconf import Dynaconf -from aardvark.configuration import CONFIG +from aardvark.config import settings class AardvarkPlugin: - def __init__(self, alternative_config: confuse.Configuration = None): + def __init__(self, alternative_config: Dynaconf = None): if alternative_config: self.config = alternative_config else: - self.config = CONFIG + self.config = settings diff --git a/aardvark/retrievers/__init__.py b/aardvark/retrievers/__init__.py index 1c14a85..6576cfb 100644 --- a/aardvark/retrievers/__init__.py +++ b/aardvark/retrievers/__init__.py @@ -23,7 +23,7 @@ import logging from typing import Any, Dict -import confuse +from dynaconf import Dynaconf from aardvark.plugins import AardvarkPlugin @@ -33,7 +33,7 @@ class RetrieverPlugin(AardvarkPlugin): _name: str - def __init__(self, name: str, alternative_config: confuse.Configuration = None): + def __init__(self, name: str, alternative_config: Dynaconf = None): super().__init__(alternative_config=alternative_config) self._name = name diff --git 
a/aardvark/retrievers/access_advisor/__init__.py b/aardvark/retrievers/access_advisor/__init__.py index 29dada9..b761edf 100644 --- a/aardvark/retrievers/access_advisor/__init__.py +++ b/aardvark/retrievers/access_advisor/__init__.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Union from asgiref.sync import sync_to_async -import confuse +from dynaconf import Dynaconf from cloudaux.aws.sts import boto3_cached_conn from aardvark.exceptions import AccessAdvisorException @@ -14,7 +14,7 @@ class AccessAdvisorRetriever(RetrieverPlugin): - def __init__(self, alternative_config: confuse.Configuration = None): + def __init__(self, alternative_config: Dynaconf = None): super().__init__("access_advisor", alternative_config=alternative_config) async def _generate_service_last_accessed_details(self, iam_client, arn): @@ -69,10 +69,10 @@ async def run(self, arn: str, data: Dict[str, Any]) -> Dict[str, Any]: account = self._get_account_from_arn(arn) conn_details: Dict[str, str] = { "account_number": account, - "assume_role": self.config["aws"]["rolename"].as_str(), + "assume_role": self.config.get('aws_rolename'), "session_name": "aardvark", - "region": self.config["aws"]["region"].as_str() or "us-east-1", - "arn_partition": self.config["aws"]["arn_partition"].as_str() or "aws", + "region": self.config.get('aws_region', 'us-east-1'), + "arn_partition": self.config.get('aws_arn_partition', 'aws'), } iam_client = await sync_to_async(boto3_cached_conn)("iam", **conn_details) try: diff --git a/aardvark/retrievers/runner.py b/aardvark/retrievers/runner.py index c20c021..ea4145c 100644 --- a/aardvark/retrievers/runner.py +++ b/aardvark/retrievers/runner.py @@ -4,11 +4,11 @@ from copy import copy from typing import Any, Dict, List -import confuse from asgiref.sync import sync_to_async from botocore.exceptions import ClientError from cloudaux.aws.iam import list_roles, list_users from cloudaux.aws.sts import boto3_cached_conn +from dynaconf import Dynaconf from swag_client import 
InvalidSWAGDataException from swag_client.backend import SWAGManager from swag_client.util import parse_swag_config_options @@ -20,8 +20,9 @@ from aardvark.retrievers.access_advisor import AccessAdvisorRetriever log = logging.getLogger("aardvark") -sap = SQLAlchemyPersistence() re_account_id = re.compile(r"\d{12}") +EMPTY_QUEUE_DELAY = 1 +EMPTY_QUEUE_RETRIES = 5 class RetrieverRunner(AardvarkPlugin): @@ -31,24 +32,29 @@ class RetrieverRunner(AardvarkPlugin): account_queue: asyncio.Queue arn_queue: asyncio.Queue results_queue: asyncio.Queue + failure_queue: asyncio.Queue failed_arns: List[str] tasks: List[asyncio.Future] num_workers: int swag: SWAGManager - swag_config: confuse.ConfigView + swag_config: Dict[str, str] + accounts_complete: bool + persistence: SQLAlchemyPersistence def __init__( self, - alternative_config: confuse.Configuration = None, + alternative_config: Dynaconf = None, ): super().__init__(alternative_config=alternative_config) self.tasks = [] self.retrievers = [] self.failed_arns = [] - self.num_workers = self.config["updater"]["num_threads"].as_number() - self.swag_config = self.config["swag"] - swag_opts = parse_swag_config_options(self.swag_config["opts"].get()) + self.num_workers = self.config.get("updater_num_threads") + self.swag_config = self.config.get("swag") + swag_opts = parse_swag_config_options(self.swag_config["opts"]) self.swag = SWAGManager(**swag_opts) + self.accounts_complete = False + self.persistence = SQLAlchemyPersistence(alternative_config=alternative_config) def register_retriever(self, r: RetrieverPlugin): """Add a retriever instance to be called during the run process.""" @@ -70,22 +76,23 @@ async def _run_retrievers(self, arn: str) -> Dict[str, Any]: try: data = await r.run(arn, data) except Exception as e: - log.error(f"failed to run {r} on ARN {arn}") + log.error("failed to run %s on ARN %s", r, arn) raise RetrieverException from e return data async def _retriever_loop(self, name: str): """Loop to consume from 
self.arn_queue and call the retriever runner function.""" - log.debug(f"creating {name}") + log.debug("creating %s", name) while True: log.debug("getting arn from queue") arn = await self.arn_queue.get() - log.debug(f"{name} retrieving data for {arn}") + log.debug("%s retrieving data for %s", name, arn) try: data = await self._run_retrievers(arn) except Exception as e: - log.error(f"failed to run retriever on ARN {arn}: {e}") + log.exception("failed to run retriever on ARN %s: %s", arn, str(e)) self.failed_arns.append(arn) + await self.failure_queue.put(arn) self.arn_queue.task_done() continue # TODO: handle nested data from retrievers in persistence layer @@ -94,23 +101,27 @@ async def _retriever_loop(self, name: str): async def _results_loop(self, name: str): """Loop to consume from self.results_queue and handle results.""" - log.debug(f"creating {name}") + log.debug("creating %s", name) while True: data = await self.results_queue.get() - log.debug(f"{name} storing results for {data['arn']}") - await sync_to_async(sap.store_role_data)( - {data["arn"]: data["access_advisor"]} - ) + log.debug("%s storing results for %s", name, data['arn']) + try: + await sync_to_async(self.persistence.store_role_data)( + {data["arn"]: data["access_advisor"]} + ) + except Exception as e: + log.exception("exception occurred in results loop: %s", str(e)) + await self.failure_queue.put(data) self.results_queue.task_done() async def _get_arns_for_account(self, account: str): """Retrieve ARNs for roles, users, policies, and groups in an account and add them to the ARN queue.""" conn_details: Dict[str, str] = { "account_number": account, - "assume_role": self.config["aws"]["rolename"].as_str(), + "assume_role": self.config.get("aws_rolename"), "session_name": "aardvark", - "region": self.config["aws"]["region"].as_str() or "us-east-1", - "arn_partition": self.config["aws"]["arn_partition"].as_str() or "aws", + "region": self.config.get("aws_region", "us-east-1"), + "arn_partition": 
self.config.get("aws_arn_partition", "aws"), } client = await sync_to_async(boto3_cached_conn)( "iam", service_type="client", **conn_details @@ -136,28 +147,33 @@ async def _arn_lookup_loop(self, name: str): """Loop to consume from self.account_queue to retrieve and enqueue ARNs for each account.""" log.debug(f"creating {name}") while True: + log.debug("getting account from queue") account = await self.account_queue.get() log.debug(f"{name} retrieving ARNs for {account}") - await self._get_arns_for_account(account) + try: + await self._get_arns_for_account(account) + except Exception as e: + log.exception("exception occurred in arn lookup loop: %s", str(e)) + await self.failure_queue.put(account) self.account_queue.task_done() async def _get_swag_accounts(self) -> List[Dict]: """Retrieve AWS accounts from SWAG based on the SWAG options in the application configuration.""" log.debug("getting accounts from SWAG") try: - all_accounts: List[Dict] = self.swag.get_all( - self.swag_config["filter"].get() - ) - swag_service = self.swag_config["service_enabled_requirement"].get() + all_accounts: List[Dict] = self.swag.get_all(self.swag_config["filter"]) + swag_service = self.swag_config["service_enabled_requirement"] if swag_service: all_accounts = await sync_to_async(self.swag.get_service_enabled)( swag_service, accounts_list=all_accounts ) + else: + all_accounts = await sync_to_async(self.swag.get_all)(search_filter=self.swag_config["filter"]) except (KeyError, InvalidSWAGDataException, ClientError) as e: log.error( - f"Account names passed but SWAG not configured or unavailable: {e}" + "account names passed but SWAG not configured or unavailable: %s", str(e) ) - raise RetrieverException("Could not retrieve SWAG data") from e + raise RetrieverException("could not retrieve SWAG data") from e return all_accounts @@ -202,10 +218,10 @@ async def _queue_accounts(self, account_names: List[str]): def cancel(self): """Send a cancel signal to all running workers.""" - 
log.info("Stopping runner tasks") + log.info("stopping runner tasks") for task in self.tasks: task.cancel() - log.info(f"Task {task} canceled") + log.info("task %s canceled", task) async def run(self, accounts: List[str] = None, arns: List[str] = None): """Prep account queue and kick off ARN lookup, retriever, and results workers. @@ -223,6 +239,7 @@ async def run(self, accounts: List[str] = None, arns: List[str] = None): self.arn_queue = asyncio.Queue() self.account_queue = asyncio.Queue() self.results_queue = asyncio.Queue() + self.failure_queue = asyncio.Queue() lookup_accounts = True if arns: @@ -258,4 +275,8 @@ async def run(self, accounts: List[str] = None, arns: List[str] = None): # Clean up our workers self.cancel() + while not self.failure_queue.empty(): + failure = await self.failure_queue.get() + log.error("failure: %s", failure) + await asyncio.gather(*self.tasks, return_exceptions=True) diff --git a/docker-compose.yml b/docker-compose.yml index 9fc52db..7ad25f7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,6 +7,7 @@ services: image: netflixoss/aardvark:latest volumes: - data:/data + - ./settings.local.yaml.bak:/etc/aardvark/settings.local.yaml.bak env_file: .env command: [ "aardvark", "create_db" ] @@ -19,8 +20,9 @@ services: - 5000:5000 volumes: - data:/data + - ./settings.local.yaml.bak:/etc/aardvark/settings.local.yaml.bak env_file: .env - command: [ "aardvark", "start_api", "-b", "0.0.0.0:5000" ] + command: [ "flask", "run", "-h", "0.0.0.0", "-p", "5000" ] collector: build: . 
@@ -30,8 +32,11 @@ services: restart: always volumes: - data:/data + - ./settings.local.yaml.bak:/etc/aardvark/settings.local.yaml.bak env_file: .env - command: [ "aardvark", "update", "-a", "$AARDVARK_ACCOUNTS" ] + command: [ "aardvark", "update" ] + # If you're not using SWAG, you'll need to specify one or more accounts here: + # command: [ "aardvark", "update", "-a", "123456789012", "-a", "234567890123" ] volumes: data: diff --git a/requirements-test.in b/requirements-test.in index ddf6630..48d753c 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -1,5 +1,3 @@ -cryptography flake8 -moto pytest pytest-asyncio diff --git a/requirements-test.txt b/requirements-test.txt index 7ddfc6f..578c09b 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,51 +1,18 @@ # -# This file is autogenerated by pip-compile +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile --no-emit-index-url --output-file=requirements-test.txt requirements-test.in # attrs==21.2.0 # via pytest -boto3==1.17.94 - # via moto -botocore==1.20.94 - # via - # boto3 - # moto - # s3transfer -certifi==2021.5.30 - # via requests -cffi==1.14.5 - # via cryptography -chardet==4.0.0 - # via requests -cryptography==3.4.7 - # via - # -r requirements-test.in - # moto flake8==3.9.2 # via -r requirements-test.in -idna==2.10 - # via requests iniconfig==1.1.1 # via pytest -jinja2==3.0.1 - # via moto -jmespath==0.10.0 - # via - # boto3 - # botocore -markupsafe==2.0.1 - # via - # jinja2 - # moto mccabe==0.6.1 # via flake8 -more-itertools==8.8.0 - # via moto -moto==2.0.9 - # via -r requirements-test.in -packaging==20.9 +packaging==21.0 # via pytest pluggy==0.13.1 # via pytest @@ -53,48 +20,15 @@ py==1.10.0 # via pytest pycodestyle==2.7.0 # via flake8 -pycparser==2.20 - # via cffi pyflakes==2.3.1 # via flake8 pyparsing==2.4.7 # via packaging -pytest-asyncio==0.15.1 - # via -r requirements-test.in pytest==6.2.4 # via # -r requirements-test.in # 
pytest-asyncio -python-dateutil==2.8.1 - # via - # botocore - # moto -pytz==2021.1 - # via moto -requests==2.25.1 - # via - # moto - # responses -responses==0.13.3 - # via moto -s3transfer==0.4.2 - # via boto3 -six==1.16.0 - # via - # moto - # python-dateutil - # responses +pytest-asyncio==0.15.1 + # via -r requirements-test.in toml==0.10.2 # via pytest -urllib3==1.26.5 - # via - # botocore - # requests - # responses -werkzeug==2.0.1 - # via moto -xmltodict==0.12.0 - # via moto - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements.in b/requirements.in index 99cec23..c935103 100644 --- a/requirements.in +++ b/requirements.in @@ -1,11 +1,12 @@ -SQLAlchemy -Flask +asgiref blinker +bunch +click cloudaux confuse -bunch +dynaconf flasgger +flask +SQLAlchemy swag_client -flask_script -gunicorn -asgiref \ No newline at end of file +toml diff --git a/requirements.txt b/requirements.txt index 1d18bbd..b98a142 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,38 +1,39 @@ # -# This file is autogenerated by pip-compile +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile --no-emit-index-url --output-file=requirements.txt requirements.in # -asgiref==3.3.4 +asgiref==3.4.1 # via -r requirements.in attrs==21.2.0 # via jsonschema blinker==1.4 # via -r requirements.in -boto3==1.17.94 +boto==2.49.0 + # via cloudaux +boto3==1.18.3 # via # cloudaux # swag-client -boto==2.49.0 - # via cloudaux -botocore==1.20.94 +botocore==1.21.3 # via # boto3 # cloudaux # s3transfer bunch==1.0.1 # via -r requirements.in -click-log==0.3.2 - # via swag-client click==8.0.1 # via + # -r requirements.in # click-log # flask # swag-client +click-log==0.3.2 + # via swag-client cloudaux==1.9.4 # via -r requirements.in -confuse==1.4.0 +confuse==1.5.0 # via -r requirements.in decorator==5.0.9 # via dogpile.cache @@ -42,21 +43,18 @@ defusedxml==0.7.1 # via cloudaux dogpile.cache==1.1.3 # via 
swag-client +dynaconf==3.1.4 + # via -r requirements.in flagpole==1.1.1 # via cloudaux flasgger==0.9.5 # via -r requirements.in -flask-script==2.0.6 - # via -r requirements.in flask==2.0.1 # via # -r requirements.in # flasgger - # flask-script greenlet==1.1.0 # via sqlalchemy -gunicorn==20.1.0 - # via -r requirements.in inflection==0.5.1 # via cloudaux itsdangerous==2.0.1 @@ -74,7 +72,7 @@ jsonschema==3.2.0 # via flasgger markupsafe==2.0.1 # via jinja2 -marshmallow==3.12.1 +marshmallow==3.12.2 # via swag-client mistune==0.8.4 # via flasgger @@ -82,9 +80,9 @@ ordered-set==4.0.2 # via deepdiff pbr==5.6.0 # via stevedore -pyrsistent==0.17.3 +pyrsistent==0.18.0 # via jsonschema -python-dateutil==2.8.1 +python-dateutil==2.8.2 # via botocore pyyaml==5.4.1 # via @@ -92,9 +90,9 @@ pyyaml==5.4.1 # flasgger retrying==1.3.3 # via swag-client -s3transfer==0.4.2 +s3transfer==0.5.0 # via boto3 -simplejson==3.17.2 +simplejson==3.17.3 # via swag-client six==1.16.0 # via @@ -103,7 +101,7 @@ six==1.16.0 # jsonschema # python-dateutil # retrying -sqlalchemy==1.4.18 +sqlalchemy==1.4.21 # via -r requirements.in stevedore==3.3.0 # via dogpile.cache @@ -111,7 +109,9 @@ swag-client==3.0.0 # via -r requirements.in tabulate==0.8.9 # via swag-client -urllib3==1.26.5 +toml==0.10.2 + # via -r requirements.in +urllib3==1.26.6 # via botocore werkzeug==2.0.1 # via flask diff --git a/settings.yaml b/settings.yaml new file mode 100644 index 0000000..e8dac13 --- /dev/null +++ b/settings.yaml @@ -0,0 +1,35 @@ +--- +default: + AWS_ARN_PARTITION: aws + AWS_REGION: us-east-1 + AWS_ROLENAME: Aardvark + SQLALCHEMY_DATABASE_URI: "sqlite:///:memory:" + SQLALCHEMY_TRACK_MODIFICATIONS: false + UPDATER_NUM_THREADS: 1 + SWAG: + bucket: "swag-bucket" + filter: "" + service_enabled_requirement: "" + opts: + swag.schema_version: 2 + swag.type: "s3" + swag.bucket_name: "swag-bucket" + swag.data_file: "v2/accounts.json" + swag.region: "us-east-1" +testing: + AWS_ARN_PARTITION: test + AWS_REGION: test + AWS_ROLENAME: 
test + SQLALCHEMY_DATABASE_URI: "sqlite:///:memory:" + SQLALCHEMY_TRACK_MODIFICATIONS: false + UPDATER_NUM_THREADS: 1 + SWAG: + bucket: "swag-bucket" + filter: "mock swag filter" + service_enabled_requirement: "glowcloud" + opts: + swag.schema_version: 2 + swag.type: "s3" + swag.bucket_name: "swag-bucket" + swag.data_file: "v2/accounts.json" + swag.region: "us-east-1" diff --git a/setup.py b/setup.py index c174393..1950c24 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ name: aardvark description: Multi-Account AWS IAM Access Advisor API author: Patrick Kelley, Travis McPeak, Patrick Sanders -maintainer: Patrick Sanders contact: aardvark-maintainers@netflix.com """ from setuptools import setup @@ -12,5 +11,5 @@ python_requires="~=3.8", versioning="dev", setup_requires="setupmeta", - entry_points={"console_scripts": ["aardvark = aardvark.manage:main"]}, + entry_points={"console_scripts": ["aardvark = aardvark.manage:cli"]}, ) diff --git a/test/conftest.py b/test/conftest.py index 494af80..cb3cc0e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,35 +1,39 @@ -import confuse import pytest +from dynaconf import Dynaconf -import aardvark.configuration +import aardvark.config +from aardvark.config import settings +from aardvark import init_logging + + +init_logging() + +@pytest.fixture(scope="session", autouse=True) +def set_test_settings(): + settings.configure(FORCE_ENV_FOR_DYNACONF="testing") @pytest.fixture def temp_config_file(tmp_path): - config_path = tmp_path / "config.yaml" + config_path = tmp_path / "settings.yaml" return str(config_path) -@pytest.fixture(autouse=True) +@pytest.fixture(autouse=True, scope="function") def patch_config(monkeypatch, tmp_path): - config = confuse.Configuration("aardvark-test", read=False) - db_path = tmp_path / "aardvark-test.db" db_uri = f"sqlite:///{db_path}" - config["sqlalchemy"]["database_uri"] = str(db_uri) - log_path = tmp_path / "aardvark-test.log" - config["logging"]["handlers"]["file"]["filename"] = 
str(log_path) + config = Dynaconf( + envvar_prefix="AARDVARK", + settings_files=[ + "test/settings.yaml", + ], + environments=True, + ) + config.configure(FORCE_ENV_FOR_DYNACONF="testing") + config.set("sqlalchemy_database_uri", db_uri) # Monkeypatch the actual config object so we don't poison it for future tests - monkeypatch.setattr(aardvark.configuration, "CONFIG", config) + monkeypatch.setattr(aardvark.config, "settings", config) yield config - - -@pytest.fixture -def mock_config(patch_config): - patch_config["updater"]["num_threads"] = 1 - patch_config["swag"]["opts"] = {} - patch_config["swag"]["filter"] = "" - patch_config["swag"]["service_enabled_requirement"] = "" - return patch_config diff --git a/test/persistence/test_sqlalchemy.py b/test/persistence/test_sqlalchemy.py index 67097b4..eca79d2 100644 --- a/test/persistence/test_sqlalchemy.py +++ b/test/persistence/test_sqlalchemy.py @@ -1,7 +1,7 @@ import datetime -import confuse import pytest +from dynaconf.utils import DynaconfDict from sqlalchemy.exc import OperationalError from aardvark.persistence import PersistencePlugin @@ -34,8 +34,12 @@ @pytest.fixture(scope="function") def temp_sqlite_db_config(): db_uri = "sqlite:///:memory:" - custom_config = confuse.Configuration("aardvark_test", __name__, read=False) - custom_config["sqlalchemy"]["database_uri"] = db_uri + custom_config = DynaconfDict( + { + "sqlalchemy": {"database_uri": str(db_uri)}, + } + ) + custom_config["sqlalchemy_database_uri"] = db_uri return custom_config @@ -43,19 +47,17 @@ def test_sqlalchemypersistence(): sap = SQLAlchemyPersistence() assert isinstance(sap, AardvarkPlugin) assert isinstance(sap, PersistencePlugin) - assert sap.config is not None - assert isinstance(sap.config, confuse.Configuration) + assert sap.config def test_sqlalchemypersistence_custom_config(): - custom_config = confuse.Configuration("aardvark", __name__) + custom_config = DynaconfDict({"test_key": "test_value"}) custom_config["test_key"] = "test_value" sap 
= SQLAlchemyPersistence(alternative_config=custom_config, initialize=False) assert isinstance(sap, AardvarkPlugin) assert isinstance(sap, PersistencePlugin) assert sap.config - assert isinstance(sap.config, confuse.Configuration) - assert sap.config["test_key"].get() == "test_value" + assert sap.config["test_key"] == "test_value" def test_init_db(temp_sqlite_db_config): diff --git a/test/retrievers/conftest.py b/test/retrievers/conftest.py index d72ae3d..0876e19 100644 --- a/test/retrievers/conftest.py +++ b/test/retrievers/conftest.py @@ -2,14 +2,14 @@ import pytest from typing import Any, Dict -import confuse +from dynaconf import Dynaconf from aardvark.retrievers import RetrieverPlugin from aardvark.retrievers.runner import RetrieverRunner class RetrieverStub(RetrieverPlugin): - def __init__(self, alternative_config: confuse.Configuration = None): + def __init__(self, alternative_config: Dynaconf = None): super().__init__("retriever_stub", alternative_config=alternative_config) async def run(self, arn: str, data: Dict[str, Any]) -> Dict[str, Any]: @@ -18,7 +18,7 @@ async def run(self, arn: str, data: Dict[str, Any]) -> Dict[str, Any]: class FailingRetriever(RetrieverPlugin): - def __init__(self, alternative_config: confuse.Configuration = None): + def __init__(self, alternative_config: Dynaconf = None): super().__init__("retriever_stub", alternative_config=alternative_config) async def run(self, arn: str, data: Dict[str, Any]) -> Dict[str, Any]: diff --git a/test/retrievers/test_access_advisor.py b/test/retrievers/test_access_advisor.py index 0ef8583..b6dbdea 100644 --- a/test/retrievers/test_access_advisor.py +++ b/test/retrievers/test_access_advisor.py @@ -193,7 +193,9 @@ def test_run(mock_boto3_cached_conn, event_loop, arn, data, expected): @pytest.mark.parametrize("arn,data,expected", [("arn", {}, {})]) @patch("aardvark.retrievers.access_advisor.boto3_cached_conn") -def test_run_missing_arn(mock_boto3_cached_conn, event_loop, arn, data, expected): +def 
test_run_missing_arn( + mock_boto3_cached_conn, event_loop, arn, data, expected +): mock_iam_client = MagicMock() mock_iam_client.exceptions.NoSuchEntityException = Exception mock_iam_client.generate_service_last_accessed_details.side_effect = ( diff --git a/test/retrievers/test_runner.py b/test/retrievers/test_runner.py index 833a7f7..466708a 100644 --- a/test/retrievers/test_runner.py +++ b/test/retrievers/test_runner.py @@ -57,12 +57,17 @@ async def test_retriever_loop_failure(runner, mock_failing_retriever): runner.arn_queue = arn_queue results_queue = asyncio.Queue() runner.results_queue = results_queue + failure_queue = asyncio.Queue() + runner.failure_queue = failure_queue task = asyncio.create_task(runner._retriever_loop("")) await arn_queue.join() task.cancel() assert len(runner.failed_arns) == 1 assert runner.failed_arns[0] == "abc123" assert runner.results_queue.empty() + assert not runner.failure_queue.empty() + failed = await runner.failure_queue.get() + assert failed == "abc123" @pytest.mark.asyncio @@ -72,13 +77,12 @@ async def test_results_loop(runner, mock_retriever): await results_queue.put({"arn": "abc123", "access_advisor": {"access": "advised"}}) runner.results_queue = results_queue expected = {"abc123": {"access": "advised"}} - with patch("aardvark.retrievers.runner.sap") as sap: - sap.store_role_data = MagicMock() - task = asyncio.create_task(runner._results_loop("")) - await runner.results_queue.join() - task.cancel() - sap.store_role_data.assert_called() - sap.store_role_data.assert_called_with(expected) + runner.persistence.store_role_data = MagicMock() + task = asyncio.create_task(runner._results_loop("")) + await runner.results_queue.join() + task.cancel() + runner.persistence.store_role_data.assert_called() + runner.persistence.store_role_data.assert_called_with(expected) @patch("aardvark.retrievers.runner.boto3_cached_conn") @@ -136,12 +140,9 @@ async def test_arn_lookup_loop(mock_get_arns_for_account, runner): @pytest.mark.asyncio 
-async def test_get_swag_accounts(mock_config): - mock_config["swag"]["opts"] = {} - mock_config["swag"]["filter"] = "mock swag filter" - mock_config["swag"]["service_enabled_requirement"] = "glowcloud" +async def test_get_swag_accounts(): swag_response = {"foo": "bar"} - runner = RetrieverRunner(alternative_config=mock_config) + runner = RetrieverRunner() runner.swag = MagicMock() runner.swag.get_all.return_value = swag_response runner.swag.get_service_enabled.return_value = swag_response @@ -154,12 +155,9 @@ async def test_get_swag_accounts(mock_config): @pytest.mark.asyncio -async def test_get_swag_accounts_failure(mock_config): - mock_config["swag"]["opts"] = {} - mock_config["swag"]["filter"] = "mock swag filter" - mock_config["swag"]["service_enabled_requirement"] = "glowcloud" +async def test_get_swag_accounts_failure(): swag_response = {"foo": "bar"} - runner = RetrieverRunner(alternative_config=mock_config) + runner = RetrieverRunner() runner.swag = MagicMock() runner.swag.get_all.side_effect = InvalidSWAGDataException runner.swag.get_service_enabled.return_value = swag_response @@ -224,8 +222,8 @@ async def test_queue_arns(runner): @pytest.mark.asyncio -async def test_run(mock_config): - runner = RetrieverRunner(alternative_config=mock_config) +async def test_run(): + runner = RetrieverRunner() runner._queue_accounts = AsyncMock() runner._queue_arns = AsyncMock() runner._queue_all_accounts = AsyncMock() @@ -243,8 +241,8 @@ async def test_run(mock_config): @pytest.mark.asyncio -async def test_run_with_accounts(mock_config): - runner = RetrieverRunner(alternative_config=mock_config) +async def test_run_with_accounts(): + runner = RetrieverRunner() runner._queue_accounts = AsyncMock() runner._queue_arns = AsyncMock() runner._queue_all_accounts = AsyncMock() @@ -262,8 +260,8 @@ async def test_run_with_accounts(mock_config): @pytest.mark.asyncio -async def test_run_with_arns(mock_config): - runner = RetrieverRunner(alternative_config=mock_config) +async def 
test_run_with_arns(): + runner = RetrieverRunner() runner._queue_accounts = AsyncMock() runner._queue_arns = AsyncMock() runner._queue_all_accounts = AsyncMock() diff --git a/test/test_config.py b/test/test_config.py index 279c4ca..1e9f59a 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,6 +1,6 @@ import yaml -from aardvark.configuration import create_config +from aardvark.config import create_config def test_create_config(temp_config_file): @@ -15,17 +15,18 @@ def test_create_config(temp_config_file): num_threads=99, region="us-underground-5", filename=temp_config_file, + environment="testtesttest", ) with open(temp_config_file, "r") as f: file_data = yaml.safe_load(f) - assert file_data["aws"]["rolename"] == "role" - assert file_data["aws"]["region"] == "us-underground-5" - assert file_data["aws"]["arn_partition"] == "aws" - assert file_data["swag"]["bucket"] == "bucket" - assert file_data["swag"]["filter"] == "filter" - assert file_data["swag"]["service_enabled_requirement"] == "service" - assert file_data["updater"]["num_threads"] == 99 - assert file_data["sqlalchemy"]["database_uri"] == "sqlite://////////////hi.db" - assert file_data["sqlalchemy"]["track_modifications"] + assert file_data["testtesttest"]["AWS_ROLENAME"] == "role" + assert file_data["testtesttest"]["AWS_REGION"] == "us-underground-5" + assert file_data["testtesttest"]["AWS_ARN_PARTITION"] == "aws" + assert file_data["testtesttest"]["SWAG"]["bucket"] == "bucket" + assert file_data["testtesttest"]["SWAG"]["filter"] == "filter" + assert file_data["testtesttest"]["SWAG"]["service_enabled_requirement"] == "service" + assert file_data["testtesttest"]["UPDATER_NUM_THREADS"] == 99 + assert file_data["testtesttest"]["SQLALCHEMY_DATABASE_URI"] == "sqlite://////////////hi.db" + assert file_data["testtesttest"]["SQLALCHEMY_TRACK_MODIFICATIONS"] is True