Skip to content
This repository has been archived by the owner on Jan 30, 2019. It is now read-only.

Add support for creating envs from requirements.txt files via pip #172

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
2 changes: 2 additions & 0 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ source:
requirements:
build:
- python
- pip
run:
- python
- pip

test:
commands:
Expand Down
2 changes: 2 additions & 0 deletions conda_env/cli/main_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ def execute(args, parser):
# FIXME conda code currently requires args to have a name or prefix
if args.name is None:
args.name = env.name
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't fully understand what's going on here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this the environment won't get a name, because requirements.txt files cannot contain the name of the environment.

env.name = args.name

except exceptions.SpecNotFound as e:
common.error_and_exit(str(e), json=args.json)
Expand Down
90 changes: 84 additions & 6 deletions conda_env/env.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
from __future__ import absolute_import, print_function

import errno
import os
import subprocess
from collections import OrderedDict
from copy import copy
import os
from io import open
from os.path import isdir
from shutil import rmtree

# Try to import PipSession to support new pips
try:
from pip.download import PipSession
except ImportError:
pass
from pip.req import parse_requirements

# TODO This should never have to import from conda.cli
from conda.cli import common
from conda.cli import main_list
from conda import install
from conda.api import get_index
from conda.cli import common, main_list
from conda.resolve import NoPackagesFound, Resolve, MatchSpec

from . import compat
from . import exceptions
Expand Down Expand Up @@ -50,19 +64,83 @@ def from_environment(name, prefix, no_builds=False):


def from_yaml(yamlstr, **kwargs):
"""Load and return a ``Environment`` from a given ``yaml string``"""
"""Load and return an ``Environment`` from a given ``yaml string``"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice catch!

data = yaml.load(yamlstr)
if kwargs is not None:
for key, value in kwargs.items():
data[key] = value
return Environment(**data)


def get_all_pip_dependencies(requirements_path, temp_dir='_delete_when_done'):
"""
Use pip to gather all of the packages that would be installed for the given
requirements.txt file.
"""
pip_cmd = ('pip', 'install', '--src', temp_dir, '--download', temp_dir,
'--no-use-wheel', '-r', requirements_path)
try:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment elsewhere. Please use isdir to check if dir exists, then create if necessary rather than try/except.

os.makedirs(temp_dir)
except OSError as exc:
# Don't raise exception if directory already exists
if not (exc.errno == errno.EEXIST and isdir(temp_dir)):
raise
process = subprocess.Popen(pip_cmd, stdout=subprocess.PIPE,
universal_newlines=True)
stdout_data = process.communicate()[0]
reqs = []
for line in stdout_data.splitlines():
req = ''
if line.startswith('Collecting'):
req = line.split(' (', 1)[0][11:]
elif line.startswith('Obtaining'):
req = line.split(' (', 1)[0][10:]
if req:
if ' from ' in req:
req = '-e {}'.format(req.split(' from ', 1)[1])
reqs.append(req)
# Remove temporary directory
rmtree(temp_dir)
return reqs


def from_requirements_txt(filename, **kwargs):
"""Load and return an ``Environment`` from a given ``requirements.txt``"""
pip_reqs = []
dep_list = []
r = Resolve(get_index())
parsed_reqs = get_all_pip_dependencies(filename)
for req in parsed_reqs:
if req.startswith('-e'):
pip_reqs.append(req)
# If it's not an editable package, check if it's available via conda
else:
try:
# If package is available via conda, use that
r.get_pkgs(MatchSpec(common.arg2spec(req)))
dep_list.append(req)
except NoPackagesFound:
# Otherwise, just use pip
pip_reqs.append(req)
# Add pip requirements to environment if there were any left
if pip_reqs:
dep_list.append('pip')
dep_list.append({'pip': pip_reqs})
data = {'dependencies': dep_list}
if kwargs is not None:
for key, value in kwargs.items():
data[key] = value
return Environment(**data)


def from_file(filename):
if not os.path.exists(filename):
raise exceptions.EnvironmentFileNotFound(filename)
with open(filename, 'rb') as fp:
return from_yaml(fp.read(), filename=filename)
if filename.endswith('.txt'):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a little too broad of a catch-all. Since the standard name is requirements.txt, would it be OK to match just that? Otherwise, I can see this leading to some interesting bug hunts.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like people frequently have things like dev-requirements.txt and test-requirements.txt, so maybe we could do:

if filename.endswith('requirements.txt'):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I'd like to match *requirements*.txt so that files like requirements_test.txt will work too.

return from_requirements_txt(filename)
else:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, rather than a catch-all else statement, replace with the standard for yaml input (I think this is environment.yaml?)

If we don't match standard names, then we need to fall out, or provide a way for people to manually specify what kind of file they are providing. This means modifying this function to accept another argument to specify this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like checking for specific file names is a little overly strict. If people are passing in file names as arguments part of conda env create -n foo -f FILENAME, it seems strange to restrict what the filenames must be. Why not just assume that .yml or .yaml files should be loaded as such, and .txt files should be treated as pip/setuptools requirements files?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, until someone else defines a new, shiny format for their package management tool that also happens to use yaml. Making it explicit outside of standard filenames is safer over the long haul. I think your idea of "endswith" is a good compromise between flexibility and prevention of future unexpected failure.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The --file argument is making it explicit. The user is explicitly saying, "This is the file that defines my environment." The default value for that argument is environment.yml, but anything else that is assigned to it is obviously what the person wants to create their environment from. If in the future someone comes up with a new package format that use YAML that they would like to be supported by conda env, they're going to need to create pull request to add it anyway, so I don't see how just checking based on the extension right now (and raising an error if the extension isn't in {'txt', 'yml', 'yaml'}) is dangerous in any way.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not dangerous, but the error messages that someone will encounter in the future will not be helpful. All I am really asking is that you provide a helpful error message if loading fails. Loading should never fail, so long as files follow known standards (environment.yml and requirements.txt). YAML and txt as file extensions happen to currently capture the current range of possibilities. When someone adds something new (and you know they will, and I hope they use YAML, because I think it is technically good), I'd like users to meet a friendly error message along the lines of:

The specified file is not recognized as a conda-env input.  Conda-env currently support conda-env's environment.yaml specification, and pip's requirements.txt specification.  Please file an issue for the new format you're trying to use at https://github.com/conda/conda-env/issues

Rather than something obscure like:
KeyError: ...

with open(filename, 'rb') as fp:
return from_yaml(fp.read(), filename=filename)


# TODO test explicitly
Expand Down
11 changes: 10 additions & 1 deletion conda_env/installers/pip.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
from __future__ import absolute_import

import subprocess
from itertools import chain
from os.path import join

import conda.config as config
from conda.cli import main_list


def install(prefix, specs, args, env):
pip_cmd = main_list.pip_args(prefix) + ['install', ] + specs
# This mess is necessary to get --editable package sent to subprocess
# properly.
specs = list(chain(*[s.split() if '-e' in s else [s] for s in specs]))
# Directory where pip will store VCS checkouts for editable packages
src_dir = join(config.envs_dirs[0], env.name, 'src')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like config.envx_dirs[0] is the location of the root environment.
how is this gonna work when we want to install into some other environment? (like the one we have just created)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

config.envs_dirs[0] should be $CONDA_ROOT/envs, and I'm joining that with the name of the environment and src, so the final path will be $CONDA_ROOT/envs/$CONDA_ENV/src. That's exactly what we want.

pip_cmd = main_list.pip_args(prefix) + ['install', '--src', src_dir] + specs
process = subprocess.Popen(pip_cmd, universal_newlines=True)
process.communicate()
5 changes: 5 additions & 0 deletions tests/support/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Comment to be ignored
skll
# Comment 2
simplejson
-e git+https://github.com/fabric/fabric.git#egg=fabric
13 changes: 13 additions & 0 deletions tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ def test_with_pip(self):
self.assert_('foo' in e.dependencies['pip'])
self.assert_('baz' in e.dependencies['pip'])

def test_requirements_txt(self):
e = env.from_file(utils.support_file('requirements.txt'))
# skll could be in either, depending on active channels
self.assert_('skll' in e.dependencies['pip'] or
'skll' in e.dependencies['conda'])
self.assert_('simplejson' in e.dependencies['pip'])
self.assert_('-e git+https://github.com/fabric/fabric.git#egg=fabric' in
e.dependencies['pip'])
self.assert_('numpy' in e.dependencies['conda'])
self.assert_('# Comment to be ignored' not in e.dependencies['pip'])
self.assert_('# Comment 2' not in e.dependencies['pip'])
self.assert_(' # Comment 2' not in e.dependencies['pip'])


class EnvironmentTestCase(unittest.TestCase):
def test_has_empty_filename_by_default(self):
Expand Down