1

I've uploaded my personal generic functions, called ofunctions, to GitHub in order to share them between my projects and to have separate CI and coverage tests. Link to github project here.

So far so good: I have a package called ofunctions which has several subpackages, like ofunctions.network.

I want to be able to install subpackages without having to install the whole package, ie pip install ofunctions.network. So I've created a single setup.py file that creates the necessary dist files to upload on PyPI.

My problem:

Whenever I use python setup.py sdist bdist_wheel, it generates the full ofunctions package and a package for each subpackage, but:

  • source packages like ofunctions.network-0.5.0.tar.gz only contain the subpackage (expected behavior)
  • wheel packages like ofunctions.network-0.5.0-py3-none-any.whl contain the whole package (unexpected behavior)

The wheel packages contain the whole ofunctions library, including all subpackages, whereas they obviously should contain only the same subpackage as the source dist files.

Can anybody have a look at my setup.py file and tell me why the sdist and wheel files don't contain strictly the same subpackages only ?

#! /usr/bin/env python
#  -*- coding: utf-8 -*-
#
# This file is part of ofunctions package

"""
Namespace packaging here

# Make sure we declare an __init__.py file as namespace holder in the package root containing the following

try:
    __import__('pkg_resources').declare_namespace(__name__)
except ImportError:
    from pkgutil import extend_path
    __path__ = extend_path(__path__, __name__)
"""

import codecs
import os

import pkg_resources
import setuptools


def get_metadata(package_file):
    """
    Read version and description metadata from a package file

    The file is scanned as plain text (it is never imported). For every line that starts
    with ``__version__`` or ``__description__``, the value kept is the text found between
    the first pair of quote characters on that line (double quotes are preferred when the
    line contains any, single quotes otherwise).

    :param package_file: path of the file to scan; a relative path is resolved against
                         the directory containing this script
    :return: dict holding a 'version' and / or 'description' key for each marker found
    """

    def _read(_package_file):
        here = os.path.abspath(os.path.dirname(__file__))
        # Decode explicitly as UTF-8 (like get_long_description does) so the result
        # does not depend on the locale encoding of the build machine
        with open(os.path.join(here, _package_file), 'r', encoding='utf-8') as fp:
            return fp.read()

    _metadata = {}

    for line in _read(package_file).splitlines():
        for marker, key in (('__version__', 'version'), ('__description__', 'description')):
            if line.startswith(marker):
                delim = '"' if '"' in line else "'"
                _metadata[key] = line.split(delim)[1]
    return _metadata


def parse_requirements(filename):
    """
    There is a parse_requirements function in pip but it keeps changing import path
    Let's build a simple one

    :param filename: path to a requirements.txt style file
    :return: list of requirement strings as parsed by pkg_resources; an empty list
             (after printing a warning) when the file cannot be opened
    """
    try:
        with open(filename, 'r', encoding='utf-8') as requirements_txt:
            install_requires = [
                str(requirement)
                for requirement
                in pkg_resources.parse_requirements(requirements_txt)
            ]
        return install_requires
    except OSError:
        print('WARNING: No requirements.txt file found as "{}". Please check path or create an empty one'
              .format(filename))
        # Explicit empty list instead of an implicit None, so callers always get a list
        return []


def get_long_description(filename):
    """
    Return the full text of the given file, decoded as UTF-8

    :param filename: path of the file to read (the README in this script)
    :return: file content as a single string
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        return handle.read()


#  ######### ACTUAL SCRIPT ENTRY POINT

# Name of the top-level namespace package; it doubles as the package directory name,
# resolved against the current working directory
NAMESPACE_PACKAGE_NAME = 'ofunctions'
namespace_package_path = os.path.abspath(NAMESPACE_PACKAGE_NAME)
namespace_package_file = os.path.join(namespace_package_path, '__init__.py')
# Version and description are parsed out of the namespace package's __init__.py
metadata = get_metadata(namespace_package_file)
requirements = parse_requirements(os.path.join(namespace_package_path, 'requirements.txt'))

# Generic namespace package: one distribution named after the namespace that bundles
# every package matched by the 'ofunctions.*' include pattern
setuptools.setup(
    name=NAMESPACE_PACKAGE_NAME,
    namespace_packages=[NAMESPACE_PACKAGE_NAME],
    packages=setuptools.find_namespace_packages(include=['ofunctions.*']),
    version=metadata['version'],
    install_requires=requirements,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Topic :: Software Development",
        "Topic :: System",
        "Topic :: System :: Operating System",
        "Topic :: System :: Shells",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Operating System :: POSIX :: Linux",
        "Operating System :: POSIX :: BSD :: FreeBSD",
        "Operating System :: POSIX :: BSD :: NetBSD",
        "Operating System :: POSIX :: BSD :: OpenBSD",
        "Operating System :: Microsoft",
        "Operating System :: Microsoft :: Windows",
        "License :: OSI Approved :: BSD License",
    ],
    description=metadata['description'],
    author='NetInvent - Orsiris de Jong',
    author_email='contact@netinvent.fr',
    url='https://github.com/netinvent/ofunctions',
    keywords=['network', 'bisection', 'logging'],
    long_description=get_long_description('README.md'),
    long_description_content_type="text/markdown",
    python_requires='>=3.5',
    # namespace packages don't work well with zipped eggs
    # ref https://packaging.python.org/guides/packaging-namespace-packages/
    zip_safe=False
)

# One additional setup() call per discovered subpackage, so that each of them is also
# built as its own distribution (named after its dotted package name)
# NOTE(review): nothing here clears the build directory between successive setup() calls,
# so files staged by an earlier call presumably leak into the wheels built here - confirm
for package in setuptools.find_namespace_packages(include=['ofunctions.*']):
    # Dotted package name -> directory path, resolved against the current working directory
    package_path = os.path.abspath(package.replace('.', os.sep))
    package_file = os.path.join(package_path, '__init__.py')
    # Each subpackage carries its own version / description and its own requirements.txt
    metadata = get_metadata(package_file)
    requirements = parse_requirements(os.path.join(package_path, 'requirements.txt'))
    # Debug output of what is about to be packaged
    print(package_path)
    print(package_file)
    print(metadata)
    print(requirements)

    setuptools.setup(
        name=package,
        namespace_packages=[NAMESPACE_PACKAGE_NAME],
        packages=[package],
        package_data={package: ['__init__.py']},
        version=metadata['version'],
        install_requires=requirements,
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Intended Audience :: Developers",
            "Topic :: Software Development",
            "Topic :: System",
            "Topic :: System :: Operating System",
            "Topic :: System :: Shells",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
            "Operating System :: POSIX :: Linux",
            "Operating System :: POSIX :: BSD :: FreeBSD",
            "Operating System :: POSIX :: BSD :: NetBSD",
            "Operating System :: POSIX :: BSD :: OpenBSD",
            "Operating System :: Microsoft",
            "Operating System :: Microsoft :: Windows",
            "License :: OSI Approved :: BSD License",
        ],
        description=metadata['description'],
        author='NetInvent - Orsiris de Jong',
        author_email='contact@netinvent.fr',
        url='https://github.com/netinvent/ofunctions',
        keywords=['network', 'bisection', 'logging'],
        long_description=get_long_description('README.md'),
        long_description_content_type="text/markdown",
        python_requires='>=3.5',
        # namespace packages don't work well with zipped eggs
        # ref https://packaging.python.org/guides/packaging-namespace-packages/
        zip_safe=False
    )

Thanks 8-|

sinoroc
  • 18,409
  • 2
  • 39
  • 70
Orsiris de Jong
  • 2,819
  • 1
  • 26
  • 48
  • You probably need to create different projects, containing a `setup.py` each. I do not think I have ever seen a project with a `setup.py` containing multiple calls to `setuptools.setup` that was working as expected. – sinoroc Feb 11 '21 at 19:12
  • I actually saw your post on https://discuss.python.org/t/use-case-supported-2-namespace-packages-in-single-source-repo-with-1-setup-py/5440/5 which inspired me. I'd be damned to make a setup.py file for every subpackage. I'll still search a bit. Pretty sure I have to remove some temp files somewhere so built packages stay clean. I'll report back if I happen to find the solution ;) – Orsiris de Jong Feb 11 '21 at 19:50
  • 1
    I forgot I had written this, haha! -- I'd need to re-read all this, I don't remember the whole thing. But one key thing, in that other discussion the code I suggested runs `setuptools.setup` only once in the `setup.py`. -- If you manage to create a minimal reproducible example [mre] I can probably help. – sinoroc Feb 11 '21 at 20:21
  • I did manage to come up with a working implementation (see my answer below). But I noticed `setuptools.setup()` never cleans the build directory, and if I happen to put a big file out of nowhere inside, next build will contain that file. Isn't that somehow a bug that should be reported to setuptools ? Btw, thanks for your help and your former implementation ;) – Orsiris de Jong Feb 11 '21 at 21:12
  • 1
    OK, If it works, it works. -- One more note though: Maybe if you are worried about having to write too many `setup.py` files, you could write a script that generates those `setup.py` files (or better `setup.cfg`), and keep things more "standard". – sinoroc Feb 11 '21 at 21:30
  • You're right, I was trying to be lazy... in the end I got more work ;) But that problem with non empty build dirs bloated by previous build files (regardless of namespace packages I think) worries me more now. Not an setuptools expert. Do you think I should file a bug report ? – Orsiris de Jong Feb 11 '21 at 21:35
  • 1
    I think it's by design that the build directory is not deleted every time. I think I remember seeing discussions on that topic. Might be it is about reusing previously built artifacts to speed up the global build process. – sinoroc Feb 11 '21 at 22:45
  • I think in your case, I would write a script that creates setup.py files in each of the subdirectories (i.e. 1 per distribution package), and calls them all one after the other (`subprocess.check_call([sys.executable, 'setup.py', 'sdist'])` I guess). No need to create monster `setup.py` files that would only cause issues down the line. – sinoroc Feb 11 '21 at 22:48
  • At the end, it would still endup in the same build dir, so I'd still have to empty that one between each setup.py run. – Orsiris de Jong Feb 11 '21 at 22:59
  • 1
    Depends... In that case, it depends how you organize the project directory structure. It's all a matter of compromises. I do not have one right answer. – sinoroc Feb 12 '21 at 07:54
  • Indeed, but then I'd be better off making a full project out of every subpackage, which is overkill to me. Anyway, I'll use the build dir clean solution. Big thanks for your time. – Orsiris de Jong Feb 12 '21 at 08:15

1 Answers1

3

Okay so I think I found the problem. The build directory isn't cleaned between setuptools runs.

Worse, the build directory is never cleaned unless you manually remove it, so old build files may end up in newer wheel package builds, even on single package builds I think.

I added a function clear_package_build_path() that runs before each setuptools.setup() call: it removes the build directory and recreates an empty build/lib/package dir. Now my wheel files are built with only the necessary files, with no more bloat.

For instance here's the full working code:

#! /usr/bin/env python
#  -*- coding: utf-8 -*-
#
# This file is part of ofunctions package

"""
Namespace packaging here

# Make sure we declare an __init__.py file as namespace holder in the package root containing the following

try:
    __import__('pkg_resources').declare_namespace(__name__)
except ImportError:
    from pkgutil import extend_path
    __path__ = extend_path(__path__, __name__)
"""

import codecs
import os
import shutil

import pkg_resources
import setuptools


def get_metadata(package_file):
    """
    Read metadata from package file

    The file is read as text and scanned line by line, without importing it. A line
    starting with ``__version__`` fills the 'version' key and a line starting with
    ``__description__`` fills the 'description' key; in both cases the value is the text
    between the first pair of quotes on the line (double quotes when the line has any,
    single quotes otherwise).

    :param package_file: path of the file to scan; relative paths are resolved against
                         the directory containing this script
    :return: dict with the 'version' and / or 'description' entries that were found
    """

    def _read(_package_file):
        here = os.path.abspath(os.path.dirname(__file__))
        # Explicit UTF-8, consistent with get_long_description, instead of relying on
        # whatever the platform's default encoding happens to be
        with open(os.path.join(here, _package_file), 'r', encoding='utf-8') as fp:
            return fp.read()

    _metadata = {}
    markers = (('__version__', 'version'), ('__description__', 'description'))

    for line in _read(package_file).splitlines():
        for marker, key in markers:
            if line.startswith(marker):
                delim = '"' if '"' in line else "'"
                _metadata[key] = line.split(delim)[1]
    return _metadata


def parse_requirements(filename):
    """
    There is a parse_requirements function in pip but it keeps changing import path
    Let's build a simple one

    :param filename: path to a requirements.txt style file
    :return: list of requirement strings as parsed by pkg_resources; an empty list
             (after printing a warning) when the file cannot be opened
    """
    try:
        with open(filename, 'r', encoding='utf-8') as requirements_txt:
            install_requires = [
                str(requirement)
                for requirement
                in pkg_resources.parse_requirements(requirements_txt)
            ]
        return install_requires
    except OSError:
        print('WARNING: No requirements.txt file found as "{}". Please check path or create an empty one'
              .format(filename))
        # Hand back an explicit empty list rather than falling through with None
        return []


def get_long_description(filename):
    """
    Read the given file as UTF-8 text and return its whole content

    :param filename: path of the file to read (the README in this script)
    :return: file content as a single string
    """
    with open(filename, mode='r', encoding='utf-8') as source:
        content = source.read()
        return content


def clear_package_build_path(package_rel_path):
    """
    Start the next build from an empty build tree

    The whole 'build' directory (relative to the current working directory) is removed,
    then an empty build/lib/<package_rel_path> directory is created again, since the
    author found that setuptools expects that path to exist.

    :param package_rel_path: package directory relative to the project root,
                             e.g. 'ofunctions' or 'ofunctions/network'
    """
    relative_target = os.path.join('build', 'lib', package_rel_path)
    target_dir = os.path.abspath(relative_target)
    try:
        # shutil.rmtree() is used on purpose: the author reports that os.remove()
        # produces "WindowsError: [Error 5] Access is denied"
        shutil.rmtree('build')
    except FileNotFoundError:
        # No previous build output, nothing to remove
        print('build path: {} does not exist'.format(target_dir))
    # Recreate the 'build/lib/package/subpackage' path so setuptools won't fail
    os.makedirs(target_dir)


#  ######### ACTUAL SCRIPT ENTRY POINT

# Name of the top-level namespace package; it doubles as the package directory name,
# resolved against the current working directory
NAMESPACE_PACKAGE_NAME = 'ofunctions'
namespace_package_path = os.path.abspath(NAMESPACE_PACKAGE_NAME)
namespace_package_file = os.path.join(namespace_package_path, '__init__.py')
# Version and description are parsed out of the namespace package's __init__.py
metadata = get_metadata(namespace_package_file)
requirements = parse_requirements(os.path.join(namespace_package_path, 'requirements.txt'))

# First lets make sure build path is clean (avoiding namespace package pollution in subpackages)
# Clean build dir before every run so we don't make cumulative wheel files
clear_package_build_path(NAMESPACE_PACKAGE_NAME)

# Generic namespace package: one distribution named after the namespace that bundles
# every package matched by the 'ofunctions.*' include pattern
setuptools.setup(
    name=NAMESPACE_PACKAGE_NAME,
    namespace_packages=[NAMESPACE_PACKAGE_NAME],
    packages=setuptools.find_namespace_packages(include=['ofunctions.*']),
    version=metadata['version'],
    install_requires=requirements,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Topic :: Software Development",
        "Topic :: System",
        "Topic :: System :: Operating System",
        "Topic :: System :: Shells",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Operating System :: POSIX :: Linux",
        "Operating System :: POSIX :: BSD :: FreeBSD",
        "Operating System :: POSIX :: BSD :: NetBSD",
        "Operating System :: POSIX :: BSD :: OpenBSD",
        "Operating System :: Microsoft",
        "Operating System :: Microsoft :: Windows",
        "License :: OSI Approved :: BSD License",
    ],
    description=metadata['description'],
    author='NetInvent - Orsiris de Jong',
    author_email='contact@netinvent.fr',
    url='https://github.com/netinvent/ofunctions',
    keywords=['network', 'bisection', 'logging'],
    long_description=get_long_description('README.md'),
    long_description_content_type="text/markdown",
    python_requires='>=3.5',
    # namespace packages don't work well with zipped eggs
    # ref https://packaging.python.org/guides/packaging-namespace-packages/
    zip_safe=False
)



# One additional setup() call per discovered subpackage, so that each of them is also
# built as its own distribution (named after its dotted package name)
for package in setuptools.find_namespace_packages(include=['ofunctions.*']):
    # Dotted package name -> relative directory path, e.g. 'ofunctions.network' becomes
    # 'ofunctions' + os.sep + 'network'
    rel_package_path = package.replace('.', os.sep)
    package_path = os.path.abspath(rel_package_path)
    package_file = os.path.join(package_path, '__init__.py')
    # Each subpackage carries its own version / description and its own requirements.txt
    metadata = get_metadata(package_file)
    requirements = parse_requirements(os.path.join(package_path, 'requirements.txt'))
    # Debug output of what is about to be packaged
    print(package_path)
    print(package_file)
    print(metadata)
    print(requirements)

    # Again, we need to clean build paths between runs, otherwise files staged by the
    # previous setup() call would still be in the build directory for this one
    clear_package_build_path(rel_package_path)

    setuptools.setup(
        name=package,
        namespace_packages=[NAMESPACE_PACKAGE_NAME],
        packages=[package],
        package_data={package: ['__init__.py']},
        version=metadata['version'],
        install_requires=requirements,
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Intended Audience :: Developers",
            "Topic :: Software Development",
            "Topic :: System",
            "Topic :: System :: Operating System",
            "Topic :: System :: Shells",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
            "Operating System :: POSIX :: Linux",
            "Operating System :: POSIX :: BSD :: FreeBSD",
            "Operating System :: POSIX :: BSD :: NetBSD",
            "Operating System :: POSIX :: BSD :: OpenBSD",
            "Operating System :: Microsoft",
            "Operating System :: Microsoft :: Windows",
            "License :: OSI Approved :: BSD License",
        ],
        description=metadata['description'],
        author='NetInvent - Orsiris de Jong',
        author_email='contact@netinvent.fr',
        url='https://github.com/netinvent/ofunctions',
        keywords=['network', 'bisection', 'logging'],
        long_description=get_long_description('README.md'),
        long_description_content_type="text/markdown",
        python_requires='>=3.5',
        # namespace packages don't work well with zipped eggs
        # ref https://packaging.python.org/guides/packaging-namespace-packages/
        zip_safe=False
    )

As a side note, I noticed that os.remove() will fail with WindowsError: [Error 5] Access is denied from time to time, because os.remove() waits for all handles to be closed, which can take time because of the garbage collector (AFAIK). Using shutil.rmtree() does work in any case.

Orsiris de Jong
  • 2,819
  • 1
  • 26
  • 48