Call PySys_SetArgv when initializing interpreter. (#2341)

* Call PySys_SetArgv when initializing interpreter.

* Document argc/argv parameters in initialize_interpreter.

* Remove manual memory management from set_interpreter_argv in favor of smart pointers.

* Use size_t for indexers in set_interpreter_argv.

* Minimize macros for flow control in set_interpreter_argv.

* Fix 'unused variable' warning on Py2

* whitespace

* Define wide_char_arg_deleter outside set_interpreter_argv.

* Do sys.path workaround in C++ rather than eval.

* Factor out wchar conversion to a separate function.

* Restore widened_argv variable declaration.

* Fix undeclared widened_arg variable on some paths.

* Use delete[] to match new wchar_t[].

* Fix compiler errors

* Use PY_VERSION_HEX for a cleaner CVE-2008-5983 mode check.

* Fix typo

* Use explicit type for deleter so delete[] works cross-compiler.

* Always use PySys_SetArgvEx because pybind11 doesn't support pythons that don't include it.

* Remove pointless ternary operator.

* Use unique_ptr.reset instead of a second initialization.

* Rename add_program_dir_to_path parameter to clarify intent.

* Add defined() check before evaluating HAVE_BROKEN_MBSTOWCS.

* Apply clang-tidy fixes

* Pre-commit

* refactor: use const for set_interpreter_argv

* Try to fix const issue and allocate vector properly

* fix: copy strings on Python 2

* Applying clang-format-diff relative to master.

The only manual change is an added empty line between pybind11 and system `#include`s.

```
git diff -U0 --no-color master | python3 $HOME/clone/llvm-project/clang/tools/clang-format/clang-format-diff.py -p1 -style=file -i
```

Co-authored-by: Boris Staletic <boris.staletic@gmail.com>
Co-authored-by: Aaron Gokaslan <skylion.aaron@gmail.com>
Co-authored-by: Henry Schreiner <henryschreineriii@gmail.com>
Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
This commit is contained in:
Dan 2021-08-26 17:12:54 -04:00 committed by GitHub
parent 503ff2a6fb
commit 930bb16c79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 122 additions and 8 deletions

View File

@ -12,6 +12,9 @@
#include "pybind11.h" #include "pybind11.h"
#include "eval.h" #include "eval.h"
#include <memory>
#include <vector>
#if defined(PYPY_VERSION) #if defined(PYPY_VERSION)
# error Embedding the interpreter is not supported with PyPy # error Embedding the interpreter is not supported with PyPy
#endif #endif
@ -83,29 +86,106 @@ struct embedded_module {
} }
}; };
struct wide_char_arg_deleter {
void operator()(wchar_t *ptr) const {
#if PY_VERSION_HEX >= 0x030500f0
// API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale
PyMem_RawFree(ptr);
#else
delete[] ptr;
#endif
}
};
inline wchar_t *widen_chars(const char *safe_arg) {
#if PY_VERSION_HEX >= 0x030500f0
wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr);
#else
wchar_t *widened_arg = nullptr;
# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS
size_t count = strlen(safe_arg);
# else
size_t count = mbstowcs(nullptr, safe_arg, 0);
# endif
if (count != static_cast<size_t>(-1)) {
widened_arg = new wchar_t[count + 1];
mbstowcs(widened_arg, safe_arg, count + 1);
}
#endif
return widened_arg;
}
/// Python 2.x/3.x-compatible version of `PySys_SetArgv`
inline void set_interpreter_argv(int argc, const char *const *argv, bool add_program_dir_to_path) {
// Before it was special-cased in python 3.8, passing an empty or null argv
// caused a segfault, so we have to reimplement the special case ourselves.
bool special_case = (argv == nullptr || argc <= 0);
const char *const empty_argv[]{"\0"};
const char *const *safe_argv = special_case ? empty_argv : argv;
if (special_case)
argc = 1;
auto argv_size = static_cast<size_t>(argc);
#if PY_MAJOR_VERSION >= 3
// SetArgv* on python 3 takes wchar_t, so we have to convert.
std::unique_ptr<wchar_t *[]> widened_argv(new wchar_t *[argv_size]);
std::vector<std::unique_ptr<wchar_t[], wide_char_arg_deleter>> widened_argv_entries;
widened_argv_entries.reserve(argv_size);
for (size_t ii = 0; ii < argv_size; ++ii) {
widened_argv_entries.emplace_back(widen_chars(safe_argv[ii]));
if (!widened_argv_entries.back()) {
// A null here indicates a character-encoding failure or the python
// interpreter out of memory. Give up.
return;
}
widened_argv[ii] = widened_argv_entries.back().get();
}
auto pysys_argv = widened_argv.get();
#else
// python 2.x
std::vector<std::string> strings{safe_argv, safe_argv + argv_size};
std::vector<char *> char_strings{argv_size};
for (std::size_t i = 0; i < argv_size; ++i)
char_strings[i] = &strings[i][0];
char **pysys_argv = char_strings.data();
#endif
PySys_SetArgvEx(argc, pysys_argv, static_cast<int>(add_program_dir_to_path));
}
PYBIND11_NAMESPACE_END(detail) PYBIND11_NAMESPACE_END(detail)
/** \rst /** \rst
Initialize the Python interpreter. No other pybind11 or CPython API functions can be Initialize the Python interpreter. No other pybind11 or CPython API functions can be
called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The
optional parameter can be used to skip the registration of signal handlers (see the optional `init_signal_handlers` parameter can be used to skip the registration of
`Python documentation`_ for details). Calling this function again after the interpreter signal handlers (see the `Python documentation`_ for details). Calling this function
has already been initialized is a fatal error. again after the interpreter has already been initialized is a fatal error.
If initializing the Python interpreter fails, then the program is terminated. (This If initializing the Python interpreter fails, then the program is terminated. (This
is controlled by the CPython runtime and is an exception to pybind11's normal behavior is controlled by the CPython runtime and is an exception to pybind11's normal behavior
of throwing exceptions on errors.) of throwing exceptions on errors.)
The remaining optional parameters, `argc`, `argv`, and `add_program_dir_to_path` are
used to populate ``sys.argv`` and ``sys.path``.
See the |PySys_SetArgvEx documentation|_ for details.
.. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx .. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx
.. |PySys_SetArgvEx documentation| replace:: ``PySys_SetArgvEx`` documentation
.. _PySys_SetArgvEx documentation: https://docs.python.org/3/c-api/init.html#c.PySys_SetArgvEx
\endrst */ \endrst */
inline void initialize_interpreter(bool init_signal_handlers = true) { inline void initialize_interpreter(bool init_signal_handlers = true,
int argc = 0,
const char *const *argv = nullptr,
bool add_program_dir_to_path = true) {
if (Py_IsInitialized() != 0) if (Py_IsInitialized() != 0)
pybind11_fail("The interpreter is already running"); pybind11_fail("The interpreter is already running");
Py_InitializeEx(init_signal_handlers ? 1 : 0); Py_InitializeEx(init_signal_handlers ? 1 : 0);
// Make .py files in the working directory available by default detail::set_interpreter_argv(argc, argv, add_program_dir_to_path);
module_::import("sys").attr("path").cast<list>().append(".");
} }
/** \rst /** \rst
@ -167,6 +247,8 @@ inline void finalize_interpreter() {
Scope guard version of `initialize_interpreter` and `finalize_interpreter`. Scope guard version of `initialize_interpreter` and `finalize_interpreter`.
This a move-only guard and only a single instance can exist. This a move-only guard and only a single instance can exist.
See `initialize_interpreter` for a discussion of its constructor arguments.
.. code-block:: cpp .. code-block:: cpp
#include <pybind11/embed.h> #include <pybind11/embed.h>
@ -178,8 +260,11 @@ inline void finalize_interpreter() {
\endrst */ \endrst */
class scoped_interpreter { class scoped_interpreter {
public: public:
scoped_interpreter(bool init_signal_handlers = true) { scoped_interpreter(bool init_signal_handlers = true,
initialize_interpreter(init_signal_handlers); int argc = 0,
const char *const *argv = nullptr,
bool add_program_dir_to_path = true) {
initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path);
} }
scoped_interpreter(const scoped_interpreter &) = delete; scoped_interpreter(const scoped_interpreter &) = delete;

View File

@ -23,6 +23,7 @@ public:
std::string the_message() const { return message; } std::string the_message() const { return message; }
virtual int the_answer() const = 0; virtual int the_answer() const = 0;
virtual std::string argv0() const = 0;
private: private:
std::string message; std::string message;
@ -32,6 +33,7 @@ class PyWidget final : public Widget {
using Widget::Widget; using Widget::Widget;
int the_answer() const override { PYBIND11_OVERRIDE_PURE(int, Widget, the_answer); } int the_answer() const override { PYBIND11_OVERRIDE_PURE(int, Widget, the_answer); }
std::string argv0() const override { PYBIND11_OVERRIDE_PURE(std::string, Widget, argv0); }
}; };
PYBIND11_EMBEDDED_MODULE(widget_module, m) { PYBIND11_EMBEDDED_MODULE(widget_module, m) {
@ -299,3 +301,25 @@ TEST_CASE("Reload module from file") {
result = module_.attr("test")().cast<int>(); result = module_.attr("test")().cast<int>();
REQUIRE(result == 2); REQUIRE(result == 2);
} }
TEST_CASE("sys.argv gets initialized properly") {
py::finalize_interpreter();
{
py::scoped_interpreter default_scope;
auto module = py::module::import("test_interpreter");
auto py_widget = module.attr("DerivedWidget")("The question");
const auto &cpp_widget = py_widget.cast<const Widget &>();
REQUIRE(cpp_widget.argv0().empty());
}
{
char *argv[] = {strdup("a.out")};
py::scoped_interpreter argv_scope(true, 1, argv);
free(argv[0]);
auto module = py::module::import("test_interpreter");
auto py_widget = module.attr("DerivedWidget")("The question");
const auto &cpp_widget = py_widget.cast<const Widget &>();
REQUIRE(cpp_widget.argv0() == "a.out");
}
py::initialize_interpreter();
}

View File

@ -1,4 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys
from widget_module import Widget from widget_module import Widget
@ -8,3 +10,6 @@ class DerivedWidget(Widget):
def the_answer(self): def the_answer(self):
return 42 return 42
def argv0(self):
return sys.argv[0]