rstudio · kdkavanagh · Sep 2, 2019 · Sep 2, 2019 · Feb 15, 2020 · Feb 15, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -45,6 +45,7 @@ Imports:
     methods,
     png,
     rappdirs,
+    bit64,
     utils,
     rlang,
     withr

diff --git a/src/libpython.h b/src/libpython.h
@@ -352,6 +352,12 @@ LIBPYTHON_EXTERN PyObject* (*PyInt_FromLong)(long);
 LIBPYTHON_EXTERN long (*PyInt_AsLong)(PyObject *);
 LIBPYTHON_EXTERN PyObject* (*PyLong_FromLong)(long);
 LIBPYTHON_EXTERN long (*PyLong_AsLong)(PyObject *);
+LIBPYTHON_EXTERN PyObject* (*PyLong_FromUnsignedLong)(long);
+LIBPYTHON_EXTERN unsigned long (*PyLong_AsUnsignedLong)(PyObject *);
+LIBPYTHON_EXTERN long (*PyLong_AsLongAndOverflow)(PyObject *, int*);
+LIBPYTHON_EXTERN PyObject* (*PyInt_FromUnsignedLong)(long);
+LIBPYTHON_EXTERN unsigned long (*PyInt_AsUnsignedLong)(PyObject *);
+LIBPYTHON_EXTERN long (*PyInt_AsLongAndOverflow)(PyObject *, int*);
 
 LIBPYTHON_EXTERN PyObject* (*PyBool_FromLong)(long);
 
@@ -440,6 +446,7 @@ typedef struct tagPyArrayObject {
 
 typedef unsigned char npy_bool;
 typedef long npy_long;
+typedef unsigned long npy_ulong;
 typedef double npy_double;
 typedef struct { double real, imag; } npy_cdouble;
 typedef npy_cdouble npy_complex128;

diff --git a/src/python.cpp b/src/python.cpp
@@ -82,6 +82,27 @@ std::wstring s_python_v3;
 std::string s_pythonhome;
 std::wstring s_pythonhome_v3;
 
+const std::string CONFIG_LONG_AS_BIT64="reticulate.long_as_bit64";
+const std::string CONFIG_ULONG_AS_BIT64="reticulate.ulong_as_bit64";
+
+template<typename T>
+T getConfig(std::string config, T defValue) {
+  Environment base( "package:base" ) ;
+  Function getOption = base["getOption"];
+  SEXP s = getOption(config, defValue);
+  if(s == NULL) {
+    return defValue;
+  }
+  return as<T>(s);
+}
+
+bool convertLongToBit64() {
+  return getConfig<bool>(CONFIG_LONG_AS_BIT64, false);
+}
+
+bool convertULongToBit64() {
+  return getConfig<bool>(CONFIG_ULONG_AS_BIT64, false);
+}
 
 
 // helper to convert std::string to std::wstring
@@ -294,12 +315,19 @@ int narrow_array_typenum(int typenum) {
   case NPY_INT:
     typenum = NPY_LONG;
     break;
-    // double
-  case NPY_UINT:
-  case NPY_ULONG:
-  case NPY_ULONGLONG:
+
   case NPY_LONG:
   case NPY_LONGLONG:
+    typenum = convertLongToBit64() ? NPY_LONG : NPY_DOUBLE;
+    break;
+
+  case NPY_ULONG:
+  case NPY_ULONGLONG:
+    typenum = convertULongToBit64() ? NPY_ULONG : NPY_DOUBLE;
+    break;
+
+    // double
+  case NPY_UINT:
   case NPY_HALF:
   case NPY_FLOAT:
   case NPY_DOUBLE:
@@ -328,12 +356,24 @@ int narrow_array_typenum(int typenum) {
   return typenum;
 }
 
+int typenum(PyArrayObject* array) {
+  return PyArray_TYPE(array);
+}
+
+int typenum(PyArray_Descr* descr) {
+  return descr->type_num;
+}
+
+int typenum(int typeenum) {
+  return typeenum;
+}
+
 int narrow_array_typenum(PyArrayObject* array) {
-  return narrow_array_typenum(PyArray_TYPE(array));
+  return narrow_array_typenum(typenum(array));
 }
 
 int narrow_array_typenum(PyArray_Descr* descr) {
-  return narrow_array_typenum(descr->type_num);
+  return narrow_array_typenum(typenum(descr->type_num));
-  return narrow_array_typenum(typenum(descr->type_num));
+  return narrow_array_typenum(typenum(descr));
-  return narrow_array_typenum(typenum(descr->type_num));
+  return narrow_array_typenum(typenum(descr));
 }
 
 bool is_numpy_str(PyObject* x) {
@@ -911,10 +951,8 @@ bool py_is_callable(PyObjectRef x) {
     return py_is_callable(x.get());
 }
 
-
 // convert a python object to an R object
 SEXP py_to_r(PyObject* x, bool convert) {
-
   // NULL for Python None
   if (py_is_none(x))
     return R_NilValue;
@@ -928,9 +966,18 @@ SEXP py_to_r(PyObject* x, bool convert) {
       return LogicalVector::create(x == Py_True);
 
     // integer
-    else if (scalarType == INTSXP)
-      return IntegerVector::create(PyInt_AsLong(x));
-
+    else if (scalarType == INTSXP) {
+      long val = PyLong_AsLong(x);
+      if((val > std::numeric_limits<int>::max() || val < std::numeric_limits<int>::min()) && convertLongToBit64()) {
+        Rcpp::NumericVector vec(1);
+        std::memcpy(&(vec[0]), &(val), sizeof(double));
+        vec.attr("class") = "integer64";
+        return vec;
+      }
+      else{
+        return IntegerVector::create(val);
+      }
+    }
     // double
     else if (scalarType == REALSXP)
       return NumericVector::create(PyFloat_AsDouble(x));
@@ -963,9 +1010,23 @@ SEXP py_to_r(PyObject* x, bool convert) {
       return vec;
     } else if (scalarType == INTSXP) {
       Rcpp::IntegerVector vec(len);
-      for (Py_ssize_t i = 0; i<len; i++)
-        vec[i] = PyInt_AsLong(PyList_GetItem(x, i));
+      for (Py_ssize_t i = 0; i<len; i++) {
+        long num = PyLong_AsLong(PyList_GetItem(x, i));
+        if((num > std::numeric_limits<int>::max() || num < std::numeric_limits<int>::min()) && convertLongToBit64()) {
+          //We need to start over an interpret as 64 bit int
+          Rcpp::NumericVector nVec(len);
+          long long* res_ptr  = (long long*) dataptr(nVec);
+          for (Py_ssize_t j = 0; j<len; j++) {
+            res_ptr[j] = PyInt_AsLong(PyList_GetItem(x, j));;
+          }
+          nVec.attr("class") = "integer64";
+          return nVec;
+        } else {
+          vec[i] = num;
+        }
+      }
       return vec;
+
     } else if (scalarType == CPLXSXP) {
       Rcpp::ComplexVector vec(len);
       for (Py_ssize_t i = 0; i<len; i++) {
@@ -1055,6 +1116,7 @@ SEXP py_to_r(PyObject* x, bool convert) {
     }
 
     // determine the target type of the array
+    int oriType = typenum(array);
     int typenum = narrow_array_typenum(array);
 
     // cast it to a fortran array (PyArray_CastToType steals the descr)
@@ -1079,10 +1141,32 @@ SEXP py_to_r(PyObject* x, bool convert) {
       }
 
       case NPY_LONG: {
-        npy_long* pData = (npy_long*)PyArray_DATA(array);
-        rArray = Rf_allocArray(INTSXP, dimsVector);
-        for (int i=0; i<len; i++)
-          INTEGER(rArray)[i] = pData[i];
+        if((oriType == NPY_LONG || oriType == NPY_LONGLONG) && convertLongToBit64()) {
+          Rcpp::NumericVector nVec(len);
+          //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
+          npy_ulong* pData = (npy_ulong*)PyArray_DATA(array);
+          std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
+          nVec.attr("class") = "integer64";
+          nVec.attr("dim") = dimsVector;
+          rArray = nVec;
+        } else {
+          npy_long* pData = (npy_long*)PyArray_DATA(array);
+          rArray = Rf_allocArray(INTSXP, dimsVector);
+          for (int i=0; i<len; i++) {
+            INTEGER(rArray)[i] = pData[i];
+          }
+        }
+
+        break;
+      }
+      case NPY_ULONG: {
+        npy_ulong* pData = (npy_ulong*)PyArray_DATA(array);
+        Rcpp::NumericVector nVec(len);
+        //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
+        std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
+        nVec.attr("class") = CharacterVector::create("integer64", "np.ulong");
+        nVec.attr("dim") = dimsVector;
+        rArray = nVec;
         break;
       }
 
@@ -1342,6 +1426,7 @@ SEXP py_to_r(PyObject* x, bool convert) {
 
 }
 
+
 /* stretchy list, modified from R sources
    CAR of the list points to the last cons-cell
    CDR points to the first.
@@ -1359,6 +1444,7 @@ void GrowList(SEXP args_list, SEXP tag, SEXP dflt) {
   SEXP tmp = PROTECT(Rf_cons(dflt, R_NilValue));
   SET_TAG(tmp, tag);
 
+
   SETCDR(CAR(args_list), tmp); // set cdr on the last cons-cell
   SETCAR(args_list, tmp);      // update pointer to last cons cell
   UNPROTECT(2);
@@ -1477,6 +1563,7 @@ SEXP py_get_formals(PyObjectRef callable)
   return CDR(r_args);
 }
 
+
 bool is_convertible_to_numpy(RObject x) {
 
   if (!haveNumPy())
@@ -1517,7 +1604,7 @@ PyObject* r_to_py_numpy(RObject x, bool convert) {
       typenum = NPY_INT;
     data = &(INTEGER(sexp)[0]);
   } else if (type == REALSXP) {
-    typenum = NPY_DOUBLE;
+    typenum = x.inherits("integer64") ? (x.inherits("np.ulong") ? NPY_ULONG : NPY_LONG) : NPY_DOUBLE;
     data = &(REAL(sexp)[0]);
   } else if (type == LGLSXP) {
     typenum = NPY_BOOL;
@@ -1698,25 +1785,39 @@ PyObject* r_to_py_cpp(RObject x, bool convert) {
   }
 
   // numeric (pass length 1 vectors as scalars, otherwise pass list)
-  if (type == REALSXP) {
-
-    // handle scalars
+  } else if (type == REALSXP) {
+    bool isBit64  = x.inherits("integer64");
+    bool isUnsigned = x.inherits("np.ulong");
+    bool isInt64 = isBit64 && !isUnsigned;
+    bool isUint64 = isBit64 && isUnsigned;
     if (LENGTH(sexp) == 1) {
       double value = REAL(sexp)[0];
-      return PyFloat_FromDouble(value);
-    }
-
-    PyObjectPtr list(PyList_New(LENGTH(sexp)));
-    for (R_xlen_t i = 0; i<LENGTH(sexp); i++) {
-      double value = REAL(sexp)[i];
-      // NOTE: reference to added value is "stolen" by the list
-      int res = PyList_SetItem(list, i, PyFloat_FromDouble(value));
-      if (res != 0)
-        throw PythonException(py_fetch_error());
+      if(isInt64) {
+        return PyInt_FromLong(reinterpret_cast<long&>(value));
+      } else if(isUint64) {
+        return PyLong_FromUnsignedLong(reinterpret_cast<unsigned long&>(value));
+      } else {
+        return PyFloat_FromDouble(value);
+      }
+    } else {
+      PyObjectPtr list(PyList_New(LENGTH(sexp)));
+      for (R_xlen_t i = 0; i<LENGTH(sexp); i++) {
+        double value = REAL(sexp)[i];
+        // NOTE: reference to added value is "stolen" by the list
+        PyObject* obj;
+        if(isInt64) {
+          obj = PyInt_FromLong(reinterpret_cast<long&>(value));
+        } else if(isUint64) {
+          obj = PyLong_FromUnsignedLong(reinterpret_cast<unsigned long&>(value));
+        } else {
+          obj = PyFloat_FromDouble(value);
+        }
+        int res = PyList_SetItem(list, i, obj);
+        if (res != 0)
+          throw PythonException(py_fetch_error());
+      }
+      return list.detach();
     }
-
-    return list.detach();
-
   }
 
   // complex (pass length 1 vectors as scalars, otherwise pass list)
@@ -2703,6 +2804,7 @@ SEXP py_dict_get_item(PyObjectRef dict, RObject key) {
     return py_ref(Py_None, false);
   }
 
+
   Py_IncRef(item);
   return py_ref(item, dict.convert());
 
@@ -2765,6 +2867,7 @@ CharacterVector py_dict_get_keys_as_str(PyObjectRef dict) {
   std::vector<std::string> keys;
 
   PyObjectPtr it(PyObject_GetIter(py_keys));
+
   if (it.is_null())
     throw PythonException(py_fetch_error());
 
@@ -3397,4 +3500,5 @@ PyObjectRef py_capsule(SEXP x) {
     ensure_python_initialized();
 
   return py_ref(py_capsule_new(x), false);
+
 }
diff --git a/tests/testthat/test-python-numpy.R b/tests/testthat/test-python-numpy.R
@@ -29,11 +29,38 @@ test_that("Character arrays are handled correctly", {
   expect_equal(a1, py_to_r(r_to_py(a1)))
 })
 
+
+test_that("Long integer types are converted to bit64", {
+  skip_if_no_numpy()
+  np <- import("numpy", convert = FALSE)
+  dtypes <- c(np$int64, np$long, np$uint64, np$longlong, np$ulonglong)
+  require(bit64)
+  #First run with the default mode
+  lapply(dtypes, function(dtype) {
+    a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype)
+    expect_equal(class(as.vector(py_to_r(a1))), "numeric")
+  })
+
+  #Now with the options set
+  options(reticulate.long_as_bit64=TRUE)
+  options(reticulate.ulong_as_bit64=TRUE)
+  lapply(dtypes, function(dtype) {
+    a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype)
+    res = c('integer64')
+    if(dtype == np$uint64 || dtype == np$ulonglong) {
+      res = c(res, 'np.ulong')
+    }
+    expect_setequal(class(py_to_r(a1)), res)
+  })
+  options(reticulate.long_as_bit64=F)
+  options(reticulate.ulong_as_bit64=F)
+})
+
 test_that("Long integer types are converted to R numeric", {
   skip_if_no_numpy()
   np <- import("numpy", convert = FALSE)
-  dtypes <- c(np$int64, np$int32,
-              np$uint64, np$uint32)
+  dtypes <- c(np$uint32)
+
   lapply(dtypes, function(dtype) {
     a1 <- np$array(c(1L:30L), dtype = dtype)
     a1 <- as.vector(py_to_r(a1))
@@ -86,3 +113,4 @@ test_that("boolean matrices are converted appropriately", {
   A <- matrix(TRUE, nrow = 2, ncol = 2)
   expect_equal(A, py_to_r(r_to_py(A)))
 })
+
diff --git a/tests/testthat/test-python-pandas.R b/tests/testthat/test-python-pandas.R
@@ -166,6 +166,23 @@ test_that("NaT is converted to NA", {
 
   expect_equal(py_to_r(before), py_to_r(after))
 
+
+})
+
+test_that("Large ints are handled correctly", {
+  skip_if_no_pandas()
+  require(bit64)
+
+  options(reticulate.long_as_bit64=TRUE)
+  options(reticulate.ulong_as_bit64=TRUE)
+  A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786")))
+  expect_equal(A, py_to_r(r_to_py(A)))
+
+  options(reticulate.long_as_bit64=F)
+  options(reticulate.ulong_as_bit64=F)
+  A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786")))
+  expect_equal(A, py_to_r(r_to_py(A)))
+
 })
 
 test_that("pandas NAs are converted to R NAs", {
@@ -218,4 +235,5 @@ df = pd.DataFrame({"FCT": pd.Categorical(["No", "Yes"]),
 
   expect_identical(p_df, r_df)
 
+
 })
-Original file line number
+Diff line change
@@ Expand Up / @@ -45,6 +45,7 @@ Imports: @@
         methods,
         png,
         rappdirs,
+        bit64,
         utils,
         rlang,
         withr
@@ Expand Down @@