forked from pgspider/parquet_s3_fdw
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parquet_s3_fdw.cpp
113 lines (99 loc) · 3.11 KB
/
parquet_s3_fdw.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*-------------------------------------------------------------------------
*
* parquet_s3_fdw.cpp
* S3 accessing module for parquet_s3_fdw
*
* Portions Copyright (c) 2020, TOSHIBA CORPORATION
*
* IDENTIFICATION
* contrib/parquet_s3_fdw/parquet_s3_fdw.cpp
*
*-------------------------------------------------------------------------
*/
#include "parquet_s3_fdw.hpp"

#include <memory>
#include <string>
#include <utility>

#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
/*
 * NOTE(review): file-scope using-directive. Any unqualified standard-library
 * name used in this file resolves through it, so it cannot be dropped without
 * qualifying those uses first.
 */
using namespace std;
/* Allocation tag handed to Aws::New when this file creates AWS-managed objects. */
#define S3_ALLOCATION_TAG "S3_ALLOCATION_TAG"
/* Implementation of S3RandomAccessFile class methods */
/*
 * S3RandomAccessFile
 *      Bind the file object to one S3 object, identified by bucket and key,
 *      that will be accessed through the given client.
 *
 * The client pointer is stored as passed; the bucket and object names are
 * copied. The new file starts open, positioned at offset 0.
 */
S3RandomAccessFile::S3RandomAccessFile(Aws::S3::S3Client *s3_client,
                                       const Aws::String &bucket,
                                       const Aws::String &object)
    : bucket_(bucket),
      object_(object),
      s3_client_(s3_client)
{
    this->isclosed = false;
    this->offset = 0;
}
/*
 * Close
 *      Flag the file as closed. Only the local flag is changed here;
 *      the call always reports success.
 */
arrow::Status
S3RandomAccessFile::Close()
{
    this->isclosed = true;

    return arrow::Status::OK();
}
/*
 * Tell
 *      Report the current read position, i.e. the byte offset the next
 *      Read() will start from.
 */
arrow::Result<int64_t>
S3RandomAccessFile::Tell() const
{
    const int64_t current_position = this->offset;

    return current_position;
}
bool
S3RandomAccessFile::closed() const
{
return isclosed;
}
/*
 * Seek
 *      Move the read position to the given absolute byte offset.
 *
 * No request is sent to S3; the new position is only used by the next
 * Read(). A negative position is rejected with an Invalid status because
 * it could never be expressed as an HTTP byte range, and the current
 * position is left unchanged in that case.
 */
arrow::Status
S3RandomAccessFile::Seek(int64_t position)
{
    if (position < 0)
        return arrow::Status(arrow::StatusCode::Invalid,
                             "Seek position must not be negative");

    offset = position;
    return arrow::Status::OK();
}
/*
 * Read
 *      Fetch up to nbytes bytes, starting at the current offset, into the
 *      caller-supplied buffer using a ranged S3 GetObject request.
 *
 * nbytes: maximum number of bytes to read; out must have room for them.
 * out:    destination buffer.
 *
 * Returns the number of bytes actually copied into out and advances the
 * current offset by the same amount. Returns an Invalid status for a
 * negative nbytes and an IOError status when the GetObject request fails.
 */
arrow::Result<int64_t>
S3RandomAccessFile::Read(int64_t nbytes, void* out)
{
    if (nbytes < 0)
        return arrow::Status(arrow::StatusCode::Invalid,
                             "Read size must not be negative");

    int64_t n_read = 0;

    /*
     * A zero-length read would produce the range "bytes=N-(N-1)", which is
     * not a valid byte range, so answer it without contacting S3.
     */
    if (nbytes == 0)
        return n_read;

    /* HTTP byte ranges are inclusive on both ends, hence the "- 1". */
    const std::string range = "bytes=" + std::to_string(offset) + "-" +
                              std::to_string(offset + nbytes - 1);

    Aws::S3::Model::GetObjectRequest object_request;
    object_request.SetBucket(this->bucket_);
    object_request.SetKey(this->object_);
    object_request.SetRange(range.c_str());
    /* Keep the response body in memory instead of spooling it to a file. */
    object_request.SetResponseStreamFactory([](){
        return Aws::New<Aws::StringStream >(S3_ALLOCATION_TAG); });

    Aws::S3::Model::GetObjectOutcome get_object_outcome =
        this->s3_client_->GetObject(object_request);
    if (!get_object_outcome.IsSuccess()) {
        const auto &err = get_object_outcome.GetError();
        Aws::String msg = "GetObject failed. " + err.GetExceptionName() + ": " + err.GetMessage();
        return arrow::Status(arrow::StatusCode::IOError, msg.c_str());
    }

    /*
     * Never copy more than the caller's buffer can hold, even if the
     * response reports a larger (or nonsensical) content length.
     */
    int64_t expected = get_object_outcome.GetResult().GetContentLength();
    if (expected > nbytes)
        expected = nbytes;
    if (expected < 0)
        expected = 0;

    /*
     * Copy straight from the response stream and trust the stream's own
     * count, so the offset only moves by bytes that really reached out.
     */
    auto &body = get_object_outcome.GetResult().GetBody();
    body.read(static_cast<char *>(out), expected);
    n_read = body.gcount();

    offset += n_read;
    return n_read;
}
/*
 * Read
 *      Fetch up to nbytes bytes, starting at the current offset, and return
 *      them in a newly allocated Arrow buffer.
 *
 * The buffer is allocated through Arrow so that it owns its memory and
 * releases it when the last reference goes away. When fewer bytes than
 * requested are available the buffer is shrunk to the number actually read.
 *
 * Returns an Invalid status for a negative nbytes, and passes on any error
 * from the allocation or from the underlying Read(nbytes, out) call instead
 * of aborting the process.
 */
arrow::Result<std::shared_ptr<arrow::Buffer>>
S3RandomAccessFile::Read(int64_t nbytes)
{
    if (nbytes < 0)
        return arrow::Status(arrow::StatusCode::Invalid,
                             "Read size must not be negative");

    ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes));

    /* Nothing to fetch for an empty request; hand back the empty buffer. */
    if (nbytes > 0)
    {
        ARROW_ASSIGN_OR_RAISE(int64_t n_read,
                              this->Read(nbytes, buffer->mutable_data()));
        if (n_read < nbytes)
            ARROW_RETURN_NOT_OK(buffer->Resize(n_read));
    }

    return std::shared_ptr<arrow::Buffer>(std::move(buffer));
}
/*
 * GetSize
 *      Return the size in bytes of the S3 object, as reported by the
 *      content length of a HeadObject request.
 *
 * Returns an IOError status carrying the S3 exception name and message
 * when the HeadObject request fails.
 */
arrow::Result<int64_t>
S3RandomAccessFile::GetSize()
{
    Aws::S3::Model::HeadObjectRequest head_request;
    head_request.SetBucket(bucket_);
    head_request.SetKey(object_);

    auto head_outcome = this->s3_client_->HeadObject(head_request);
    if (!head_outcome.IsSuccess())
    {
        /* Report the failure the same way Read() does so it can be diagnosed. */
        const auto &err = head_outcome.GetError();
        Aws::String msg = "HeadObject failed. " + err.GetExceptionName() + ": " + err.GetMessage();
        return arrow::Status(arrow::StatusCode::IOError, msg.c_str());
    }

    /* Only a scalar is needed, so there is no reason to take ownership of the result. */
    int64_t file_size = head_outcome.GetResult().GetContentLength();
    return file_size;
}