Skip to content

Commit

Permalink
pure datatype with string arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
miquelduranfrigola committed Sep 29, 2022
1 parent 85c2a7f commit 1c82060
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
21 changes: 19 additions & 2 deletions ersilia/io/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
import collections
from .dataframe import Dataframe
from .readers.file import FileTyper
from .pure import PureDataTyper
from ..serve.schema import ApiSchema
from .. import ErsiliaBase
from ..default import FEATURE_MERGE_PATTERN
from ..utils.hdf5 import Hdf5Data, Hdf5DataStacker
from ..default import FEATURE_MERGE_PATTERN


class DataFrame(object):
Expand Down Expand Up @@ -173,15 +174,31 @@ def __cast_values(self, vals, dtypes, output_keys):
v += [v_]
return v

def _guess_pure_dtype_if_absent(self, vals):
    """Guess the pure datatype of ``vals`` when none is otherwise available.

    The values are handed to ``PureDataTyper``; the descriptor returned by
    its ``get_type()`` is logged at debug level and its ``"type"`` entry is
    returned to the caller.
    """
    guessed = PureDataTyper(vals).get_type()
    self.logger.debug("Guessed pure datatype: {0}".format(guessed))
    return guessed["type"]

def __expand_output_keys(self, vals, output_keys):
output_keys_expanded = []
if len(output_keys) == 1:
merge_key = False
else:
merge_key = True
current_pure_dtype = {}
for v, ok in zip(vals, output_keys):
self.logger.debug("Data: {0}".format(ok))
self.logger.debug("Values: {0}".format(v))
m = self.__meta_by_key(ok)
t = self.__pure_dtype(ok)
if ok not in current_pure_dtype:
t = self.__pure_dtype(ok)
if t is None:
t = self._guess_pure_dtype_if_absent(v)
current_pure_dtype[ok] = t
else:
t = current_pure_dtype[ok]
self.logger.debug("Pure datatype: {0}".format(t))
if t in self._array_types:
assert m is not None
if v is not None:
Expand Down
6 changes: 3 additions & 3 deletions ersilia/io/pure.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _is_array(self):

def _is_numeric_array(self):
if self._is_array():
data = np.array(self.data).ravel()
data = np.array(self.data).ravel().tolist()
for x in data:
if not PureDataTyper(x)._is_numeric():
return False
Expand All @@ -37,7 +37,7 @@ def _is_numeric_array(self):

def _is_string_array(self):
if self._is_array():
data = np.array(self.data).ravel()
data = np.array(self.data).ravel().tolist()
for x in data:
if not PureDataTyper(x)._is_string():
return False
Expand All @@ -49,7 +49,7 @@ def _is_mixed_array(self):
if self._is_array():
has_numeric = False
has_string = False
data = np.array(self.data).ravel()
data = np.array(self.data).ravel().tolist()
for x in data:
if PureDataTyper(x)._is_numeric():
has_numeric = True
Expand Down
6 changes: 4 additions & 2 deletions ersilia/utils/csvfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ def __init__(self):
def load(self, csv_file):
    """Load a CSV file into ``features``, ``keys``, ``inputs`` and ``values``.

    The first row is the header. Every header cell other than the
    bookkeeping columns ``key``, ``input`` and ``text`` is taken as a
    feature name. For each following row, the first cell is stored as the
    key, the second as the input, and the trailing ``len(self.features)``
    cells as the values.

    Parameters
    ----------
    csv_file : str
        Path to the CSV file to read.

    Raises
    ------
    StopIteration
        If the file has no header row.
    """
    non_feature_columns = ("key", "input", "text")
    # The csv module expects files opened with newline="" so that newlines
    # embedded in quoted fields are handled by the reader itself.
    with open(csv_file, "r", newline="") as f:
        reader = csv.reader(f)
        # Read the header exactly once; a second next() would swallow the
        # first data row.
        header = next(reader)
        self.features = [x for x in header if x not in non_feature_columns]
        n_features = len(self.features)
        self.keys = []
        self.inputs = []
        self.values = []
        for r in reader:
            self.keys.append(r[0])
            self.inputs.append(r[1])
            # r[-0:] is the whole row, so the zero-feature case has to be
            # handled explicitly to yield an empty value list.
            self.values.append(r[-n_features:] if n_features else [])

0 comments on commit 1c82060

Please sign in to comment.