SimpleJSON和NumPy数组

使用simplejson序列化numpy数组的最有效方法是什么?

我会使用 simplejson.dumps(somearray.tolist()),这是最方便的做法(前提是我还在用 simplejson,那意味着被困在 Python 2.5 或更早的版本;2.6 及更高版本的标准库中有用法相同的 json 模块,所以只要所用的 Python 版本支持,我当然会用它 ;-)。

为了提高效率,你可以继承 json.JSONEncoder(在 json 模块中;我不知道旧版本的 simplejson 是否已经提供了这种定制的可能性),并在 default 方法中对 numpy.array 的实例做特殊处理,“即时”地把它们转换为列表或元组。 不过,我有些怀疑:就性能而言,这种做法带来的收益未必足以证明这番努力是值得的。

为了保持dtype和维度试试这个:

 import base64
 import json

 import numpy as np


 class NumpyEncoder(json.JSONEncoder):
     """JSON encoder that stores ndarrays as base64 data plus dtype and shape."""

     def default(self, obj):
         """Encode an ndarray as a dict holding its dtype, shape and raw data.

         The raw bytes are base64 encoded and stored as text under the
         ``__ndarray__`` key. Any other object is handed to the base class,
         which raises ``TypeError``.
         """
         if isinstance(obj, np.ndarray):
             # ascontiguousarray returns obj itself (no copy) when it is
             # already C-contiguous, so the buffer can be read directly.
             contiguous = np.ascontiguousarray(obj)
             # b64encode returns bytes; JSON needs text, so decode to str.
             data_b64 = base64.b64encode(contiguous.data).decode('ascii')
             return dict(__ndarray__=data_b64,
                         dtype=str(obj.dtype),
                         shape=obj.shape)
         # Let the base class default method raise the TypeError.
         return json.JSONEncoder.default(self, obj)


 def json_numpy_obj_hook(dct):
     """Decode a previously encoded numpy ndarray with proper shape and dtype.

     :param dct: (dict) json encoded ndarray
     :return: (ndarray) if input was an encoded ndarray, otherwise ``dct``
     """
     if isinstance(dct, dict) and '__ndarray__' in dct:
         data = base64.b64decode(dct['__ndarray__'])
         return np.frombuffer(data, dct['dtype']).reshape(dct['shape'])
     return dct


 # np.float was removed in NumPy 1.24; np.float64 is the same type.
 expected = np.arange(100, dtype=np.float64)
 dumped = json.dumps(expected, cls=NumpyEncoder)
 result = json.loads(dumped, object_hook=json_numpy_obj_hook)

 # None of the following assertions will be broken.
 assert result.dtype == expected.dtype, "Wrong Type"
 assert result.shape == expected.shape, "Wrong Shape"
 assert np.allclose(expected, result), "Wrong Values"

我发现这个json子类代码用于序列化字典中的一维numpy数组。 我试了一下,它适用于我。

 class NumpyAwareJSONEncoder(json.JSONEncoder):
     """JSON encoder that additionally accepts one-dimensional NumPy arrays."""

     def default(self, obj):
         """Return a list for a 1-D ndarray; defer anything else to the base class."""
         if not isinstance(obj, numpy.ndarray) or obj.ndim != 1:
             # The base implementation raises TypeError for unsupported objects.
             return json.JSONEncoder.default(self, obj)
         return obj.tolist()

我的字典名为 results。 以下是我把它写入文件 “data.json” 的方法:

 # Serialize before opening the file, so data.json is not touched when
 # encoding fails.
 j = json.dumps(results, cls=NumpyAwareJSONEncoder)
 # The with block closes the file even if the write raises.
 with open("data.json", "w") as f:
     f.write(j)

这显示了如何从1D NumPy数组转换为JSON并返回到数组:

 try:
     import json
 except ImportError:
     # Older interpreters without the stdlib json module.
     import simplejson as json

 import numpy as np


 def arr2json(arr):
     """Return the JSON text for *arr* after converting it with ``tolist()``."""
     as_list = arr.tolist()
     return json.dumps(as_list)


 def json2arr(astr, dtype):
     """Parse the JSON text *astr* and feed its items to ``np.fromiter`` with *dtype*."""
     values = json.loads(astr)
     return np.fromiter(values, dtype)


 arr = np.arange(10)
 astr = arr2json(arr)
 print(repr(astr))
 # '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]'

 dt = np.int32
 arr = json2arr(astr, dt)
 print(repr(arr))
 # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

建立在 tlausch 的答案之上,这里给出一种对 NumPy 数组进行 JSON 编码的方法,它可以保留任意 NumPy 数组的形状和 dtype(包括具有复合(结构化)dtype 的数组)。

 import base64
 import io
 import json

 import numpy as np


 class NDArrayEncoder(json.JSONEncoder):
     """JSON encoder that stores ndarrays as base64-encoded compressed .npz data."""

     def default(self, obj):
         """Return ``{'b64npz': <base64 text>}`` for an ndarray.

         Any other object is handed to the base class, which raises
         ``TypeError``.
         """
         if isinstance(obj, np.ndarray):
             buffer = io.BytesIO()
             np.savez_compressed(buffer, obj=obj)
             # b64encode returns bytes, which json cannot serialize on
             # Python 3, so the payload is stored as ASCII text.
             payload = base64.b64encode(buffer.getvalue()).decode('ascii')
             return {'b64npz': payload}
         return json.JSONEncoder.default(self, obj)


 def ndarray_decoder(dct):
     """object_hook that restores an array written by ``NDArrayEncoder``.

     Dicts without a ``'b64npz'`` key are returned unchanged.
     """
     if isinstance(dct, dict) and 'b64npz' in dct:
         buffer = io.BytesIO(base64.b64decode(dct['b64npz']))
         # Close the archive once the array has been read from it.
         with np.load(buffer) as archive:
             return archive['obj']
     return dct


 # Make expected non-contiguous structured array. The 8-byte integer dtype
 # is spelled out so the 8-byte structured view below has a matching item
 # size on every platform.
 expected = np.arange(10, dtype='<i8')[::2]
 expected = expected.view('<i4,<f4')

 dumped = json.dumps(expected, cls=NDArrayEncoder)
 result = json.loads(dumped, object_hook=ndarray_decoder)

 assert result.dtype == expected.dtype, "Wrong Type"
 assert result.shape == expected.shape, "Wrong Shape"
 assert np.array_equal(expected, result), "Wrong Values"

如果你想将Russ的方法应用于n维numpy数组,你可以试试这个

 class NumpyAwareJSONEncoder(json.JSONEncoder):
     """JSON encoder that accepts NumPy arrays of any dimensionality."""

     def default(self, obj):
         """Return nested lists for an ndarray; defer anything else to the base class.

         ``tolist()`` already nests one list level per dimension, and it also
         handles 0-d arrays (returning the bare scalar), which a manual
         recursion over ``obj.shape[0]`` cannot index.
         """
         if isinstance(obj, numpy.ndarray):
             return obj.tolist()
         # The base implementation raises TypeError for unsupported objects.
         return json.JSONEncoder.default(self, obj)

这会直接把 n 维数组转换为嵌套深度为 “n” 的列表。 要把这样的列表转换回 numpy 数组,无论列表的嵌套“深度”是多少,使用 my_nparray = numpy.array(my_list) 即可。

改进Russ的答案,我还将包括np.generic标量 :

 class NumpyAwareJSONEncoder(json.JSONEncoder):
     """JSON encoder that accepts 1-D NumPy arrays and NumPy scalar values."""

     def default(self, obj):
         """Convert NumPy scalars via ``item()`` and 1-D arrays via ``tolist()``."""
         if isinstance(obj, np.generic):
             return obj.item()
         if isinstance(obj, np.ndarray) and obj.ndim == 1:
             return obj.tolist()
         # Anything else is left to the base class, which raises TypeError.
         return json.JSONEncoder.default(self, obj)

你也可以通过这样的方式只用一个传入json.dumps的函数来回答这个问题:

 # json.dumps calls the function passed as ``default`` for each object it
 # cannot serialize on its own -- here, the ndarray.
 json.dumps(np.array([1, 2, 3]), default=json_numpy_serializer)

 import numpy as np

 # NumPy scalar types that are converted with ``item()``. Built once at
 # import time instead of on every call.
 _ITEM_CONVERTED_NUMPY_TYPES = (
     np.bool_,
     # np.bytes_, -- python `bytes` class is not json serializable
     # np.complex64, -- python `complex` class is not json serializable
     # np.complex128, -- python `complex` class is not json serializable
     # np.clongdouble, -- no json serializable python native
     # np.datetime64, -- python `datetime.datetime` class is not json serializable
     np.float16,
     np.float32,
     np.float64,
     # np.longdouble, -- special handling in the serializer
     np.int8,
     np.int16,
     np.int32,
     np.int64,
     # np.object_ -- should already be evaluated as python native
     np.str_,
     np.timedelta64,
     np.uint8,
     np.uint16,
     np.uint32,
     np.uint64,
     np.void,
 )


 def json_numpy_serializer(o):
     """Serialize numpy types for json.

     Intended to be passed as ``default=`` to ``json.dumps``.

     Parameters:
         o (object): any python object which fails to be serialized by json

     Returns:
         A list for an ndarray, or the ``item()`` value / ``float`` for a
         supported numpy scalar.

     Raises:
         TypeError: if ``o`` is not a supported numpy object.

     Example:
         >>> import json
         >>> a = np.array([1, 2, 3])
         >>> json.dumps(a, default=json_numpy_serializer)
         '[1, 2, 3]'
     """
     if isinstance(o, np.ndarray):
         return o.tolist()
     if isinstance(o, _ITEM_CONVERTED_NUMPY_TYPES):
         return o.item()
     # np.float128 only exists on some platforms; np.longdouble is always
     # defined and is the same type wherever np.float128 exists.
     if isinstance(o, np.longdouble):
         return float(o)
     raise TypeError("{} of type {} is not JSON serializable".format(repr(o), type(o)))


 # Backward-compatible alias for the original, misspelled function name.
 json_numpy_serialzer = json_numpy_serializer

validation:

 # Scalar types skipped by the check below.
 # np.longdouble / np.clongdouble are used instead of np.float128 /
 # np.complex256 because the sized names do not exist on every platform.
 need_addition_json_handeling = (
     np.bytes_,
     np.complex64,
     np.complex128,
     np.clongdouble,
     np.datetime64,
     np.longdouble,
 )

 # np.typeDict was removed in NumPy 1.24; np.sctypeDict is the same mapping.
 # Sorting by name makes the printed order reproducible between runs.
 numpy_types = tuple(
     sorted(set(np.sctypeDict.values()), key=lambda t: t.__name__)
 )

 for numpy_type in numpy_types:
     print(numpy_type)

     if numpy_type == np.void:
         # structured dtypes evaluate as np.void, so exercise one of those
         dtype = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
     elif numpy_type in need_addition_json_handeling:
         print('python native can not be json serialized')
         continue
     else:
         dtype = numpy_type

     a = np.ones(1, dtype=dtype)
     json.dumps(a, default=json_numpy_serialzer)

一个快速,但不是真正的最佳方式是使用pandas :

 import pandas as pd

 # Wrap the array in a Series and serialize it with orient='values'.
 # NOTE(review): ``your_array`` is a placeholder for the caller's array;
 # Series presumably expects 1-D data -- confirm for multi-dimensional input.
 pd.Series(your_array).to_json(orient='values')

我刚刚发现了 tlausch 对这个问题的回答,并意识到它几乎完全解决了我的问题,但至少对我来说,它在 Python 3.5 中无法工作,原因是存在几个错误:1 – 无限递归;2 – 数据被保存为 None。

因为我不能直接评论原来的答案,这里是我的版本:

 import base64
 import json

 import numpy as np


 class NumpyEncoder(json.JSONEncoder):
     """JSON encoder that stores ndarrays as base64 data plus dtype and shape."""

     def default(self, obj):
         """Encode an ndarray as a dict holding its dtype, shape and raw data.

         The raw bytes are base64 encoded and stored as text under the
         ``__ndarray__`` key. Any other object is handed to the base class,
         which raises ``TypeError``.
         """
         if isinstance(obj, np.ndarray):
             # ascontiguousarray returns obj itself (no copy) when it is
             # already C-contiguous, so the buffer can be read directly.
             contiguous = np.ascontiguousarray(obj)
             data_b64 = base64.b64encode(contiguous.data)
             return dict(__ndarray__=data_b64.decode('utf-8'),
                         dtype=str(obj.dtype),
                         shape=obj.shape)
         # Without this fallback default() returns None for unsupported
         # objects, and they are silently written as JSON null.
         return json.JSONEncoder.default(self, obj)


 def json_numpy_obj_hook(dct):
     """Decode a previously encoded numpy ndarray with proper shape and dtype.

     :param dct: (dict) json encoded ndarray
     :return: (ndarray) if input was an encoded ndarray, otherwise ``dct``
     """
     if isinstance(dct, dict) and '__ndarray__' in dct:
         data = base64.b64decode(dct['__ndarray__'])
         return np.frombuffer(data, dct['dtype']).reshape(dct['shape'])
     return dct


 # np.float was removed in NumPy 1.24; np.float64 is the same type.
 expected = np.arange(100, dtype=np.float64)
 dumped = json.dumps(expected, cls=NumpyEncoder)
 result = json.loads(dumped, object_hook=json_numpy_obj_hook)

 # None of the following assertions will be broken.
 assert result.dtype == expected.dtype, "Wrong Type"
 assert result.shape == expected.shape, "Wrong Shape"
 assert np.allclose(expected, result), "Wrong Values"