在Python中,如何将YAML映射加载为OrderedDicts?

我想让 PyYAML 的加载器把映射(以及有序映射)加载为 Python 2.7+ 的 OrderedDict 类型,而不是它目前使用的普通 dict 和键值对列表。

什么是最好的方式来做到这一点?

我喜欢 @詹姆斯 的解决方案,因为它很简单。 但是,它会更改默认的全局 yaml.Loader 类,这可能导致麻烦的副作用。 特别是在编写库代码时,这是一个坏主意。 另外,它不能直接与 yaml.safe_load() 配合使用。

幸运的是,这个解决方案无需太多努力就可以改进:

 import yaml
 from collections import OrderedDict


 def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
     """Load YAML from ``stream``, building every mapping with ``object_pairs_hook``.

     Args:
         stream: Anything ``yaml.load`` accepts as its first argument.
         Loader: PyYAML loader class to derive from (defaults to ``yaml.Loader``).
         object_pairs_hook: Callable that receives the list of ``(key, value)``
             pairs of a mapping node and returns the mapping object.

     Returns:
         Whatever ``yaml.load`` returns for ``stream`` with the derived loader.
     """
     mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

     def _pairs_to_mapping(loader, node):
         # Resolve merge keys first, then hand the ordered pairs to the hook.
         loader.flatten_mapping(node)
         pairs = loader.construct_pairs(node)
         return object_pairs_hook(pairs)

     # The constructor is registered on a throwaway subclass created per call,
     # not on ``Loader`` itself.
     class OrderedLoader(Loader):
         pass

     OrderedLoader.add_constructor(mapping_tag, _pairs_to_mapping)
     return yaml.load(stream, OrderedLoader)


 # usage example:
 # ordered_load(stream, yaml.SafeLoader)

对于序列化,我不知道有什么明显的通用做法,但至少下面这种方式不应该有任何副作用:

 def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
     """Dump ``data`` as YAML, emitting ``OrderedDict`` objects as plain mappings.

     Args:
         data: Object to serialize.
         stream: Optional output stream, forwarded to ``yaml.dump``.
         Dumper: PyYAML dumper class to derive from (defaults to ``yaml.Dumper``).
         **kwds: Extra keyword arguments forwarded to ``yaml.dump``.

     Returns:
         Whatever ``yaml.dump`` returns for the given arguments.
     """
     mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

     # The representer is registered on a throwaway subclass created per call,
     # not on ``Dumper`` itself.
     class OrderedDumper(Dumper):
         pass

     def _represent_ordered(dumper, mapping):
         # Pass the items (not the dict) so the pairs go out in iteration order.
         return dumper.represent_mapping(mapping_tag, mapping.items())

     OrderedDumper.add_representer(OrderedDict, _represent_ordered)
     return yaml.dump(data, stream, OrderedDumper, **kwds)


 # usage:
 # ordered_dump(data, Dumper=yaml.SafeDumper)

yaml 模块允许您指定自定义的“表示器”(representer)来将 Python 对象转换为文本,以及“构造器”(constructor)来完成相反的过程。

 import collections

 import yaml

 # Tag PyYAML assigns to plain mapping nodes.
 _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG


 def dict_representer(dumper, data):
     """Represent an ordered mapping as a YAML mapping, keeping item order.

     ``items()`` is used instead of the Python 2-only ``iteritems()`` so the
     representer works on both Python 2 and Python 3.
     """
     return dumper.represent_dict(data.items())


 def dict_constructor(loader, node):
     """Build a ``collections.OrderedDict`` from the pairs of a mapping node."""
     return collections.OrderedDict(loader.construct_pairs(node))


 # NOTE: these calls register the hooks through the module-level yaml
 # functions, i.e. globally rather than on a private Loader/Dumper subclass.
 yaml.add_representer(collections.OrderedDict, dict_representer)
 yaml.add_constructor(_mapping_tag, dict_constructor)

我非常怀疑这是不是最好的做法,但这是我想出来的方式,而且确实有效。 也可以作为 gist 获取。

 import yaml
 import yaml.constructor

 try:
     # included in standard lib from Python 2.7
     from collections import OrderedDict
 except ImportError:
     # try importing the backported drop-in replacement
     # it's available on PyPI
     from ordereddict import OrderedDict


 class OrderedDictYAMLLoader(yaml.Loader):
     """
     A YAML loader that loads mappings into ordered dictionaries.
     """

     def __init__(self, *args, **kwargs):
         """Initialise the base loader and register the ordered map constructors."""
         yaml.Loader.__init__(self, *args, **kwargs)

         # Both the plain map tag and the omap tag use the same constructor.
         self.add_constructor(u'tag:yaml.org,2002:map',
                              type(self).construct_yaml_map)
         self.add_constructor(u'tag:yaml.org,2002:omap',
                              type(self).construct_yaml_map)

     def construct_yaml_map(self, node):
         """Generator constructor: yield an empty OrderedDict, then fill it."""
         data = OrderedDict()
         # The empty object is yielded before it is populated (two-step
         # construction).
         yield data
         value = self.construct_mapping(node)
         data.update(value)

     def construct_mapping(self, node, deep=False):
         """Build an OrderedDict from a mapping node, in node order.

         Raises:
             yaml.constructor.ConstructorError: if ``node`` is not a mapping
                 node, or if one of its keys is not hashable.
         """
         if isinstance(node, yaml.MappingNode):
             self.flatten_mapping(node)
         else:
             raise yaml.constructor.ConstructorError(
                 None, None,
                 'expected a mapping node, but found %s' % node.id,
                 node.start_mark)

         mapping = OrderedDict()
         for key_node, value_node in node.value:
             key = self.construct_object(key_node, deep=deep)
             try:
                 hash(key)
             # ``as`` form: valid on Python 2.6+ and required on Python 3.
             except TypeError as exc:
                 raise yaml.constructor.ConstructorError(
                     'while constructing a mapping', node.start_mark,
                     'found unacceptable key (%s)' % exc,
                     key_node.start_mark)
             value = self.construct_object(value_node, deep=deep)
             mapping[key] = value
         return mapping

规范指出映射的顺序不受保证,但 YAML 文件中的条目当然是有顺序的,合适的解析器可以保留这一顺序,并透明地生成一个保持顺序的对象。 你只需要选择正确的解析器、加载器(loader)和转储器(dumper)¹:

 import sys

 import ruamel.yaml as yaml

 # Sample document: a nested mapping with a comment and a sequence.
 yaml_str = """\
 3: abc
 conf:
     10: def
     3: gij     # h is missing
 more:
 - what
 - else
 """

 # Load with the round-trip loader, change two nested values, then dump with
 # the round-trip dumper.
 # NOTE(review): newer ruamel.yaml releases favour the ``YAML()`` instance API
 # over these module-level load/dump calls; confirm against the installed
 # version.
 data = yaml.load(yaml_str, Loader=yaml.RoundTripLoader)
 data['conf'][10] = 'klm'
 data['conf'][3] = 'jig'
 yaml.dump(data, sys.stdout, Dumper=yaml.RoundTripDumper)

会给你:

 3: abc
 conf:
   10: klm
   3: jig       # h is missing
 more:
 - what
 - else

数据的类型是 CommentedMap,其功能类似于字典,但保留了额外的信息,直到被转储为止(包括保留的注释!)。

¹ 这是使用ruamel.yaml完成的,我是作者。 它是PyYAML的一个分支和超集。

我刚刚找到了一个基于这个问题的答案创建的 Python 库( https://pypi.python.org/pypi/yamlordereddictloader/0.1.1 ),使用非常简单:

 import yaml
 import yamlordereddictloader

 # Use a context manager so the file handle is closed after parsing.
 with open('myfile.yml') as stream:
     datas = yaml.load(stream, Loader=yamlordereddictloader.Loader)

在我的 Python 2.7 的 PyYAML 安装中,我更新了 __init__.py、constructor.py 和 loader.py。 现在 load 命令支持 object_pairs_hook 选项。 我所做改动的 diff 如下。

 __init__.py

 $ diff __init__.py Original
 64c64
 < def load(stream, Loader=Loader, **kwds):
 ---
 > def load(stream, Loader=Loader):
 69c69
 <     loader = Loader(stream, **kwds)
 ---
 >     loader = Loader(stream)
 75c75
 < def load_all(stream, Loader=Loader, **kwds):
 ---
 > def load_all(stream, Loader=Loader):
 80c80
 <     loader = Loader(stream, **kwds)
 ---
 >     loader = Loader(stream)

 constructor.py

 $ diff constructor.py Original
 20,21c20
 <     def __init__(self, object_pairs_hook=dict):
 <         self.object_pairs_hook = object_pairs_hook
 ---
 >     def __init__(self):
 27,29d25
 <     def create_object_hook(self):
 <         return self.object_pairs_hook()
 <
 54,55c50,51
 <         self.constructed_objects = self.create_object_hook()
 <         self.recursive_objects = self.create_object_hook()
 ---
 >         self.constructed_objects = {}
 >         self.recursive_objects = {}
 129c125
 <         mapping = self.create_object_hook()
 ---
 >         mapping = {}
 400c396
 <         data = self.create_object_hook()
 ---
 >         data = {}
 595c591
 <         dictitems = self.create_object_hook()
 ---
 >         dictitems = {}
 602c598
 <         dictitems = value.get('dictitems', self.create_object_hook())
 ---
 >         dictitems = value.get('dictitems', {})

 loader.py

 $ diff loader.py Original
 13c13
 <     def __init__(self, stream, **constructKwds):
 ---
 >     def __init__(self, stream):
 18c18
 <         BaseConstructor.__init__(self, **constructKwds)
 ---
 >         BaseConstructor.__init__(self)
 23c23
 <     def __init__(self, stream, **constructKwds):
 ---
 >     def __init__(self, stream):
 28c28
 <         SafeConstructor.__init__(self, **constructKwds)
 ---
 >         SafeConstructor.__init__(self)
 33c33
 <     def __init__(self, stream, **constructKwds):
 ---
 >     def __init__(self, stream):
 38c38
 <         Constructor.__init__(self, **constructKwds)
 ---
 >         Constructor.__init__(self)

有一个大约 5 年前开的 PyYAML 工单(ticket)。 它包含一些相关的链接,包括指向这个问题的链接 :) 我个人取用了 gist 317164,并稍作修改,使用 Python 2.7 的 OrderedDict 而不是其中包含的实现(只是用 from collections import OrderedDict 取代了那个类)。

这里有一个简单的解决方案,它还会检查映射中重复的顶级键。

 import yaml
 import re
 from collections import OrderedDict


 def yaml_load_od(fname):
     """Load a YAML file as an OrderedDict that preserves top-level key order.

     The file is read twice: a text scan collects the top-level keys in file
     order (and detects duplicates), then the file is parsed with PyYAML and
     the result is re-assembled in that order. Only the top level is ordered.

     Args:
         fname: Path of the YAML file to load.

     Returns:
         An ``OrderedDict`` mapping each top-level key found by the text scan
         to its parsed value.

     Raises:
         Exception: if any top-level key appears more than once.
     """
     # 1st pass: read through the handle already opened by ``with`` (the
     # original opened the file a second time here and never closed it).
     with open(fname, 'r') as f:
         lines = f.read().splitlines()

     top_keys = []
     duped_keys = []
     for line in lines:
         # A top-level key starts at column 0 and is followed by a colon.
         m = re.search(r'^([A-Za-z0-9_]+) *:', line)
         if not m:
             continue
         key = m.group(1)
         if key in top_keys:
             duped_keys.append(key)
         else:
             top_keys.append(key)
     if duped_keys:
         raise Exception('ERROR: duplicate keys: {}'.format(duped_keys))

     # 2nd pass to set up the OrderedDict
     with open(fname, 'r') as f:
         # The Loader is passed explicitly because newer PyYAML releases no
         # longer accept ``yaml.load`` without one.
         # NOTE(security): yaml.Loader can construct arbitrary Python objects;
         # switch to yaml.SafeLoader if ``fname`` may come from an untrusted
         # source.
         d_tmp = yaml.load(f, Loader=yaml.Loader)
     # NOTE(review): keys are looked up as the strings matched by the regex; a
     # key that YAML resolves to a non-string (e.g. ``10:``) would presumably
     # raise KeyError here — confirm if such keys are expected.
     return OrderedDict([(key, d_tmp[key]) for key in top_keys])