使用Python 2.7读取和写入CSV文件，包括unicode

我是Python的新手，我有一个关于如何使用Python来读取和写入CSV文件的问题。我的文件包含德语、法语等语言的文字。根据我的代码，这些文件可以在Python中正确读取，但是当我把内容写入一个新的CSV文件时，Unicode字符变成了一些奇怪的字符。

数据如下所示：在这里输入图像描述

而我的代码是：

# The asker's attempt (Python 2.7): open an input and an output CSV file.
import csv

# Input file, opened in binary mode and wrapped in a csv reader.
f=open('xxx.csv','rb')
reader=csv.reader(f)

# Output file, opened in binary mode; QUOTE_ALL quotes every field.
wt=open('lll.csv','wb')
writer=csv.writer(wt,quoting=csv.QUOTE_ALL)

# NOTE(review): as posted, no row is read from `reader` or written through
# `writer` before both files are closed.
wt.close()
f.close()

其结果是：在这里输入图像描述

你能告诉我该怎么办才能解决这个问题吗？非常感谢你！

请确保正确地进行编码和解码。

下面这个例子把一些UTF-8示例文本写入一个csv文件，然后再读回来，以演示完整的往返过程：

 # -*- coding: utf-8 -*-
# Python 2.7 demo: round-trip non-ASCII text through a CSV file as UTF-8.
import csv

# Sample words per language; each list becomes one CSV column.
tests={'German': [u'Straße',u'auslösen',u'zerstören'],
       'French': [u'français',u'américaine',u'épais'],
       'Chinese': [u'中國的',u'英語',u'美國人']}

# The Python 2 csv module works on bytes, so the file must be opened in
# binary mode ('wb'/'rb'); in text mode the '\r\n' row terminator is
# translated again on platforms such as Windows, corrupting line endings.
with open('/tmp/utf.csv','wb') as fout:
    writer=csv.writer(fout)
    # Header row: the language names. keys() and values() iterate in the
    # same order as long as the dict is not modified in between.
    writer.writerow(tests.keys())
    # zip(*values) transposes the per-language lists into CSV rows.
    for row in zip(*tests.values()):
        # Encode every unicode cell to UTF-8 bytes before handing it to csv.
        writer.writerow([s.encode('utf-8') for s in row])

with open('/tmp/utf.csv','rb') as fin:
    reader=csv.reader(fin)
    for row in reader:
        # One left-aligned, 15-character-wide column per cell.
        fmt=u'{:<15}'*len(row)
        # Decode the UTF-8 bytes back to unicode for display. The
        # parenthesised form behaves exactly like the print statement here.
        print(fmt.format(*[s.decode('utf-8') for s in row]))

打印：

 German         Chinese        French
 Straße         中國的            français
 auslösen       英語             américaine
 zerstören      美國人            épais

另一种选择：

使用unicodecsv软件包中的代码…

https://pypi.python.org/pypi/unicodecsv/

 >>> # unicodecsv is imported under the name csv, as a stand-in for the stdlib module
 >>> import unicodecsv as csv
 >>> from io import BytesIO
 >>> f = BytesIO()
 >>> # Write one row of unicode strings into the in-memory byte buffer
 >>> w = csv.writer(f, encoding='utf-8')
 >>> _ = w.writerow((u'é', u'ñ'))
 >>> # Rewind the buffer and read the row back
 >>> _ = f.seek(0)
 >>> r = csv.reader(f, encoding='utf-8')
 >>> next(r) == [u'é', u'ñ']
 True

该模块的API与标准库（stdlib）的csv模块兼容。

在csv模块文档的末尾有一个例子，演示了如何处理Unicode。下面的代码直接复制自该示例。请注意，读取或写入的字符串都是Unicode字符串。例如，不要把字节字符串传递给UnicodeWriter.writerows。

 # Python 2 recipe: Unicode-aware wrappers around the byte-oriented csv module.
import csv,codecs,cStringIO

class UTF8Recoder:
    '''
    Iterator that reads the stream f through a codecs reader for the given
    encoding and yields each item re-encoded as UTF-8.
    '''
    def __init__(self, f, encoding):
        # codecs.getreader(encoding) returns a reader class; wrap f with it.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        # Take the next decoded item from the reader and hand it on as UTF-8.
        return self.reader.next().encode("utf-8")

class UnicodeReader:
    '''
    CSV reader that iterates over the rows of f, decoded from the given
    encoding, and returns every cell as a unicode object.
    '''
    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        # Recode the input to UTF-8 first, then let csv.reader parse it.
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        '''next() -> list of unicode

        Read the next row and return its cells decoded from UTF-8.
        '''
        row = self.reader.next()
        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        return self

class UnicodeWriter:
    '''
    CSV writer that writes rows of unicode cells to the stream f in the
    given encoding.
    '''
    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        # Rows are first formatted by csv.writer into this in-memory queue.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        # A single incremental encoder instance is reused for every row.
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        '''writerow(row) -> None

        Encode each unicode cell of row and write the formatted row to the
        output stream.
        '''
        # Each cell must provide .encode(), i.e. be a string object.
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch the UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... re-encode it into the target encoding ...
        data = self.encoder.encode(data)
        # ... write it to the target stream and empty the queue.
        self.stream.write(data)
        self.queue.truncate(0)

    def writerows(self, rows):
        # Write each row in turn through writerow().
        for row in rows:
            self.writerow(row)

# Copy xxx.csv to lll.csv row by row, quoting every output field.
with open('xxx.csv','rb') as fin, open('lll.csv','wb') as fout:
    reader = UnicodeReader(fin)
    writer = UnicodeWriter(fout,quoting=csv.QUOTE_ALL)
    for line in reader:
        writer.writerow(line)

输入（UTF-8编码）：

 American,美国人
 French,法国人
 German,德国人

输出：

 "American","美国人"
 "French","法国人"
 "German","德国人"

我遇到过同样的问题。答案是：你的做法其实已经是对的，问题出在MS Excel上。试着用其他编辑器打开文件，你会发现编码是成功的。为了让MS Excel正常显示，请把编码从UTF-8改为UTF-16。下面的代码应该可行：

 # Python 2 UnicodeWriter variant that defaults to tab-separated UTF-16 output.
class UnicodeWriter:
    '''
    CSV writer that writes rows to the stream f in the given encoding.
    '''
    def __init__(self, f, dialect=csv.excel_tab, encoding="utf-16", **kwds):
        # Redirect output to a queue
        self.queue = StringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f

        # Force BOM: write it once, at the very start of the stream
        if encoding=="utf-16":
            import codecs
            f.write(codecs.BOM_UTF16)

        self.encoding = encoding

    def writerow(self, row):
        '''Convert every cell of row to text and write the row to the stream.'''
        # Modified from original: now using unicode(s) to deal with eg ints
        self.writer.writerow([unicode(s).encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = data.encode(self.encoding)

        # strip BOM: encode("utf-16") prepends a 2-byte BOM on every call,
        # but the BOM was already written once in __init__
        if self.encoding == "utf-16":
            data = data[2:]

        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        # Write each row in turn through writerow().
        for row in rows:
            self.writerow(row)

我无法在上面Mark的回答下发表评论，但我只做了一处修改，修正了当单元格中的数据不是unicode（例如float或int数据）时引发的错误。我把UnicodeWriter中的这一行替换成了：`self.writer.writerow([s.encode("utf-8") if type(s)==types.UnicodeType else s for s in row])`：

 # Python 2 UnicodeWriter variant that passes non-unicode cells through unchanged.
class UnicodeWriter:
    '''
    CSV writer that writes rows to the stream f in the given encoding.
    '''
    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        # Rows are first formatted by csv.writer into this in-memory queue.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        # A single incremental encoder instance is reused for every row.
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        '''writerow(row) -> None

        Write one row to the output stream; unicode cells are encoded,
        cells of any other type are passed to csv.writer as they are.
        '''
        # Only unicode objects are encoded, so cells such as ints or floats
        # no longer fail on a missing/unsuitable .encode().
        self.writer.writerow([s.encode("utf-8") if type(s)==types.UnicodeType else s for s in row])
        # Fetch the UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... re-encode it into the target encoding ...
        data = self.encoder.encode(data)
        # ... write it to the target stream and empty the queue.
        self.stream.write(data)
        self.queue.truncate(0)

    def writerows(self, rows):
        # Write each row in turn through writerow().
        for row in rows:
            self.writerow(row)

您还需要加上 `import types`。

使用Python 2.7读取和写入CSV文件，包括unicode

未指定标题映射，无法按名称访问记录值（Apache Commons CSV）

是否有可能查询逗号分隔列的特定值？

如何通过PHP脚本创建并下载CSV文件？

是否可以强制Excel自动识别UTF-8 CSV文件？

如何在Ruby中创建新的CSV文件？

Excel能否将我的CSV中的URL解释为超链接？

如何在PowerShell中逐行遍历csv文件

LOAD DATA LOCAL，如何跳过第一行？

如何用csv.DictWriter编写标题行？

如何在CSV文件中编写UTF-8