www.zhblog.net

python csv 模块读取

在python中读取 .csv 文件数据,使用标准的 csv 模块。

例如:股票市场数据

Symbol,Price,Date,Time,Change,Volume

"AA",39.48,"6/11/2007","9:36am",-0.18,181800

"AIG",71.38,"6/11/2007","9:36am",-0.15,195500

"AXP",62.58,"6/11/2007","9:36am",-0.46,935000

"BA",98.31,"6/11/2007","9:36am",+0.12,104800

"C",53.08,"6/11/2007","9:36am",-0.25,360900

"CAT",78.29,"6/11/2007","9:36am",-0.23,225400


每行数据读取为一个列表

>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f)
	headers = next(f_csv)
	print(headers)
	for row in f_csv:
		print(row)

		
['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
['AA', '39.48', '6/11/2007', '9:36am', '-0.18', '181800']
['AIG', '71.38', '6/11/2007', '9:36am', '-0.15', '195500']
['AXP', '62.58', '6/11/2007', '9:36am', '-0.46', '935000']
['BA', '98.31', '6/11/2007', '9:36am', '+0.12', '104800']
['C', '53.08', '6/11/2007', '9:36am', '-0.25', '360900']
['CAT', '78.29', '6/11/2007', '9:36am', '-0.23', '225400']


使用 namedtuple 代替列表索引访问:row[0]、row[4]

>>> from collections import namedtuple
>>> import csv
>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f)
	headings = next(f_csv)
	Row = namedtuple('Row', headings)
	for r in f_csv:
		row = Row(*r)
		print(row.Symbol, row.Price)

		
AA 39.48
AIG 71.38
AXP 62.58
BA 98.31
C 53.08
CAT 78.29


每行数据读取为一个字典

>>> import csv
>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.DictReader(f)
	for row in f_csv:
		print(row)

		
OrderedDict([('Symbol', 'AA'), ('Price', '39.48'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '-0.18'), ('Volume', '181800')])
OrderedDict([('Symbol', 'AIG'), ('Price', '71.38'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '-0.15'), ('Volume', '195500')])
OrderedDict([('Symbol', 'AXP'), ('Price', '62.58'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '-0.46'), ('Volume', '935000')])
OrderedDict([('Symbol', 'BA'), ('Price', '98.31'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '+0.12'), ('Volume', '104800')])
OrderedDict([('Symbol', 'C'), ('Price', '53.08'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '-0.25'), ('Volume', '360900')])
OrderedDict([('Symbol', 'CAT'), ('Price', '78.29'), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', '-0.23'), ('Volume', '225400')])


此时可以用字典的方式访问数据:row['Symbol']、row['Price']



使用 csv 模块将列表数据写入 csv 文件

>>> headers = ['Symbol','Price','Date','Time','Change','Volume']
>>> rows = [('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800),
	('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500),
	('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000),
	]
>>> with open(r'e:\stocks.csv', 'w') as f:
	f_csv = csv.writer(f)
	f_csv.writerow(headers)
	f_csv.writerows(rows)

	
38


使用 csv 模块将字典数据写入 csv 文件

>>> headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
>>> rows = [{'Symbol':'AA', 'Price':39.48, 'Date':'6/11/2007',
		'Time':'9:36am', 'Change':-0.18, 'Volume':181800},
	    {'Symbol':'AIG', 'Price': 71.38, 'Date':'6/11/2007',
		'Time':'9:36am', 'Change':-0.15, 'Volume': 195500},
	    {'Symbol':'AXP', 'Price': 62.58, 'Date':'6/11/2007',
		'Time':'9:36am', 'Change':-0.46, 'Volume': 935000},
	]
>>> with open(r'e:\stocks.csv', 'w') as f:
	f_csv = csv.DictWriter(f, headers)
	f_csv.writeheader()
	f_csv.writerows(rows)



不要手动写csv文件解析代码:row.split(','),因为文本中可能本身就带有标点符号。而 csv 模块支持大多Microsoft Excel规则。


csv数据不同分隔符的读取

# 将数据转换成 \t 分隔
>>> with open(r'e:\stocks.csv', 'w', newline='') as f:
	f_csv = csv.writer(f, delimiter='\t')
	f_csv.writerow(headers)
	f_csv.writerows(rows)

	
38
# 此时以 \t 分隔符读取
>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f, delimiter='\t')
	for row in f_csv:
		print(row)

		
['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
['AA', '39.48', '6/11/2007', '9:36am', '-0.18', '181800']
['AIG', '71.38', '6/11/2007', '9:36am', '-0.15', '195500']
['AXP', '62.58', '6/11/2007', '9:36am', '-0.46', '935000']


当使用 namedtuple 时需要注意不规则字段名称

Street Address,Num-Premises,Latitude,Longitude

5412 N CLARK,10,41.980262,-87.668452

>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f)
	headers = next(f_csv)
	for row in f_csv:
		print(row)

		
['5412 N CLARK', '10', '41.980262', '-87.668452']

>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f)
	headers = next(f_csv)
	Row = namedtuple('Row', headers)
	for row in f_csv:
		r = Row(*row)
		print(r)

		
Traceback (most recent call last):
  File "<pyshell#181>", line 4, in <module>
    Row = namedtuple('Row', headers)
  File "D:\work\Python\Python36\lib\collections\__init__.py", line 401, in namedtuple
    'identifiers: %r' % name)
ValueError: Type names and field names must be valid identifiers: 'Street Address'


此时需要对字段名称进行处理

>>> import re
>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f)
	headers = [re.sub(r'[^a-zA-Z_]', '_', h) for h in next(f_csv)]
	Row = namedtuple('Row', headers)
	for row in f_csv:
		r = Row(*row)
		print(r)

		
Row(Street_Address='5412 N CLARK', Num_Premises='10', Latitude='41.980262', Longitude='-87.668452')


csv模块转换数据通常都是字符串类型,可以在自己的处理逻辑进行转换处理。如果转换是很重要的,可以对应起来转换

>>> col_types = [str, float, str, str, float, int]
>>> with open(r'e:\stocks.csv') as f:
	f_csv = csv.reader(f, delimiter='\t')
	headers = next(f_csv)
	for row in f_csv:
		row = [convert(value) for convert, value in zip(col_types, row)]
		print(row)

		
['AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800]
['AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500]
['AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000]


转换部分需要的字段

>>> field_types = [('Price', float), ('Change', float), ('Volume', int)]
>>> with open(r'e:\stocks.csv') as f:
	for row in csv.DictReader(f, delimiter='\t'):
		row.update((key, conversion(row[key])) for key, conversion in field_types)
		print(row)

		
OrderedDict([('Symbol', 'AA'), ('Price', 39.48), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', -0.18), ('Volume', 181800)])
OrderedDict([('Symbol', 'AIG'), ('Price', 71.38), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', -0.15), ('Volume', 195500)])
OrderedDict([('Symbol', 'AXP'), ('Price', 62.58), ('Date', '6/11/2007'), ('Time', '9:36am'), ('Change', -0.46), ('Volume', 935000)])

展开阅读全文

评论

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 心情