60
|
1 #!/usr/bin/python |
|
2 # -*- coding: utf-8 -*- |
|
3 # |
|
4 # CSV Reader and Writer |
|
5 # |
|
6 # This is a copy-paste of the examples given in the Python 2 docs: |
|
7 # https://docs.python.org/2/library/csv.html |
|
8 |
|
9 import csv, codecs, cStringIO |
|
10 |
|
class UTF8Recoder(object):
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    ``f`` is a byte stream whose content is encoded in ``encoding``.  Each
    iteration step returns the next line of that stream as a UTF-8 encoded
    byte string.
    """

    def __init__(self, f, encoding):
        # Wrap the byte stream in a decoding reader for the source encoding.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next line re-encoded as UTF-8.

        Raises StopIteration when the underlying reader is exhausted.
        """
        # The builtin next() works whether the wrapped reader implements the
        # Python 2 ``next`` method or the Python 3 ``__next__`` method.
        return next(self.reader).encode("utf-8")

    # Python 3 spelling of the iterator protocol.
    __next__ = next
|
23 |
|
class UnicodeReader(object):
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.

    ``f`` must be a byte stream (e.g. a file opened in binary mode).  Each
    row is returned as a list of text (unicode) strings.  Extra keyword
    arguments are forwarded to ``csv.reader``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        if str is bytes:
            # Python 2: the csv module only handles byte strings, so feed it
            # UTF-8 encoded lines and decode the cells afterwards.
            lines = UTF8Recoder(f, encoding)
        else:
            # Python 3: the csv module works on text and rejects bytes, so
            # hand it the decoded lines directly.
            lines = codecs.getreader(encoding)(f)
        self.reader = csv.reader(lines, dialect=dialect, **kwds)

    def next(self):
        """Return the next row as a list of text strings.

        Raises StopIteration when the input is exhausted.
        """
        row = next(self.reader)
        # Only byte strings need decoding.  Cells that are already text
        # (Python 3) or that csv converted to float (QUOTE_NONNUMERIC) are
        # passed through unchanged instead of raising.
        return [s.decode("utf-8") if isinstance(s, bytes) else s for s in row]

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __iter__(self):
        return self
|
40 |
|
class UnicodeWriter(object):
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    ``f`` must accept byte strings in its ``write`` method (e.g. a file
    opened in binary mode).  Extra keyword arguments are forwarded to
    ``csv.writer``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect csv output to an in-memory queue so it can be re-encoded
        # before it reaches the target stream.
        if str is bytes:
            # Python 2: csv.writer emits byte strings.
            self.queue = cStringIO.StringIO()
        else:
            # Python 3: csv.writer emits text and cStringIO does not exist.
            import io
            self.queue = io.StringIO(newline="")
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format ``row`` as one CSV record and write it to the stream.

        Cells may be text or any other value ``csv.writer`` accepts
        (numbers, ``None``, ...); only text cells are encoded.
        """
        if str is bytes:
            # Python 2: csv.writer needs byte strings, so encode text cells
            # to UTF-8.  Non-text cells have no usable ``encode`` and are
            # left for csv.writer to format.
            text_type = type(u"")
            row = [s.encode("utf-8") if isinstance(s, text_type) else s
                   for s in row]
        self.writer.writerow(row)
        # Fetch the formatted record from the queue ...
        data = self.queue.getvalue()
        if isinstance(data, bytes):
            # ... which is UTF-8 bytes on Python 2 ...
            data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # Empty the queue.  io buffers keep their position on truncate(), so
        # rewind explicitly before truncating.
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of ``rows`` using ``writerow``."""
        for row in rows:
            self.writerow(row)