annotate sat_website/unicodecsv.py @ 67:ae46991a3489

English correction (typos and improvements) by Florian and Rachelle
author souliane <souliane@mailoo.org>
date Tue, 26 May 2015 21:23:56 +0200
parents 0d20fb28c32e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
60
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
1 #!/usr/bin/python
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
3 #
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
4 # CSV Reader and Writer
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
5 #
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
6 # This is a copy-paste of the examples given in the Python 2 docs:
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
7 # https://docs.python.org/2/library/csv.html
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
8
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
9 import csv, codecs, cStringIO
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
10
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
class UTF8Recoder(object):
    """Iterator that reads an encoded stream and re-encodes the input to UTF-8.

    The stream is wrapped in the stream reader of the codec named by
    ``encoding``; every item produced by that reader is encoded again as
    UTF-8 before being handed to the caller.

    :param f: byte stream to read from.
    :param encoding: name of the codec the stream is encoded with.
    """

    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next chunk of the stream as UTF-8 encoded bytes.

        :raises StopIteration: when the wrapped reader is exhausted.
        """
        # The next() builtin works with whichever iterator method the stream
        # reader exposes (``next`` or ``__next__``), unlike a direct
        # ``self.reader.next()`` call.
        return next(self.reader).encode("utf-8")

    # Iterator protocol name used by Python 3; ``next`` is kept so existing
    # Python 2 callers keep working unchanged.
    __next__ = next
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
23
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
24 class UnicodeReader:
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
25 """
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
26 A CSV reader which will iterate over lines in the CSV file "f",
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
27 which is encoded in the given encoding.
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
28 """
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
29
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
30 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
31 f = UTF8Recoder(f, encoding)
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
32 self.reader = csv.reader(f, dialect=dialect, **kwds)
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
33
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
34 def next(self):
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
35 row = self.reader.next()
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
36 return [unicode(s, "utf-8") for s in row]
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
37
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
38 def __iter__(self):
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
39 return self
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
40
0d20fb28c32e many small changes:
souliane <souliane@mailoo.org>
parents:
diff changeset
class UnicodeWriter(object):
    """CSV writer which writes rows to the CSV file "f".

    The output is encoded in the given encoding. Each row is first
    formatted by ``csv.writer`` as UTF-8 into an in-memory queue, then
    re-encoded into the target encoding and written to "f".

    :param f: stream receiving the encoded CSV output (needs ``write``).
    :param dialect: csv dialect, forwarded to ``csv.writer``.
    :param encoding: name of the codec used for the output.
    :param kwds: extra formatting parameters forwarded to ``csv.writer``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect the csv output to an in-memory queue so that it can be
        # re-encoded before reaching the target stream.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    @staticmethod
    def _to_utf8(cell):
        """Return ``cell`` encoded as UTF-8 when it is a string, else as is."""
        # Numbers, None, ... have no encode() method; they are passed through
        # so that csv.writer formats them itself instead of writerow() failing
        # with AttributeError.
        encode = getattr(cell, "encode", None)
        return cell if encode is None else encode("utf-8")

    def writerow(self, row):
        """Write one row to the target stream in the target encoding.

        :param row: iterable of cells; strings are encoded as UTF-8, any
            other value is handed to ``csv.writer`` untouched.
        """
        self.writer.writerow([self._to_utf8(s) for s in row])
        # Fetch the UTF-8 output from the queue ...
        data = self.queue.getvalue().decode("utf-8")
        # ... re-encode it into the target encoding and write it out
        self.stream.write(self.encoder.encode(data))
        # Empty the queue for the next row
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of ``rows`` with :meth:`writerow`, in order."""
        for row in rows:
            self.writerow(row)