source: main/trunk/openPLM/plmapp/csvimport.py @ 406

Revision 406, 10.6 KB checked in by pcosquer, 9 years ago (diff)

csvimport: refactor and document

Line 
1u"""
2Tools to import data from a CSV file.
3"""
4
5import re
6from abc import ABCMeta, abstractmethod
7from functools import partial
8from itertools import islice
9from collections import defaultdict
10
11from django.db import transaction
12from django.forms.util import ErrorList
13from django.utils.safestring import mark_safe
14
15from openPLM.plmapp import models
16from openPLM.plmapp.unicodecsv import UnicodeReader
17from openPLM.plmapp.controllers.plmobject import PLMObjectController
18
19
20# function that replace spaces by an underscore
21_to_underscore = partial(re.compile(r"\s+").sub, "_")
22
class CSVImportError(StandardError):
    """
    Exception raised when an import of a CSV file fails.

    :param errors: dictionary (line -> errors) of all detected errors

    .. attribute:: errors

        dictionary (line -> :class:`~django.forms.util.ErrorList`) of all
        detected errors.
    """

    def __init__(self, errors):
        # let the base exception record its arguments as usual
        super(CSVImportError, self).__init__(errors)
        self.errors = errors

    def __unicode__(self):
        """
        Returns a textual description of all errors, one entry per line
        number, sorted by line number.
        """
        # *errors* is a dictionary, it has no ``as_text`` method: each value
        # is rendered on its own. A value is rendered with its ``as_text``
        # method if it has one, otherwise it is converted to unicode.
        details = []
        for line in sorted(self.errors):
            line_errors = self.errors[line]
            as_text = getattr(line_errors, "as_text", None)
            if as_text is not None:
                text = as_text()
            else:
                text = unicode(line_errors)
            details.append(u"line %s: %s" % (line, text))
        return u"CSVImportError:\n\t" + u"\n\t".join(details)
39
class Preview(object):
    u"""
    Preview of a CSV file.

    :param csv_file: the csv file being parsed
    :type csv_file: a file like object
    :param encoding: encoding of the file (`utf-8`, `ascii`, etc.)
    :param known_headers: collection of headers that may be valid

    .. attribute:: headers

        headers of the CSV file (an empty list if the file is empty)
    .. attribute:: guessed_headers

        headers translated according to *known_headers*, a header that can
        not be translated is replaced by `None`
    .. attribute:: rows

        first non-headers rows of the file (at most two rows)
    """

    def __init__(self, csv_file, encoding, known_headers):
        reader = UnicodeReader(csv_file, encoding=encoding)
        # an empty file has no header row: fall back to an empty list
        # instead of letting StopIteration escape from the constructor
        self.headers = next(reader, [])
        self.guessed_headers = self._guess_headers(known_headers)
        self.rows = tuple(islice(reader, 2))

    def _guess_headers(self, known_headers):
        """
        Returns a list with one item per header of the file: the header in
        lower case with its whitespace replaced by underscores if this
        value is in *known_headers*, `None` otherwise.
        """
        normalized = (_to_underscore(header.lower())
                      for header in self.headers)
        return [h if h in known_headers else None for h in normalized]
76
class CSVImporter(object):
    """
    Abstract class to import data from a CSV file.

    :param csv_file: file being imported
    :type csv_file: a file like object
    :param user: user who imports the file
    :type user: :class:`~django.contrib.auth.models.User`
    :param encoding: encoding of the file (`utf-8`, `ascii`, etc.)

    For "end users", this class has two useful methods:

        * :meth:`get_preview` to generate a :class:`Preview` of the file
        * :meth:`import_csv` to import the csv file

    An implementation must override the methods :meth:`get_headers_set` and
    :meth:`parse_row` and redefine the attribute :attr:`REQUIRED_HEADERS`.
    """

    __metaclass__ = ABCMeta

    #: Headers that must be present in the csv file
    REQUIRED_HEADERS = ()

    def __init__(self, csv_file, user, encoding="utf-8"):
        self.csv_file = csv_file
        self.user = user
        self.encoding = encoding

    @classmethod
    @abstractmethod
    def get_headers_set(cls):
        """
        Returns a set of all possible headers.

        .. note::

            This method is abstract and must be implemented.
        """
        return set()

    @classmethod
    def get_headers(cls):
        """
        Returns a list of all possible headers: `None` followed by the
        sorted values of :meth:`get_headers_set`.
        """
        return [None] + sorted(cls.get_headers_set())

    @classmethod
    def get_missing_headers_msg(cls):
        """
        Returns a message explaining which headers are required.
        """
        headers = ", ".join(cls.REQUIRED_HEADERS)
        return u"Missing headers: %s are required." % headers

    def get_preview(self):
        """
        Returns a :class:`Preview` of the csv file.
        """
        # the file may have already been read: rewind it first
        self.csv_file.seek(0)
        return Preview(self.csv_file, self.encoding, self.get_headers_set())

    @transaction.commit_on_success
    def import_csv(self, headers):
        """
        Imports the csv file. *headers* is the list of headers as given by the
        user. Columns whose header is `None` are ignored.
        *headers* must contain all values of :attr:`REQUIRED_HEADERS`.

        The first row of the file is always skipped (it is the header row).
        An empty file is handled like a file without any data rows.

        If one or several errors occur (missing headers, row which can not be
        parsed), a :exc:`CSVImportError` is raised with all detected errors.

        :return: A list of :class:`.PLMObjectController` of all created objects.
        """
        self.csv_file.seek(0)
        reader = UnicodeReader(self.csv_file, encoding=self.encoding)
        # header -> index of its column
        self.headers_dict = dict((h, i) for i, h in enumerate(headers))
        # checks that required columns are present
        for field in self.REQUIRED_HEADERS:
            if field not in self.headers_dict:
                raise CSVImportError({1: self.get_missing_headers_msg()})
        # skip the header row; the default value prevents a StopIteration
        # from escaping if the file is empty
        next(reader, None)
        self._errors = defaultdict(ErrorList)
        self.objects = []
        # parse each row, the first data row is the line 2 of the file
        for line, row in enumerate(reader, 2):
            try:
                self.parse_row(line, row)
            except Exception as e:
                # deliberately broad: a failing row must not prevent the
                # following rows from being checked, the error is stored
                # and reported once all rows have been parsed
                self.store_errors(line, e)
        if self._errors:
            raise CSVImportError(self._errors)
        # no errors: blocked mails can now be released
        for obj in self.objects:
            obj.unblock_mails()
        return self.objects

    def store_errors(self, line, *errors):
        """
        Appends *errors* to the list of errors which occurs at the line *line*.

        An error which is an exception is converted to a unicode string.
        """
        for e in errors:
            if isinstance(e, Exception):
                e = unicode(e)
            self._errors[line].append(e)

    def get_value(self, row, header):
        """
        Returns the value of *row* located in the column *header*.

        *header* must be one of the headers given to :meth:`import_csv`.
        """
        return row[self.headers_dict[header]]

    def get_values(self, row, *headers):
        """
        Returns the list of values of *row* located in the columns *headers*
        (see :meth:`get_value`).
        """
        return [self.get_value(row, h) for h in headers]

    @abstractmethod
    def parse_row(self, line, row):
        """
        Method called by :meth:`import_csv` for each row.

        :param line: line number of current row, useful to store a list of
                     errors
        :type line: int
        :param row: row being parsed.
        :type row: list of unicode strings.

        This method must be overridden. Implementation can use the methods
        :meth:`get_value`, :meth:`get_values`, and :meth:`store_errors` to
        retrieve values and store detected errors.

        .. warning::

            All :class:`.Controller` created should not send emails since an
            error may occur and thus, all modifications would be cancelled.
            To block mails, call :meth:`.Controller.block_mails`. You can
            release all blocked mails by appending the controller to
            :attr:`objects`. :meth:`import_csv` will send mails if no errors
            occurred.

            Example::

                ctrl = get_obj(type, reference, revision, user)
                ctrl.block_mails()
                ...
                if ok:
                    self.objects.append(ctrl)
        """
        pass
225
class PLMObjectsImporter(CSVImporter):
    """
    A :class:`CSVImporter` that creates :class:`PLMObject` from
    a csv file.

    The CSV must contain the following columns:

        * type
        * reference
        * revision
        * name
        * group (name of the group, not its id)
        * lifecycle (name of the lifecycle, not its id)

    Moreover, it must have a column for each required field of defined types.
    """

    #: Headers that must be present in the csv file
    REQUIRED_HEADERS = ("type", "reference", "revision", "name", "group", "lifecycle")

    @classmethod
    def get_headers_set(cls):
        """
        Returns a set of all possible headers: the union of the creation
        fields of all classes returned by :func:`models.get_all_plmobjects`.
        """
        headers = set()
        for plm_class in models.get_all_plmobjects().itervalues():
            headers.update(plm_class.get_creation_fields())
        return headers

    def parse_row(self, line, row):
        """
        Method called by :meth:`import_csv` for each row.

        Fills a creation form with the values of *row*. If the form is
        valid, an object is created and appended to :attr:`objects`,
        otherwise the errors of the form are stored.
        """
        # imported here and not at module level (presumably to avoid a
        # circular import -- TODO confirm)
        from openPLM.plmapp.forms import get_creation_form
        type_, reference, revision = self.get_values(
            row, "type", "reference", "revision")
        plm_class = models.get_all_plmobjects()[type_]
        group = models.GroupInfo.objects.get(name=self.get_value(row, "group"))
        # the returned lifecycle is not used afterwards, the query is kept
        # since a failing lookup aborts the parsing of the row
        models.Lifecycle.objects.get(name=self.get_value(row, "lifecycle"))
        # an unbound form is only built to get the list of its fields
        unbound_form = get_creation_form(self.user, plm_class)
        data = {
            "type": type_,
            "group": str(group.id),
            "reference": reference,
            "revision": revision,
        }
        # other fields are read from the row if the file has such a column
        extra_fields = [name for name in unbound_form.fields
                        if name not in data and name in self.headers_dict]
        for name in extra_fields:
            data[name] = self.get_value(row, name)
        form = get_creation_form(self.user, plm_class, data)
        if form.is_valid():
            ctrl = PLMObjectController.create_from_form(form, self.user, True)
            self.objects.append(ctrl)
            return
        messages = [mark_safe(u"%s: %s" % item)
                    for item in form.errors.iteritems()]
        self.store_errors(line, *messages)
283
284
class BOMImporter(CSVImporter):
    """
    A :class:`CSVImporter` that builds a bom from a CSV file.

    The CSV must contain the following columns:

        * parent-type
        * parent-reference
        * parent-revision
        * child-type
        * child-reference
        * child-revision
        * quantity
        * order
    """

    #: Headers that must be present in the csv file
    REQUIRED_HEADERS = ("parent-type", "parent-reference", "parent-revision",
                        "child-type", "child-reference", "child-revision",
                        "quantity", "order")

    #: Set of all possible headers (same values as :attr:`REQUIRED_HEADERS`)
    HEADERS_SET = set(REQUIRED_HEADERS)

    @classmethod
    def get_headers_set(cls):
        """
        Returns a set of all possible headers (:attr:`HEADERS_SET`).
        """
        return cls.HEADERS_SET

    def parse_row(self, line, row):
        """
        Method called by :meth:`import_csv` for each row.

        Retrieves the parent and the child described by *row* and adds
        the child to the parent with the given quantity and order.
        """
        # imported here and not at module level (presumably to avoid a
        # circular import -- TODO confirm)
        from openPLM.plmapp.base_views import get_obj
        p_type, p_reference, p_revision = self.get_values(
            row, "parent-type", "parent-reference", "parent-revision")
        parent = get_obj(p_type, p_reference, p_revision, self.user)

        c_type, c_reference, c_revision = self.get_values(
            row, "child-type", "child-reference", "child-revision")
        child = get_obj(c_type, c_reference, c_revision, self.user)

        # mails are blocked until import_csv releases them
        for ctrl in (parent, child):
            ctrl.block_mails()
        self.objects.extend((parent, child))

        # a comma is accepted as decimal separator and spaces are ignored
        raw_quantity = self.get_value(row, "quantity")
        quantity = float(raw_quantity.replace(",", ".").replace(" ", ""))
        raw_order = self.get_value(row, "order")
        order = int(raw_order.replace(" ", ""))

        parent.add_child(child, quantity, order)
331   
#: Known importers: dictionary (name -> subclass of :class:`CSVImporter`)
IMPORTERS = {
    "csv": PLMObjectsImporter,
    "bom": BOMImporter,
}
334
Note: See TracBrowser for help on using the repository browser.