source: main/trunk/openPLM/plmapp/csvimport.py @ 471

Revision 471, 11.4 KB checked in by pcosquer, 9 years ago (diff)

csv: work on #46
This commit enables sending mails and updating indexes after the database commit.
A new task (update_indexes) is available to update several indexes at once.

Line 
1u"""
2Tools to import data from a CSV file.
3"""
4
5import re
6from abc import ABCMeta, abstractmethod
7from functools import partial
8from itertools import islice
9from collections import defaultdict
10
11from django.db import transaction
12from django.forms.util import ErrorList
13from django.utils.safestring import mark_safe
14
15from openPLM.plmapp import models
16from openPLM.plmapp.unicodecsv import UnicodeReader
17from openPLM.plmapp.controllers.plmobject import PLMObjectController
18from openPLM.plmapp.tasks import update_indexes
19
20
# function that replaces each run of whitespace characters by a single
# underscore
_WHITESPACE_RUN = re.compile(r"\s+")
_to_underscore = partial(_WHITESPACE_RUN.sub, "_")
23
class CSVImportError(StandardError):
    """
    Exception raised when an import of a CSV file fails.

    :param errors: all detected errors, indexed by line number

    .. attribute:: errors

        dictionary (line -> :class:`~django.forms.util.ErrorList`) of all
        detected errors.
    """

    def __init__(self, errors):
        # forwards *errors* to the base class so that ``args`` is populated
        # (needed for a meaningful repr and for pickling the exception)
        super(CSVImportError, self).__init__(errors)
        self.errors = errors

    def __unicode__(self):
        """
        Returns a textual report with one entry per faulty line, sorted by
        line number.
        """
        errors = self.errors
        if hasattr(errors, "as_text"):
            # *errors* knows how to render itself
            details = errors.as_text()
        else:
            # *errors* is a plain mapping (line -> errors): a dict has no
            # ``as_text`` method, so each entry is rendered individually
            chunks = []
            for line in sorted(errors):
                line_errors = errors[line]
                if hasattr(line_errors, "as_text"):
                    text = line_errors.as_text()
                else:
                    text = unicode(line_errors)
                chunks.append(u"Line %s: %s" % (line, text))
            details = u"\n\t".join(chunks)
        return u"CSVImportError:\n\t" + details
40
class Preview(object):
    u"""
    Preview of a CSV file.

    :param csv_file: the csv file being parsed
    :type csv_file: a file like object
    :param encoding: encoding of the file (`utf-8`, `ascii`, etc.)
    :param known_headers: collection of headers that may be valid

    .. attribute:: headers

        headers of the CSV file (an empty list if the file is empty)
    .. attribute:: guessed_headers

        headers translated according to *known_headers*, a header that can
        not be translated is replaced by `None`
    .. attribute:: rows

        first non-headers rows of the file (at most two rows)
    """

    def __init__(self, csv_file, encoding, known_headers):
        reader = UnicodeReader(csv_file, encoding=encoding)
        try:
            self.headers = reader.next()
        except StopIteration:
            # empty file: there is no header line and nothing to preview
            self.headers = []
        self.guessed_headers = self._guess_headers(known_headers)
        self.rows = tuple(islice(reader, 2))

    def _guess_headers(self, known_headers):
        """
        Returns a list which contains, for each header of the file, its
        normalized form (stripped, lowercased, whitespace replaced by
        underscores) if it belongs to *known_headers*, `None` otherwise.
        """
        guessed = []
        for header in self.headers:
            # blanks around a header (e.g. after a delimiter) are not
            # significant and must not be turned into underscores
            h = _to_underscore(header.strip().lower())
            guessed.append(h if h in known_headers else None)
        return guessed
77
class CSVImporter(object):
    """
    Abstract class to import data from a CSV file.

    :param csv_file: file being imported
    :type csv_file: a file like object
    :param user: user who imports the file
    :type user: :class:`~django.contrib.auth.models.User`
    :param encoding: encoding of the file (`utf-8`, `ascii`, etc.)

    For "end users", this class has two useful methods:

        * :meth:`get_preview` to generate a :class:`Preview` of the file
        * :meth:`import_csv` to import the csv file

    An implementation must overwrite the methods :meth:`get_headers_set` and
    :meth:`parse_row` and redefine the attribute :attr:`REQUIRED_HEADERS`.
    """

    __metaclass__ = ABCMeta

    #: Headers that must be present in the csv file
    REQUIRED_HEADERS = ()

    def __init__(self, csv_file, user, encoding="utf-8"):
        self.csv_file = csv_file
        self.user = user
        self.encoding = encoding

    @classmethod
    @abstractmethod
    def get_headers_set(cls):
        """
        Returns a set of all possible headers.

        .. note::

            This method is abstract and must be implemented.
        """
        return set()

    @classmethod
    def get_headers(cls):
        """
        Returns a list of all possible headers: `None` (header of an ignored
        column) followed by the sorted values of :meth:`get_headers_set`.
        """
        headers = [None]
        headers.extend(sorted(cls.get_headers_set()))
        return headers

    @classmethod
    def get_missing_headers_msg(cls):
        """
        Returns a message explaining which headers are required.
        """
        headers = ", ".join(cls.REQUIRED_HEADERS)
        return u"Missing headers: %s are required." % headers

    def get_preview(self):
        """
        Returns a :class:`Preview` of the csv file.
        """
        self.csv_file.seek(0)
        return Preview(self.csv_file, self.encoding, self.get_headers_set())

    @transaction.commit_on_success
    def __do_import_csv(self, headers):
        """
        Parses the whole file, see :meth:`import_csv`.

        Raises a :exc:`CSVImportError` if a required header is missing, if
        the file is empty or if at least one row can not be parsed.
        """
        self.csv_file.seek(0)
        reader = UnicodeReader(self.csv_file, encoding=self.encoding)
        self.headers_dict = dict((h, i) for i, h in enumerate(headers))
        # initialized before any check so that all errors are reported in the
        # same way (line -> ErrorList)
        self._errors = defaultdict(ErrorList)
        self.objects = []
        # checks that required columns are present
        for field in self.REQUIRED_HEADERS:
            if field not in self.headers_dict:
                self.store_errors(1, self.get_missing_headers_msg())
                raise CSVImportError(self._errors)
        # skips the header line
        try:
            reader.next()
        except StopIteration:
            # empty file: reports it like any other import error instead of
            # leaking a StopIteration
            self.store_errors(1, u"The file is empty.")
            raise CSVImportError(self._errors)
        # parses each row, the header is the line 1, so the first row is
        # the line 2
        for line, row in enumerate(reader, 2):
            try:
                self.parse_row(line, row)
            except Exception as e:
                # an invalid row must not stop the parsing: all errors are
                # collected and reported at once
                self.store_errors(line, e)
        if self._errors:
            raise CSVImportError(self._errors)

    def import_csv(self, headers):
        """
        Imports the csv file. *headers* is the list of headers as given by the
        user. Columns whose header is `None` are ignored.
        *headers* must contain all values of :attr:`REQUIRED_HEADERS`.

        If one or several errors occur (missing headers, empty file, row
        which can not be parsed), a :exc:`CSVImportError` is raised with all
        detected errors.

        :return: A list of :class:`.PLMObjectController` of all created objects.
        """
        # puts all stuff in a private method so we call tear_down only
        # after a database commit
        self.__do_import_csv(headers)
        self.tear_down()
        return self.objects

    def tear_down(self):
        """
        Method called once *all* rows have been successfully parsed.

        By default, this method sends all blocked mails.
        """
        for obj in self.objects:
            obj.unblock_mails()

    def store_errors(self, line, *errors):
        """
        Appends *errors* to the list of errors which occurs at the line *line*.

        An error which is an exception is converted to a unicode string.
        """
        for e in errors:
            if isinstance(e, Exception):
                e = unicode(e)
            self._errors[line].append(e)

    def get_value(self, row, header):
        """
        Returns the value of *row* located in the column named *header*.

        *header* must be one of the headers given to :meth:`import_csv`.
        """
        return row[self.headers_dict[header]]

    def get_values(self, row, *headers):
        """
        Returns a list of the values of *row* located in the columns named
        *headers* (same order as *headers*).
        """
        return [self.get_value(row, h) for h in headers]

    @abstractmethod
    def parse_row(self, line, row):
        """
        Method called by :meth:`import_csv` for each row.

        :param line: line number of current row, useful to store a list of
                     errors
        :type line: int
        :param row: row being parsed.
        :type row: list of unicode strings.

        This method must be overwritten. Implementation can use the methods
        :meth:`get_value`, :meth:`get_values`, and :meth:`store_errors` to
        retrieve values and store detected errors.

        .. warning::

            All :class:`.Controller` created should not send emails since an
            error may occur and thus, all modifications would be cancelled.
            To block mails, call :meth:`.Controller.block_mails`. You can
            release all blocked mails by appending the controller to
            :attr:`objects`. :meth:`import_csv` will send mails if no errors
            occurred.

            Example::

                ctrl = get_obj(type, reference, revision, user)
                ctrl.block_mails()
                ...
                if ok:
                    self.objects.append(ctrl)
        """
        pass
239
class PLMObjectsImporter(CSVImporter):
    """
    A :class:`CSVImporter` that creates one :class:`PLMObject` per row of
    a csv file.

    The CSV must contain the following columns:

        * type
        * reference
        * revision
        * name
        * group (name of the group, not its id)
        * lifecycle (name of the lifecycle, not its id)

    Moreover, it must have a column for each required field of defined types.
    """

    #: Headers that must be present in the csv file
    REQUIRED_HEADERS = ("type", "reference", "revision", "name", "group", "lifecycle")

    @classmethod
    def get_headers_set(cls):
        """
        Returns the union of the creation fields of all known types of
        plmobjects.
        """
        headers = set()
        for plm_cls in models.get_all_plmobjects().itervalues():
            headers.update(plm_cls.get_creation_fields())
        return headers

    def tear_down(self):
        """
        Sends blocked mails (see :meth:`CSVImporter.tear_down`) and then
        delegates the indexing of all created objects to the task
        :func:`update_indexes`.
        """
        super(PLMObjectsImporter, self).tear_down()

        def index_key(instance):
            # (application, model, primary key) of the given model instance
            return (instance._meta.app_label, instance._meta.module_name,
                    instance._get_pk_val())

        update_indexes.delay([index_key(ctrl.object) for ctrl in self.objects])

    def parse_row(self, line, row):
        """
        Creates the object described by *row*, or stores the errors of the
        creation form if *row* is not valid.
        """
        from openPLM.plmapp.forms import get_creation_form
        type_, reference, revision = self.get_values(row, "type", "reference",
            "revision")
        cls = models.get_all_plmobjects()[type_]
        group = models.GroupInfo.objects.get(name=self.get_value(row, "group"))
        # the fetched lifecycle is not used afterwards, the lookup is kept
        # as is (presumably it checks that the lifecycle exists)
        models.Lifecycle.objects.get(name=self.get_value(row, "lifecycle"))
        # an unbound form is only built to know which fields are expected
        unbound_form = get_creation_form(self.user, cls)
        data = {
                "type" : type_,
                "group" : str(group.id),
                "reference" : reference,
                "revision" : revision,
                }
        for name in unbound_form.fields:
            if name in data or name not in self.headers_dict:
                continue
            data[name] = self.get_value(row, name)
        form = get_creation_form(self.user, cls, data)
        if form.is_valid():
            ctrl = PLMObjectController.create_from_form(form, self.user, True, True)
            self.objects.append(ctrl)
        else:
            messages = [mark_safe(u"%s: %s" % item)
                        for item in form.errors.iteritems()]
            self.store_errors(line, *messages)
305
306
class BOMImporter(CSVImporter):
    """
    A :class:`CSVImporter` that builds a bom from a CSV file: each row
    links a child to its parent.

    The CSV must contain the following columns:

        * parent-type
        * parent-reference
        * parent-revision
        * child-type
        * child-reference
        * child-revision
        * quantity
        * order
    """

    REQUIRED_HEADERS = ("parent-type", "parent-reference", "parent-revision",
                        "child-type", "child-reference", "child-revision",
                        "quantity", "order")

    HEADERS_SET = set(REQUIRED_HEADERS)

    @classmethod
    def get_headers_set(cls):
        """
        Returns the set of possible headers (the required ones).
        """
        return cls.HEADERS_SET

    def parse_row(self, line, row):
        """
        Adds the child described by *row* to the parent described by *row*.
        """
        from openPLM.plmapp.base_views import get_obj
        p_type, p_reference, p_revision = self.get_values(row,
                "parent-type", "parent-reference", "parent-revision")
        parent = get_obj(p_type, p_reference, p_revision, self.user)

        c_type, c_reference, c_revision = self.get_values(row,
                "child-type", "child-reference", "child-revision")
        child = get_obj(c_type, c_reference, c_revision, self.user)

        # mails are blocked until the end of the import
        for ctrl in (parent, child):
            ctrl.block_mails()
        self.objects.extend((parent, child))

        # the quantity may use a comma as decimal mark and may contain spaces
        raw_quantity = self.get_value(row, "quantity")
        quantity = float(raw_quantity.replace(",", ".").replace(" ", ""))
        raw_order = self.get_value(row, "order")
        order = int(raw_order.replace(" ", ""))

        parent.add_child(child, quantity, order)
353   
#: Known subclasses of :class:`CSVImporter`, indexed by their name
IMPORTERS = {
    "csv" : PLMObjectsImporter,
    "bom" : BOMImporter,
}
356
Note: See TracBrowser for help on using the repository browser.