Source code for process.processors.loader

import argparse
import copy
import logging
import os

from django.utils.translation import gettext as t

from process.exceptions import InvalidFormError
from process.forms import CollectionFileForm, CollectionForm, CollectionNote, CollectionNoteForm
from process.models import Collection, CollectionFile, ProcessingStep
from process.util import create_step

logger = logging.getLogger(__name__)


def file_or_directory(path):
    """
    Return ``path`` unchanged if it exists on the filesystem.

    The raised exception type is ``argparse.ArgumentTypeError``, so the function can serve as an
    ``argparse`` ``type=`` callable.

    :param str path: path to a file or directory
    :returns: the same path
    :raises argparse.ArgumentTypeError: if nothing exists at the path
    """
    if os.path.exists(path):
        return path

    message = t("No such file or directory %(path)r") % {"path": path}
    raise argparse.ArgumentTypeError(message)
def create_collection_file(collection, filename=None, url=None) -> CollectionFile:
    """
    Create a file for a collection, along with the LOAD processing step for that file.

    :param Collection collection: collection the file belongs to
    :param str filename: path to file data
    :param url: value submitted as the ``url`` field of the form
    :returns: created collection file
    :raises InvalidFormError: if there is a validation error
    """
    form_data = {"collection": collection, "filename": filename, "url": url}
    form = CollectionFileForm(form_data)

    # Bail out early so that the step is only created for a file that was actually saved.
    if not form.is_valid():
        raise InvalidFormError(form)

    saved_file = form.save()
    create_step(ProcessingStep.Name.LOAD, collection.pk, collection_file=saved_file)
    return saved_file
def create_collections(
    # Identification
    source_id,
    data_version,
    *,
    sample=False,
    # Steps
    upgrade=False,
    compile=False,  # noqa: A002 # consistency
    check=False,
    # Other
    scrapyd_job="",
    note="",
    force=False,
) -> tuple[Collection, Collection, Collection]:
    """
    Create the root collection, its derived collections and their notes.

    The possible chains are ``root -> upgraded -> compiled``, ``root -> upgraded`` and
    ``root -> compiled``. Each collection is given, as its steps, the transformation that leads to
    the next collection in the chain; the root collection additionally gets ``check`` if requested.

    :param str source_id: collection source
    :param str data_version: data version in ISO format
    :param boolean sample: is this sample only
    :param boolean upgrade: whether to plan collection upgrade
    :param boolean compile: whether to plan collection compile
    :param boolean check: whether to plan schema-based checks
    :param str scrapyd_job: Scrapyd job ID
    :param str note: text description
    :param boolean force: skip validation of the source_id against the Scrapyd project
    :returns: the root collection, upgraded collection and compiled collection; the upgraded
        (respectively compiled) collection is ``None`` if ``upgrade`` (respectively ``compile``)
        is falsy
    :raises InvalidFormError: if a collection or a note fails validation
    """
    shared_fields = {
        "source_id": source_id,
        "data_version": data_version,
        "sample": sample,
        "scrapyd_job": scrapyd_job,
        "force": force,
    }

    # The root collection only plans its *next* transformation: if an upgrade is planned, the
    # compile step belongs to the upgraded collection instead.
    root_steps = ["check"] if check else []
    if upgrade:
        root_steps.append("upgrade")
    elif compile:
        root_steps.append("compile")

    collection = _create_collection(shared_fields, note, steps=root_steps)

    # The most recently created collection, which is the parent of the next derived collection.
    latest = collection

    if upgrade:
        upgraded_collection = _create_collection(
            shared_fields,
            note,
            steps=["compile"] if compile else [],
            parent=latest,
            transform_type=Collection.Transform.UPGRADE_10_11,
        )
        latest = upgraded_collection
    else:
        upgraded_collection = None

    if compile:
        compiled_collection = _create_collection(
            shared_fields,
            note,
            parent=latest,
            transform_type=Collection.Transform.COMPILE_RELEASES,
        )
    else:
        compiled_collection = None

    return collection, upgraded_collection, compiled_collection
def _create_collection(data, note, **kwargs):
    """
    Validate and save one collection, attaching a note to it if one is given.

    :param dict data: form fields shared by the collections; it is copied, not modified
    :param str note: text of the note to save with the collection; skipped if falsy
    :param kwargs: additional form fields, which override same-named keys of ``data``
    :returns: the saved collection
    :raises InvalidFormError: if the collection or the note fails validation
    """
    fields = copy.deepcopy(data)
    fields.update(kwargs)

    # If steps is empty, Django attempts to save it as NULL, but the column has a NOT NULL constraint.
    # (The ``True`` default leaves the dict untouched when there is no "steps" key at all.)
    if not fields.get("steps", True):
        del fields["steps"]

    form = CollectionForm(fields)
    if not form.is_valid():
        raise InvalidFormError(form)

    saved = form.save()
    if note:
        _save_note(saved, note)
    return saved


def _save_note(collection, note):
    """
    Validate and save an INFO-level note on a collection.

    :param Collection collection: collection the note is about
    :param str note: text of the note
    :returns: the result of saving the note form
    :raises InvalidFormError: if the note fails validation
    """
    fields = {"collection": collection, "note": note, "code": CollectionNote.Level.INFO}
    form = CollectionNoteForm(fields)
    if not form.is_valid():
        raise InvalidFormError(form)
    return form.save()