import os
import shutil
import tempfile
from zipfile import ZipFile
from LOGS import LOGS
from LOGS.Auxiliary import LOGSException
from LOGS.Entities import (
Dataset,
DatasetRequestParameter,
DocumentRequestParameter,
EntitiesRequestParameter,
ExperimentRequestParameter,
InstrumentRequestParameter,
MethodRequestParameter,
PersonRequestParameter,
ProjectRequestParameter,
SampleRequestParameter,
)
def unique(l):
    """Return the distinct items of an iterable as a list.

    The first occurrence of every item is kept and the original order is
    preserved, so the result is deterministic for a given input.

    Args:
        l: Iterable of hashable items.

    Returns:
        A new list containing every distinct item of ``l`` exactly once.
    """
    # dict keys are unique and keep insertion order, unlike a set.
    return list(dict.fromkeys(l))
class EntityCollection:
    """Helper class for CopyProject. Holds one dict per LOGS entity type."""

    def __init__(self):
        # Every entity type starts out with its own empty dict.
        for kind in (
            "origins",
            "projects",
            "samples",
            "documents",
            "datasets",
            "instruments",
            "methods",
            "persons",
            "experiments",
        ):
            setattr(self, kind, {})

    def printSummary(self):
        """Print how many entities of each reported type are held."""
        # Origins and documents are not part of the printed summary.
        report = (
            ("Found %d project(s) to copy.", self.projects),
            ("Found %d sample(s) to copy.", self.samples),
            ("Found %d experiment(s) to copy.", self.experiments),
            ("Found %d dataset(s) to copy.", self.datasets),
            ("Found %d instrument(s) to copy.", self.instruments),
            ("Found %d method(s) to copy.", self.methods),
            ("Found %d person(s) to copy.", self.persons),
        )
        for template, entities in report:
            print(template % len(entities))
class CopyProject:
    """Copies a project from a source LOGS to a target LOGS."""

    def __init__(self, sourceLogs: "LOGS", targetLogs: "LOGS"):
        """Constructor of CopyProject

        Args:
            sourceLogs: The LOGS connection the project data is gathered from.
            targetLogs: The LOGS connection the project contents are copied to.
        """
        self.sourceLogs = sourceLogs
        self.targetLogs = targetLogs
        # Running counter used to name experiments that have no name.
        self.unknownCount = 0

    def collectEntitiesFromProjects(self, projectNames=None, projectTags=None):
        """Collects the selected projects and the entities related to them.

        Projects are selected by name and by tag. Samples, documents and
        datasets are requested by project id. Instruments, experiments,
        methods and persons are requested by the ids referenced from the
        collected datasets, samples, instruments and experiments.

        Args:
            projectNames: Name list for project selection to collect entities
                from. Defaults to no names.
            projectTags: Tag list for project selection to collect entities
                from. Defaults to no tags.

        Raises:
            Exception: If one of ``projectNames`` was not found on the source.

        Returns:
            An EntityCollection of source entities; every dict is keyed by the
            id the entity has on the source.
        """
        projectNames = [] if projectNames is None else projectNames
        projectTags = [] if projectTags is None else projectTags

        byName = self.sourceLogs.projects(ProjectRequestParameter(names=projectNames))
        byTag = self.sourceLogs.projects(
            ProjectRequestParameter(projectTags=projectTags)
        )
        projectList = list(byName)
        projectList.extend(byTag)

        # Keying by id removes projects that matched by name and by tag, so
        # the reported number is the number of distinct projects.
        projects = {project.id: project for project in projectList}
        print("+++ Collecting %d projects from source +++" % len(projects))

        foundNames = {project.name for project in projectList}
        for name in projectNames:
            if name not in foundNames:
                raise Exception("Source project %a not found." % name)

        origin = self.sourceLogs.instanceOrigin
        projectIds = list(projects.keys())

        # Samples from source
        sampleRequest = self.sourceLogs.samples(
            SampleRequestParameter(projectIds=projectIds)
        )
        print("+++ Collecting %d samples from source +++" % sampleRequest.count)
        samples = {sample.id: sample for sample in sampleRequest}

        # Documents from source
        documentRequest = self.sourceLogs.documents(
            DocumentRequestParameter(projectIds=projectIds)
        )
        print("+++ Collecting %d documents from source +++" % documentRequest.count)
        documents = {document.id: document for document in documentRequest}

        # Datasets from source
        datasetRequest = self.sourceLogs.datasets(
            DatasetRequestParameter(projectIds=projectIds)
        )
        print("+++ Collecting %d datasets from source +++" % datasetRequest.count)
        datasets = {dataset.id: dataset for dataset in datasetRequest}

        ## Uncomment the following blocks if you want to also copy all related datasets
        # datasets.update(
        #     {
        #         dataset.id: dataset
        #         for dataset in self.sourceLogs.datasets(
        #             DatasetRequestParameter(sampleIds=list(samples.keys()))
        #         )
        #     }
        # )
        # datasetIds = [
        #     dataset.id
        #     for document in documents.values()
        #     if document.datasets
        #     for dataset in document.datasets
        # ]
        # datasets.update(
        #     {
        #         dataset.id: dataset
        #         for dataset in self.sourceLogs.datasets(
        #             DatasetRequestParameter(ids=datasetIds)
        #         )
        #     }
        # )

        print("+++ Collecting related entities +++")

        # Samples referenced by datasets (they may belong to other projects)
        sampleIds = unique(
            [dataset.sample.id for dataset in datasets.values() if dataset.sample]
        )
        samples.update(
            {
                sample.id: sample
                for sample in self.sourceLogs.samples(
                    SampleRequestParameter(ids=sampleIds)
                )
            }
        )

        # Instruments from source
        instrumentIds = unique(
            [
                dataset.instrumentId
                for dataset in datasets.values()
                if dataset.instrumentId
            ]
        )
        instruments = {
            instrument.id: instrument
            for instrument in self.sourceLogs.instruments(
                InstrumentRequestParameter(ids=instrumentIds)
            )
        }

        # Experiments from source
        experimentIds = unique(
            [
                dataset.experiment.id
                for dataset in datasets.values()
                if dataset.experiment
            ]
        )
        experiments = {
            experiment.id: experiment
            for experiment in self.sourceLogs.experiments(
                ExperimentRequestParameter(ids=experimentIds)
            )
        }

        # Methods from source: referenced by datasets, instruments and experiments
        methodIds = [
            dataset.methodId for dataset in datasets.values() if dataset.methodId
        ]
        methodIds.extend(
            instrument.methodId
            for instrument in instruments.values()
            if instrument.methodId
        )
        methodIds.extend(
            experiment.method.id
            for experiment in experiments.values()
            if experiment.method
        )
        methodIds = unique(methodIds)
        methods = {
            method.id: method
            for method in self.sourceLogs.methods(MethodRequestParameter(ids=methodIds))
        }

        # Persons from other entities: owners, preparers and operators
        personIds = [sample.owner.id for sample in samples.values() if sample.owner]
        personIds.extend(
            person.id
            for sample in samples.values()
            if sample.preparedBy
            for person in sample.preparedBy
        )
        personIds.extend(
            dataset.owner.id for dataset in datasets.values() if dataset.owner
        )
        personIds.extend(
            operatorId
            for dataset in datasets.values()
            if dataset.operatorIds
            for operatorId in dataset.operatorIds
        )
        personIds = unique(personIds)
        persons = {
            person.id: person
            for person in self.sourceLogs.persons(PersonRequestParameter(ids=personIds))
        }

        collection = EntityCollection()
        collection.origins = {origin.id: origin}
        collection.projects = projects
        collection.samples = samples
        collection.documents = documents
        collection.datasets = datasets
        collection.instruments = instruments
        collection.methods = methods
        collection.persons = persons
        collection.experiments = experiments
        return collection

    @classmethod
    def _removeKnownByUid(cls, logs: "LOGS", entities, targetMapping):
        """Removes entities whose uid already exists on the target.

        Args:
            logs: The LOGS connection that is searched for the uids.
            entities: Dict of source entities keyed by source id. Entities
                found on the target are deleted from this dict in place.
            targetMapping: Dict that receives the found target entity under
                the source id of the entity it replaces.
        """
        sourceIdByUid = {
            str(entity.uid): sourceId for sourceId, entity in entities.items()
        }
        if not sourceIdByUid:
            # Nothing to look up, so no request is needed.
            return

        for targetEntity in logs.entities(
            EntitiesRequestParameter(uids=list(sourceIdByUid.keys()))
        ):
            uid = str(targetEntity.uid)
            if uid not in sourceIdByUid:
                continue
            sourceId = sourceIdByUid[uid]
            if sourceId not in entities:
                # Already handled by an earlier result with the same uid.
                continue
            knownEntity = entities.pop(sourceId)
            print(
                "%s with name %a already exists and will be skipped."
                % (type(knownEntity).__name__, knownEntity.name)
            )
            # Keyed by the source id: _mapEntities looks references up by the
            # id of the source entity.
            targetMapping[sourceId] = targetEntity

    @classmethod
    def _removeKnownByName(cls, logs: "LOGS", entities, targetMapping):
        """Removes entities already on the target, matched by name, then by uid.

        A target entity only counts as a match if its type name (with any
        "Minimal" removed) equals the type name of the source entity.

        Args:
            logs: The LOGS connection that is searched for the names.
            entities: Dict of source entities keyed by source id. Entities
                found on the target are deleted from this dict in place.
            targetMapping: Dict that receives the found target entity under
                the source id of the entity it replaces.
        """
        sourceIdByName = {
            entity.name: sourceId for sourceId, entity in entities.items()
        }
        if not sourceIdByName:
            # Nothing to look up, so no request is needed.
            return

        for targetEntity in logs.entities(
            EntitiesRequestParameter(names=list(sourceIdByName.keys()))
        ):
            name = targetEntity.name
            if not name or name not in sourceIdByName:
                continue
            sourceId = sourceIdByName[name]
            knownEntity = entities[sourceId]
            if (
                type(targetEntity).__name__.replace("Minimal", "")
                != type(knownEntity).__name__
            ):
                continue
            print(
                "%s with name %a already exists and will be skipped."
                % (type(knownEntity).__name__, knownEntity.name)
            )
            targetMapping[sourceId] = targetEntity
            del entities[sourceId]
            # Forget the name so a second target entity with the same name
            # is ignored.
            del sourceIdByName[name]

        cls._removeKnownByUid(
            logs=logs, entities=entities, targetMapping=targetMapping
        )

    @classmethod
    def _applyMapping(cls, collection, nameMapping):
        """Renames collected entities according to a name mapping.

        Args:
            collection: The collection whose instruments and projects are
                renamed in place.
            nameMapping: Dict with the optional keys "instrument" (list of
                dicts with the keys "original" and "mapped") and
                "projectNamePrefix" (string put in front of every project name).

        Raises:
            Exception: If ``nameMapping`` contains neither of the two keys.
        """
        print(list(nameMapping.keys()))
        hasMapping = False

        if "instrument" in nameMapping:
            hasMapping = True
            print("+++ Mapping for instruments found +++")
            nameMap = {m["original"]: m["mapped"] for m in nameMapping["instrument"]}
            for instrument in collection.instruments.values():
                if instrument.name in nameMap:
                    print(
                        "Renaming %a to %a"
                        % (instrument.name, nameMap[instrument.name])
                    )
                    instrument.name = nameMap[instrument.name]

        if "projectNamePrefix" in nameMapping:
            hasMapping = True
            print(
                "+++ Project name prefix %a found +++"
                % nameMapping["projectNamePrefix"]
            )
            for project in collection.projects.values():
                project.name = nameMapping["projectNamePrefix"] + (project.name or "")

        if not hasMapping:
            raise Exception("Mapping does not contain any entity name map.")

    @classmethod
    def _filterKnown(cls, logs, collection, targetMapping):
        """Moves entities that already exist on the target out of the collection.

        Origins, samples, datasets and projects are matched by uid.
        Instruments, methods and experiments are matched by name and then by
        uid. Persons are matched by first and last name. Documents are not
        filtered.

        Args:
            logs: The target LOGS connection.
            collection: Collection of source entities; it is reduced in place
                to the entities that still have to be created.
            targetMapping: Collection that receives the already existing
                target entities, keyed by source id.
        """
        cls._removeKnownByUid(logs, collection.origins, targetMapping.origins)
        cls._removeKnownByUid(logs, collection.samples, targetMapping.samples)
        cls._removeKnownByUid(logs, collection.datasets, targetMapping.datasets)
        cls._removeKnownByUid(logs, collection.projects, targetMapping.projects)

        cls._removeKnownByName(logs, collection.instruments, targetMapping.instruments)
        cls._removeKnownByName(logs, collection.methods, targetMapping.methods)
        cls._removeKnownByName(logs, collection.experiments, targetMapping.experiments)

        # documents TODO: skip documents for now

        unknownPersons = {}
        for knownPerson in collection.persons.values():
            matches = list(
                logs.persons(
                    PersonRequestParameter(
                        firstNames=[knownPerson.firstName],
                        lastNames=[knownPerson.lastName],
                    )
                )
            )
            if matches:
                print(
                    "Person with name %a already exists and will be skipped."
                    % knownPerson.name
                )
                # The first match is taken as the target person.
                targetMapping.persons[knownPerson.id] = matches[0]
            else:
                unknownPersons[knownPerson.id] = knownPerson
        collection.persons = unknownPersons

    @classmethod
    def _mapEntities(cls, orig, mapping):
        """Translates source entity references into target entities.

        Args:
            orig: A single entity, a list of entities or None. Every entity
                needs an ``id`` attribute.
            mapping: Dict from source id to target entity.

        Returns:
            None if ``orig`` is None. For a single entity the mapped entity,
            or None if its id is not in ``mapping``. For a list the list of
            mapped entities; entities without a mapping are dropped.
        """
        if orig is None:
            return None

        if isinstance(orig, list):
            return [mapping[o.id] for o in orig if o.id in mapping]

        return mapping[orig.id] if orig.id in mapping else None

    @classmethod
    def _mapOwner(cls, entities, mapping):
        """Replaces the ``owner`` of every entity by its mapped target person.

        Args:
            entities: Iterable of entities; those without an ``owner``
                attribute are left untouched.
            mapping: Dict from source person id to target person.
        """
        for e in entities:
            if hasattr(e, "owner"):
                e.owner = cls._mapEntities(e.owner, mapping)

    @staticmethod
    def _reportProgress(count, number, *details):
        """Prints the progress line for the count-th of number created entities."""
        print("%.1f%% Creating" % (100.0 * count / number), *details)

    def _createEntities(self, collection, targetMapping):
        """Creates all entities of the collection on the target LOGS.

        Entities are created in the order origins, persons, projects, methods,
        instruments, experiments, samples, datasets. Each created entity is
        registered in ``targetMapping`` under its source id, and references to
        other entities are translated through ``targetMapping`` before the
        entity is created.

        Args:
            collection: The source entities that have to be created.
            targetMapping: Collection of target entities keyed by source id.
                It already holds the known entities and is extended in place.
        """
        numberSum = 0

        number = len(collection.origins)
        numberSum += number
        for count, (sourceId, origin) in enumerate(collection.origins.items(), 1):
            self._reportProgress(count, number, origin, origin.url)
            targetMapping.origins[sourceId] = origin
            self.targetLogs.create(origin)

        # We have only one origin for now so we use the first value from the dict
        origin = list(targetMapping.origins.values())[0]

        number = len(collection.persons)
        numberSum += number
        for count, (sourceId, person) in enumerate(collection.persons.items(), 1):
            if not person.salutation:
                person.salutation = "Mrs."
            self._reportProgress(count, number, person)
            person.setOrigin(uid=person.uid, origin=origin)
            targetMapping.persons[sourceId] = person
            self.targetLogs.create(person)

        # All persons are mapped now, so the owners can be translated.
        for entities in (
            collection.projects,
            collection.samples,
            collection.documents,
            collection.datasets,
            collection.instruments,
            collection.methods,
            collection.persons,
            collection.experiments,
        ):
            self._mapOwner(entities.values(), targetMapping.persons)

        number = len(collection.projects)
        numberSum += number
        for count, (sourceId, project) in enumerate(collection.projects.items(), 1):
            self._reportProgress(count, number, project)
            project.setOrigin(uid=project.uid, origin=origin)
            if project.userPermissions:
                # Keep only the permissions whose person exists on the target.
                userPermissions = []
                for perm in project.userPermissions:
                    if perm.person:
                        perm.person = self._mapEntities(
                            perm.person, targetMapping.persons
                        )
                    if perm.person:
                        userPermissions.append(perm)
                project.userPermissions = userPermissions
            targetMapping.projects[sourceId] = project
            self.targetLogs.create(project)

        # The progress must be based on the number of methods, not projects.
        number = len(collection.methods)
        numberSum += number
        for count, (sourceId, method) in enumerate(collection.methods.items(), 1):
            self._reportProgress(count, number, method)
            method.setOrigin(uid=method.uid, origin=origin)
            targetMapping.methods[sourceId] = method
            self.targetLogs.create(method)

        number = len(collection.instruments)
        numberSum += number
        for count, (sourceId, instrument) in enumerate(
            collection.instruments.items(), 1
        ):
            self._reportProgress(count, number, instrument)
            instrument.method = self._mapEntities(
                instrument.method, targetMapping.methods
            )
            instrument.setOrigin(uid=instrument.uid, origin=origin)
            targetMapping.instruments[sourceId] = instrument
            self.targetLogs.create(instrument)

        number = len(collection.experiments)
        numberSum += number
        for count, (sourceId, experiment) in enumerate(
            collection.experiments.items(), 1
        ):
            self._reportProgress(count, number, experiment)
            experiment.method = self._mapEntities(
                experiment.method, targetMapping.methods
            )
            if not experiment.name:
                self.unknownCount += 1
                experiment.name = "Unknown %d" % self.unknownCount
            experiment.setOrigin(uid=experiment.uid, origin=origin)
            targetMapping.experiments[sourceId] = experiment
            self.targetLogs.create(experiment)

        number = len(collection.samples)
        numberSum += number
        for count, (sourceId, sample) in enumerate(collection.samples.items(), 1):
            self._reportProgress(count, number, sample)
            sample.preparedBy = self._mapEntities(
                sample.preparedBy, targetMapping.persons
            )
            sample.projects = self._mapEntities(sample.projects, targetMapping.projects)
            if not sample.projects:
                # Fall back to the first mapped project.
                sample.projects = list(targetMapping.projects.values())[:1]
            sample.customFields = None
            if sample.tags:
                # The tags are appended to the notes as text.
                if not sample.notes:
                    sample.notes = ""
                else:
                    sample.notes += "\n"
                sample.notes += "Tags from origin:\n" + "\n".join(
                    [
                        "%s: %s" % (k, v.replace(":", "_"))
                        for k, v in sample.tags.items()
                    ]
                )
            sample.setOrigin(uid=sample.uid, origin=origin)
            targetMapping.samples[sourceId] = sample
            self.targetLogs.create(sample)

        number = len(collection.datasets)
        numberSum += number
        # The context manager removes the download directory even if an
        # unexpected error aborts the copy.
        with tempfile.TemporaryDirectory() as tempdir:
            for count, (sourceId, dataset) in enumerate(
                collection.datasets.items(), 1
            ):
                self._reportProgress(count, number, dataset)

                zipName = "dataset_%s" % sourceId
                zipFileName = zipName + ".zip"
                zipDir = os.path.join(tempdir, zipName)
                zipFile = os.path.join(tempdir, zipFileName)

                try:
                    if os.path.exists(zipFile):
                        os.remove(zipFile)
                    dataset.download(
                        directory=tempdir, fileName=zipFileName, overwrite=True
                    )
                    if not os.path.exists(zipDir):
                        with ZipFile(zipFile, "r") as zObject:
                            os.mkdir(zipDir)
                            zObject.extractall(path=zipDir)

                    uid = dataset.uid
                    # Build a new dataset from the source dataset and the
                    # extracted files.
                    dataset = Dataset(dataset, files=zipDir)
                    dataset.instrument = self._mapEntities(
                        dataset.instrument, targetMapping.instruments
                    )
                    dataset.method = self._mapEntities(
                        dataset.method, targetMapping.methods
                    )
                    dataset.operators = self._mapEntities(
                        dataset.operators, targetMapping.persons
                    )
                    dataset.projects = self._mapEntities(
                        dataset.projects, targetMapping.projects
                    )
                    dataset.sample = self._mapEntities(
                        dataset.sample, targetMapping.samples
                    )
                    dataset.experiment = self._mapEntities(
                        dataset.experiment, targetMapping.experiments
                    )
                    if not (dataset.projects and dataset.sample and dataset.operators):
                        dataset.claimed = False
                    dataset.setOrigin(uid=uid, origin=origin)

                    try:
                        targetMapping.datasets[sourceId] = dataset
                        self.targetLogs.create(dataset)
                    except LOGSException as e:
                        # A failed upload is reported and the copy continues.
                        print(
                            " Dataset %a upload failed: '" % dataset.toString()
                            + str(e)
                            + "'"
                        )
                finally:
                    # Free the disk space of this dataset before the next one.
                    if os.path.exists(zipFile):
                        os.remove(zipFile)
                    shutil.rmtree(zipDir, ignore_errors=True)

        print("+++ Created %d entities on target +++" % numberSum)

    def copy(
        self, projectNames=None, projectTags=None, nameMapping=None, projectNamePrefix=None
    ):
        """Copies the selected projects and their related entities to the target.

        The entities are collected from the source, renamed according to the
        name mapping, reduced by the entities that already exist on the target
        and finally created on the target.

        Args:
            projectNames: Names of the source projects to copy.
            projectTags: Tags of the source projects to copy.
            nameMapping: Optional dict as accepted by ``_applyMapping``. It is
                not modified. An empty dict is treated like no mapping.
            projectNamePrefix: Optional prefix put in front of the name of
                every copied project.
        """
        if not nameMapping:
            print("+++ No name mapping. +++")

        # Work on a copy so the caller's dict is never modified.
        effectiveMapping = dict(nameMapping) if nameMapping else {}
        if projectNamePrefix:
            effectiveMapping["projectNamePrefix"] = projectNamePrefix

        collection = self.collectEntitiesFromProjects(
            projectNames=projectNames, projectTags=projectTags
        )
        collection.printSummary()

        if effectiveMapping:
            self._applyMapping(collection, effectiveMapping)

        targetMapping = EntityCollection()
        self._filterKnown(self.targetLogs, collection, targetMapping)

        print()
        print(
            "+++ Final number of entities to copy (after removing duplicates and known). +++"
        )
        collection.printSummary()
        print()

        self._createEntities(collection, targetMapping)
if __name__ == "__main__":
    # Address and API key of the LOGS the projects are read from
    source_url = "https://source.logs.com/sorceGroup"
    source_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

    # Address and API key of the LOGS the projects are written to
    target_url = "https://target.logs.com/targetGroup"
    target_key = "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY"

    # Create the two LOGS instances (source and target LOGS)
    source_logs = LOGS(source_url, source_key, verbose=False)
    target_logs = LOGS(target_url, target_key, verbose=False)
    project_copier = CopyProject(source_logs, target_logs)

    # Optionally a name mapping for the instrument names can be passed:
    # nameMapping = {
    #     "instrument": [
    #         {"original": "OriginalInstrumentName", "mapped": "TargetInstrumentName"}
    #     ]
    # }
    # project_copier.copy(
    #     projectNames=["Project to Copy 1", "Project to Copy 2"],
    #     nameMapping=nameMapping,
    # )

    project_copier.copy(projectNames=["Project to Copy 1", "Project to Copy 2"])