# -*- coding: utf-8 -*-
import json
import logging
import os
from collections import defaultdict
import numpy as np
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
def dump_pipeline(pipeline, dump_dir, rank=None):
    """Write a pipeline (and optionally its rank) to files inside ``dump_dir``.

    The pipeline is serialized as indented JSON to ``<id>.json``. When ``rank``
    is given, it is additionally printed to ``<id>.rank``.

    Args:
        pipeline: Either a ``dict`` describing the pipeline, or an object
            exposing ``to_json_structure()`` that returns such a dict.
            The dict must contain an ``'id'`` key.
        dump_dir: Directory where the files are written. It is created
            (including parents) if it does not exist yet.
        rank: Optional rank value. Nothing is written for it when ``None``.
    """
    if not isinstance(pipeline, dict):
        pipeline = pipeline.to_json_structure()

    if 'session' in pipeline:
        # Work on a shallow copy so the caller's dict keeps its 'session' key.
        pipeline = pipeline.copy()
        del pipeline['session']

    # os.makedirs rejects an empty path, which here means "current directory".
    if dump_dir:
        os.makedirs(dump_dir, exist_ok=True)

    pipeline_id = pipeline['id']
    pipeline_path = os.path.join(dump_dir, pipeline_id + '.json')
    with open(pipeline_path, 'w', encoding='utf-8') as pipeline_file:
        json.dump(pipeline, pipeline_file, indent=4)

    if rank is not None:
        rank_path = os.path.join(dump_dir, pipeline_id + '.rank')
        with open(rank_path, 'w', encoding='utf-8') as rank_file:
            print(rank, file=rank_file)
def logging_setup(verbosity=1, logfile=None, logger_name=None, stdout=True):
    """Configure a logger with file and/or console handlers.

    The logger level is derived from ``verbosity`` as ``(3 - verbosity) * 10``:
    0 -> WARNING, 1 -> INFO, 2 -> DEBUG. Higher verbosities are clamped to
    level 1, which lets every record through.

    Args:
        verbosity: Integer verbosity; larger values log more.
        logfile: Optional path of a log file. When given, a file handler
            accepting DEBUG and above is attached.
        logger_name: Name of the logger to configure. ``None`` configures the
            root logger.
        stdout: Whether to attach a console handler. A console handler is
            always attached when no ``logfile`` is given.

    Note:
        Each call attaches new handler objects; calling this repeatedly for
        the same logger adds further handlers to it.
    """
    logger = logging.getLogger(logger_name)

    # A computed level of 0 is NOTSET, which makes a non-root logger defer to
    # its parent's level instead of logging everything. Clamp to 1 so that the
    # highest verbosities really mean "log everything".
    log_level = max((3 - verbosity) * 10, 1)

    fmt = '%(asctime)s - %(process)d - %(levelname)s - %(name)s - %(module)s - %(message)s'
    formatter = logging.Formatter(fmt)

    logger.setLevel(log_level)
    logger.propagate = False

    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    if stdout or not logfile:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(log_level)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
def detect_data_modality(dataset):
    """Infer the data modality of a dataset from its dataset document.

    The document location is read from the first ``'location_uris'`` entry of
    ``dataset.metadata.query(())``. The document is JSON and must contain a
    ``'dataResources'`` list whose items have a ``'resType'`` key.

    Args:
        dataset: Object exposing ``metadata.query(())`` as described above.

    Returns:
        str: ``'single_table'`` when there is exactly one resource. Otherwise,
        scanning the resources in order: ``'graph'`` at the first
        ``'edgeList'`` resource, or the resource type itself at the first type
        that is neither ``'table'`` nor ``'raw'``. ``'multi_table'`` when no
        resource triggers either rule.
    """
    dataset_doc_uri = dataset.metadata.query(())['location_uris'][0]

    # Only strip the scheme when it is actually there; slicing blindly would
    # corrupt a location that is already a plain filesystem path.
    scheme_prefix = 'file://'
    if dataset_doc_uri.startswith(scheme_prefix):
        dataset_doc_path = dataset_doc_uri[len(scheme_prefix):]
    else:
        dataset_doc_path = dataset_doc_uri

    with open(dataset_doc_path, encoding='utf-8') as doc_file:
        dataset_doc = json.load(doc_file)

    resource_types = [
        resource['resType'] for resource in dataset_doc['dataResources']
    ]

    if len(resource_types) == 1:
        return 'single_table'

    for resource_type in resource_types:
        if resource_type == 'edgeList':
            return 'graph'
        if resource_type not in ('table', 'raw'):
            return resource_type

    return 'multi_table'
def get_dataset_details(dataset, problem):
    """Return the data modality, task type and task subtype of a problem.

    Args:
        dataset: Dataset passed on to ``detect_data_modality``.
        problem: Mapping where ``problem['problem']['task_keywords']`` is a
            sequence with at least two items, each exposing a ``name`` string.

    Returns:
        tuple: ``(data_modality, task_type, task_subtype)``, where the task
        type and subtype are the lower-cased names of the first and second
        task keywords respectively.
    """
    modality = detect_data_modality(dataset)
    keywords = problem['problem']['task_keywords']
    # Evaluated lazily and in order: first keyword, then second.
    kind, subkind = (keywords[position].name.lower() for position in (0, 1))
    return modality, kind, subkind
def to_dicts(hyperparameters):
    """Regroup flat hyperparameters into a nested mapping of plain values.

    Args:
        hyperparameters: Mapping whose keys are ``(block, hyperparameter)``
            pairs.

    Returns:
        defaultdict: ``{block: {hyperparameter: value}}``. NumPy integers,
        floats and booleans are converted to the matching Python scalars,
        NumPy arrays to lists, and the string ``'None'`` to ``None``. Any
        other value is stored unchanged.
    """
    # Ordered (numpy type, converter) pairs; the first matching type wins.
    converters = (
        (np.integer, int),
        (np.floating, float),
        (np.ndarray, lambda array: array.tolist()),
        (np.bool_, bool),
    )

    nested = defaultdict(dict)
    for key, raw in hyperparameters.items():
        block, name = key
        for numpy_type, convert in converters:
            if isinstance(raw, numpy_type):
                raw = convert(raw)
                break
        else:
            # Only reached when no numpy conversion applied.
            if raw == 'None':
                raw = None
        nested[block][name] = raw

    return nested