Source code for ta2.utils

# -*- coding: utf-8 -*-

import json
import logging
import os
from collections import defaultdict

import numpy as np

LOGGER = logging.getLogger(__name__)


def dump_pipeline(pipeline, dump_dir, rank=None):
    """Write a pipeline description (and optionally its rank) into ``dump_dir``.

    The pipeline is stored as ``<id>.json`` (indented by 4 spaces). When
    ``rank`` is given, it is additionally stored as ``<id>.rank``, a text file
    holding ``str(rank)`` followed by a newline.

    Args:
        pipeline: Either a ``dict`` describing the pipeline, or any object
            exposing ``to_json_structure()`` that returns such a dict. The
            dict must contain an ``'id'`` key whose value is a string.
        dump_dir (str): Existing directory where the files are written.
        rank: Optional rank of the pipeline. ``None`` (default) skips the
            ``.rank`` file.

    Raises:
        KeyError: If the pipeline dict has no ``'id'`` entry.
        OSError: If the files cannot be created inside ``dump_dir``.
    """
    if not isinstance(pipeline, dict):
        pipeline = pipeline.to_json_structure()

    if 'session' in pipeline:
        # Work on a shallow copy so the caller's dict keeps its 'session' entry.
        pipeline = pipeline.copy()
        del pipeline['session']

    pipeline_id = pipeline['id']

    pipeline_path = os.path.join(dump_dir, pipeline_id + '.json')
    with open(pipeline_path, 'w') as pipeline_file:
        json.dump(pipeline, pipeline_file, indent=4)

    if rank is not None:
        rank_path = os.path.join(dump_dir, pipeline_id + '.rank')
        with open(rank_path, 'w') as rank_file:
            print(rank, file=rank_file)


def logging_setup(verbosity=1, logfile=None, logger_name=None, stdout=True):
    """Configure a logger with a file handler and/or a console handler.

    The logger level is derived from ``verbosity`` as ``(3 - verbosity) * 10``:
    ``0`` -> WARNING, ``1`` -> INFO, ``2`` -> DEBUG. Values of 3 or more are
    clamped to DEBUG. Propagation to parent loggers is disabled.

    Every call adds new handlers; handlers that are already attached to the
    logger are not removed.

    Args:
        verbosity (int): Verbosity level; higher means more output.
        logfile (str): Optional path of a log file. When given, a
            ``FileHandler`` is attached to the logger.
        logger_name (str): Name of the logger to configure. ``None``
            (default) configures the root logger.
        stdout (bool): Whether to attach a console ``StreamHandler``. A
            console handler is always attached when no ``logfile`` is given.
            The handler is created with ``logging.StreamHandler()`` defaults.
    """
    logger = logging.getLogger(logger_name)

    # Level 0 is NOTSET, which on a non-root logger means "defer to the parent
    # logger" and would make verbosity >= 3 *less* verbose than verbosity == 2.
    # Clamp at DEBUG so that higher verbosity never reduces the output.
    log_level = max(logging.DEBUG, (3 - verbosity) * 10)

    fmt = '%(asctime)s - %(process)d - %(levelname)s - %(name)s - %(module)s - %(message)s'
    formatter = logging.Formatter(fmt)

    logger.setLevel(log_level)
    logger.propagate = False

    if logfile:
        file_handler = logging.FileHandler(logfile)
        # The handler accepts everything; the logger level does the filtering.
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    if stdout or not logfile:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(log_level)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)


def detect_data_modality(dataset):
    """Guess the data modality of a dataset from its dataset document.

    The dataset document is the JSON file referenced by the first entry of
    ``dataset.metadata.query(())['location_uris']``. The modality is decided
    from the ``resType`` of each entry in its ``dataResources`` list:

    * exactly one resource -> ``'single_table'``;
    * otherwise the resources are scanned in order and the first one that is
      ``'edgeList'`` yields ``'graph'``, while the first one that is neither
      ``'table'`` nor ``'raw'`` yields that ``resType`` itself;
    * if the scan finds nothing -> ``'multi_table'``.

    Args:
        dataset: Object whose ``metadata.query(())`` returns a mapping with a
            ``'location_uris'`` list. The first location may be a
            ``file://`` URI or a plain filesystem path.

    Returns:
        str: The detected data modality.
    """
    location = dataset.metadata.query(())['location_uris'][0]

    # Only strip the scheme when it is actually there; slicing blindly would
    # mangle a location that is already a plain path.
    scheme = 'file://'
    if location.startswith(scheme):
        dataset_doc_path = location[len(scheme):]
    else:
        dataset_doc_path = location

    # JSON documents are UTF-8; do not depend on the locale's default encoding.
    with open(dataset_doc_path, encoding='utf-8') as doc_file:
        dataset_doc = json.load(doc_file)

    resource_types = [resource['resType'] for resource in dataset_doc['dataResources']]

    if len(resource_types) == 1:
        return 'single_table'

    for resource_type in resource_types:
        if resource_type == 'edgeList':
            return 'graph'
        if resource_type not in ('table', 'raw'):
            return resource_type

    return 'multi_table'


def get_dataset_details(dataset, problem):
    """Return the data modality, task type and task subtype of a problem.

    Args:
        dataset: Dataset object, passed as is to ``detect_data_modality``.
        problem: Mapping where ``problem['problem']['task_keywords']`` is an
            indexable collection with at least two entries, each exposing a
            ``name`` string attribute.

    Returns:
        tuple: ``(data_modality, task_type, task_subtype)``, where the task
        type and subtype are the lower-cased names of the first and second
        task keywords respectively.
    """
    data_modality = detect_data_modality(dataset)

    task_keywords = problem['problem']['task_keywords']
    task_type = task_keywords[0].name.lower()
    task_subtype = task_keywords[1].name.lower()

    return data_modality, task_type, task_subtype


def to_dicts(hyperparameters):
    """Convert a flat hyperparameters mapping into a nested one.

    Keys of the input are ``(block, hyperparameter)`` pairs; the output maps
    each block to a dict of ``{hyperparameter: value}``. Along the way the
    values are converted to plain Python objects:

    * numpy integers, floats and booleans become ``int``, ``float``, ``bool``;
    * numpy arrays become (nested) lists;
    * the string ``'None'`` becomes ``None``.

    Any other value is kept unchanged.

    Args:
        hyperparameters (dict): Mapping of ``(block, hyperparameter)`` tuples
            to values.

    Returns:
        collections.defaultdict: ``{block: {hyperparameter: value}}``.
    """
    params_tree = defaultdict(dict)
    for (block, hyperparameter), value in hyperparameters.items():
        if isinstance(value, np.integer):
            value = int(value)
        elif isinstance(value, np.floating):
            value = float(value)
        elif isinstance(value, np.ndarray):
            value = value.tolist()
        elif isinstance(value, np.bool_):
            value = bool(value)
        elif isinstance(value, str) and value == 'None':
            # Only strings are compared against the 'None' placeholder, so
            # values with a non-boolean or raising ``__eq__`` pass through.
            value = None

        params_tree[block][hyperparameter] = value

    return params_tree