ta2.ta3.core_servicer module

class ta2.ta3.core_servicer.CoreServicer(input_dir, output_dir, static_dir, timeout, debug=False)[source]

Bases: ta3ta2_api.core_pb2_grpc.CoreServicer

DB = {}
DescribeSolution(request, context)[source]
EndSearchSolutions(request, context)[source]
FitSolution(request, context)[source]
GetFitSolutionResults(request, context)[source]
GetProduceSolutionResults(request, context)[source]
GetScoreSolutionResults(request, context)[source]
GetSearchSolutionsResults(request, context)[source]
Hello(request, context)[source]
ListPrimitives(request, context)[source]
ProduceSolution(request, context)[source]
ScoreSolution(request, context)[source]
SearchSolutions(request, context)[source]
SolutionExport(request, context)[source]
StopSearchSolutions(request, context)[source]
UpdateProblem(request, context)[source]
ta2.ta3.core_servicer.VERSION = '2020.2.11'
// How a solution is evaluated when computing a score for it.
enum EvaluationMethod {
  // Default value. Not to be used.
  EVALUATION_METHOD_UNDEFINED = 0;

  // The following are the only evaluation methods required
  // to be supported for the "ScoreSolution" call.
  HOLDOUT = 1;
  K_FOLD = 2;

  // The rest are defined to allow expressing internal evaluation
  // methods used by TA2 during solution search. If any method being used
  // is missing, feel free to request it to be added.
  LEAVE_ONE_OUT = 100;
  // Instead of really scoring, a TA2 might predict the score only.
  PREDICTION = 101;
  // Training data is reused to test as well.
  TRAINING_DATA = 102;
}

// Describes how scoring of a solution should be performed.
message ScoringConfiguration {
  // The evaluation method to use.
  EvaluationMethod method = 1;
  // Number of folds made, if applicable.
  int32 folds = 2;
  // Ratio of train set vs. test set, if applicable.
  double train_test_ratio = 3;
  // Shuffle data? Set to true if employed.
  bool shuffle = 4;
  // Value for random seed to use for shuffling. Optional.
  int32 random_seed = 5;
  // Do stratified k-fold? Set to true if employed.
  bool stratified = 6;
}

// One score value computed for a solution, for one metric and one fold.
message Score {
  ProblemPerformanceMetric metric = 1;
  // When doing multiple folds, which fold is this score associated with, 0-based.
  // We do not aggregate scores across folds on the TA2 side, but expose everything to the TA3.
  // If scoring was not done as part of the cross-validation, then it can be returned
  // as the first and only fold, in which case the value of this field should be 0.
  int32 fold = 2;
  // To which target or targets does this score apply?
  repeated ProblemTarget targets = 3;
  Value value = 4;
}

// State of a long-running process, as reported in progress updates.
enum ProgressState {
  // Default value. Not to be used.
  PROGRESS_UNKNOWN = 0;

  // The process has been scheduled but is pending execution.
  PENDING = 1;
  // The process is currently running. There can be multiple messages with this state
  // (while the process is running).
  RUNNING = 2;
  // The process completed and final results are available.
  COMPLETED = 3;
  // The process failed.
  ERRORED = 4;
}

// After "state" becomes "COMPLETED" or "ERRORED" stream closes.
// The granularity of progress updates is not specified by the API at this time. Some systems
// might be updating frequently and provide many updates of the progress of a whole process
// as well as individual pipeline steps. Some systems might just report these high-level
// progress states once, not doing any progress updates in the meantime. The "status" field
// should contain information to supplement the progress state, such as specific failure details
// in the case of an "ERRORED" state being returned.
message Progress {
  ProgressState state = 1;
  string status = 2;
  // Set only after state becomes "RUNNING". If it never really properly runs, but errors
  // when attempted to run, then it should be the timestamp of the error.
  google.protobuf.Timestamp start = 3;
  // Set only when state is "COMPLETED" or "ERRORED".
  google.protobuf.Timestamp end = 4;
}

// Description of a TA2 score done during solution search. Because there is a wide range of
// potential approaches a TA2 can use to score candidate solutions this might not capture what
// your TA2 is doing. Feel free to request additions to be able to describe your approach.
message SolutionSearchScore {
  ScoringConfiguration scoring_configuration = 1;
  repeated Score scores = 2;
}

// Description of a primitive step inside a described solution.
message PrimitiveStepDescription {
  // Selected value for free pipeline hyper-parameters.
  map<string, Value> hyperparams = 1;
}

// Description of a sub-pipeline step inside a described solution.
message SubpipelineStepDescription {
  // Each step in a sub-pipeline has a description. These are reported in the order of steps
  // in the sub-pipeline.
  repeated StepDescription steps = 1;
}

// Description of one pipeline step: either a primitive or a sub-pipeline.
message StepDescription {
  oneof step {
    PrimitiveStepDescription primitive = 1;
    SubpipelineStepDescription pipeline = 2;
  }
}

// Progress of one pipeline step, recursively for sub-pipelines.
message StepProgress {
  Progress progress = 1;
  // If step is a sub-pipeline, then this list contains progress for each step in the
  // sub-pipeline, in order.
  // List can be incomplete while the process is in progress. Systems can provide
  // steps only at the end (when "progress" equals COMPLETED) and not during running.
  repeated StepProgress steps = 2;
}

// User associated with the run of the solution.
message SolutionRunUser {
  // A UUID of the user. It does not have to map to any real ID, just that it is possible
  // to connect multiple solution actions by the same user together, if necessary.
  string id = 1;
  // Was this run because solution was chosen by this user.
  // NOTE: the "choosen" spelling is part of the published API; renaming the field would
  // change generated accessors and JSON field names for existing clients.
  bool choosen = 2;
  // Textual reason provided by the user why the run was chosen by this user.
  string reason = 3;
}

// See each message's comments for information about each particular call.
service Core {
  rpc SearchSolutions (SearchSolutionsRequest) returns (SearchSolutionsResponse) {}
  rpc GetSearchSolutionsResults (GetSearchSolutionsResultsRequest) returns (stream GetSearchSolutionsResultsResponse) {}
  rpc EndSearchSolutions (EndSearchSolutionsRequest) returns (EndSearchSolutionsResponse) {}
  rpc StopSearchSolutions (StopSearchSolutionsRequest) returns (StopSearchSolutionsResponse) {}

  rpc DescribeSolution (DescribeSolutionRequest) returns (DescribeSolutionResponse) {}

  rpc ScoreSolution (ScoreSolutionRequest) returns (ScoreSolutionResponse) {}
  rpc GetScoreSolutionResults (GetScoreSolutionResultsRequest) returns (stream GetScoreSolutionResultsResponse) {}

  rpc FitSolution (FitSolutionRequest) returns (FitSolutionResponse) {}
  rpc GetFitSolutionResults (GetFitSolutionResultsRequest) returns (stream GetFitSolutionResultsResponse) {}

  rpc ProduceSolution (ProduceSolutionRequest) returns (ProduceSolutionResponse) {}
  rpc GetProduceSolutionResults (GetProduceSolutionResultsRequest) returns (stream GetProduceSolutionResultsResponse) {}

  rpc SolutionExport (SolutionExportRequest) returns (SolutionExportResponse) {}

  rpc UpdateProblem (UpdateProblemRequest) returns (UpdateProblemResponse) {}

  rpc ListPrimitives (ListPrimitivesRequest) returns (ListPrimitivesResponse) {}

  rpc Hello (HelloRequest) returns (HelloResponse) {}
}

// All values are immutable and no files should be changed after a URI
// is provided to the other system. When using shared file system, all
// URIs should be absolute to the file system, for example
// "file:///datasets/dataset_1/datasetDoc.json". It is assumed that both
// TA2 and TA3 systems both have a limited number of shared directories
// mounted at same locations (in previous example, "/datasets" directory).
// When one system creates a dataset and sends over the URI, the other can
// directly access it without doing any extra work (like downloading or copying).
//
// Configuration of shared directories and shared instance of Plasma are not
// specified by this API.
//
// Not all types of non-raw values are necessary to be supported/allowed.
// Both systems maintain a list of allowed value types the other system accepts.
// Some calls also provide a way to provide such a list. When a value is to be
// provided to the other system, the list is traversed in order and the first
// value type which can be used without an error is used. If the list is
// exhausted, then an error is provided instead.
enum ValueType {
  // Default value. Not to be used.
  VALUE_TYPE_UNDEFINED = 0;

  // The following value types are those everyone should support.

  // Raw value. Not all values can be represented as a raw value.
  RAW = 1;
  // Represent the value as a D3M dataset. Only "file://" schema is supported using a
  // shared file system. Dataset URI should point to the "datasetDoc.json" file of the dataset.
  // Only Dataset container values can be represented this way.
  DATASET_URI = 2;
  // Represent the value as a CSV file. Only "file://" schema is supported using a
  // shared file system. CSV URI should point to the file with ".csv" file extension.
  // Only tabular container values with numeric and string cell values can be represented
  // this way.
  CSV_URI = 3;

  // The following are additional value types which can be supported by systems,
  // but it is not required. If the value cannot be represented with value types your system
  // supports and your system is still asked to do so, it should return "ValueError" error instead.

  // Represent values by Python-pickling them. Only "file://" schema is supported using a
  // shared file system. Pickle URI should point to the file with ".pickle" file extension.
  PICKLE_URI = 4;
  // Represent values by Python-pickling them but sending them through the API.
  PICKLE_BLOB = 5;
  // Represent values with arrow and storing them into shared instance of Plasma.
  PLASMA_ID = 6;
}

// Returned when a value could not be provided using any allowed value type.
message ValueError {
  // An error message useful for debugging or logging. Not meant to be very end-user friendly.
  // If a list of supported/allowed value types could not support a given value, then message
  // should say so. On the other hand, if there was really an error using a value type which
  // would otherwise support a given value, then the error message should communicate this error.
  // If there was such an error but some later value type allowed for recovery, then there
  // should be no error.
  string message = 1;
}

// Wrapper for a repeated double value, usable inside a oneof.
message DoubleList {

repeated double list = 1;

}

// Wrapper for a repeated int64 value, usable inside a oneof.
message Int64List {

repeated int64 list = 1;

}

// Wrapper for a repeated bool value, usable inside a oneof.
message BoolList {

repeated bool list = 1;

}

// Wrapper for a repeated string value, usable inside a oneof.
message StringList {

repeated string list = 1;

}

// Wrapper for a repeated bytes value, usable inside a oneof.
message BytesList {

repeated bytes list = 1;

}

// A value passed between systems, in one of the allowed representations.
message Value {
  oneof value {
    // If there was an error trying to provide the value using the requested
    // value type and no other value type was available to be used.
    ValueError error = 1;
    // Raw values directly provided in the message.
    double double = 2;
    int64 int64 = 3;
    bool bool = 4;
    string string = 5;
    bytes bytes = 6;
    DoubleList double_list = 7;
    Int64List int64_list = 8;
    BoolList bool_list = 9;
    StringList string_list = 10;
    BytesList bytes_list = 11;
    // A URI pointing to a dataset. Resulting value is Dataset container value from loading this URI.
    string dataset_uri = 12;
    // A URI pointing to a CSV file.
    string csv_uri = 13;
    // A URI to a Python-pickled value.
    string pickle_uri = 14;
    // A Python-pickled value itself.
    bytes pickle_blob = 15;
    // 20 bytes of Plasma ObjectID of the value.
    bytes plasma_id = 16;
  }
}

// Top level classification of the problem.
enum TaskType {
  // Default value. Not to be used.
  TASK_TYPE_UNDEFINED = 0;

  CLASSIFICATION = 1;
  REGRESSION = 2;
  CLUSTERING = 3;
  LINK_PREDICTION = 4;
  VERTEX_NOMINATION = 5;
  COMMUNITY_DETECTION = 6;
  GRAPH_CLUSTERING = 7;
  GRAPH_MATCHING = 8;
  TIME_SERIES_FORECASTING = 9;
  COLLABORATIVE_FILTERING = 10;
  OBJECT_DETECTION = 11;
}

// Secondary classification of the problem.
enum TaskSubtype {
  // Default value. Not to be used.
  TASK_SUBTYPE_UNDEFINED = 0;

  // No secondary task is applicable for this problem.
  NONE = 1;
  BINARY = 2;
  MULTICLASS = 3;
  MULTILABEL = 4;
  UNIVARIATE = 5;
  MULTIVARIATE = 6;
  OVERLAPPING = 7;
  NONOVERLAPPING = 8;
}

// The evaluation metric for any potential solution.
enum PerformanceMetric {
  // Default value. Not to be used.
  METRIC_UNDEFINED = 0;

  // The following are the only evaluation methods required
  // to be supported for the ScoreSolution call.
  ACCURACY = 1;
  PRECISION = 2;
  RECALL = 3;
  F1 = 4;
  F1_MICRO = 5;
  F1_MACRO = 6;
  ROC_AUC = 7;
  ROC_AUC_MICRO = 8;
  ROC_AUC_MACRO = 9;
  MEAN_SQUARED_ERROR = 10;
  ROOT_MEAN_SQUARED_ERROR = 11;
  ROOT_MEAN_SQUARED_ERROR_AVG = 12;
  MEAN_ABSOLUTE_ERROR = 13;
  R_SQUARED = 14;
  NORMALIZED_MUTUAL_INFORMATION = 15;
  JACCARD_SIMILARITY_SCORE = 16;
  PRECISION_AT_TOP_K = 17;
  OBJECT_DETECTION_AVERAGE_PRECISION = 18;

  // The rest are defined to allow expressing internal evaluation
  // scores used by TA2 during pipeline search. If any you are using
  // is missing, feel free to request it to be added.
  // Average loss of an unspecified loss function.
  LOSS = 100;
}

// A performance metric together with metric-specific parameters.
message ProblemPerformanceMetric {
  PerformanceMetric metric = 1;
  // Additional params used by some metrics.
  int32 k = 2;
  string pos_label = 3;
}

// Core description of a problem: its identity, task classification, and metrics.
message Problem {
  // ID of this problem.
  string id = 1;
  // Version of this problem.
  string version = 2;
  string name = 3;
  string description = 4;
  TaskType task_type = 5;
  TaskSubtype task_subtype = 6;
  repeated ProblemPerformanceMetric performance_metrics = 7;
}

// Identifies one target column of a problem inside a dataset resource.
message ProblemTarget {
  int32 target_index = 1;
  string resource_id = 2;
  int32 column_index = 3;
  string column_name = 4;
  int32 clusters_number = 5;
}

// One input dataset of a problem, together with its targets.
message ProblemInput {
  // Should match one of input datasets given to the pipeline search.
  // Every "Dataset" object has an "id" associated with it and is available
  // in its metadata. That ID is then used here to reference those inputs.
  string dataset_id = 1;
  // Targets should resolve to columns in a given dataset.
  repeated ProblemTarget targets = 2;
}

// Problem description matches the parsed problem description by
// the d3m_metadata.problem.Problem.load Python method.
// Problem outputs are not necessary for the purpose of this API
// and are needed only when executing an exported pipeline, but then
// TA2 gets full problem description anyway directly.
message ProblemDescription {
  Problem problem = 1;
  repeated ProblemInput inputs = 2;
}

// Pipeline description contains many "data references". Data reference is just a string
// which identifies an output of a step or a pipeline input and forms a data-flow connection
// between data available and an input to a step. It is recommended to be a string of the
// following forms:
//
// * steps.<number>.<id> — "number" identifies the step in the list of steps (0-based)
//   and "id" identifies the name of a produce method of the primitive,
//   or the output of a pipeline step
//
// * inputs.<number> — "number" identifies the pipeline input (0-based)
//
// * outputs.<number> — "number" identifies the pipeline output (0-based)

message ContainerArgument {
  // Data reference.
  string data = 1;
}

// A singleton output of another step, passed by data reference.
message DataArgument {
  // Data reference.
  string data = 1;
}

// A set of data references.
message DataArguments {
  repeated string data = 1;
}

// A primitive instance passed as a value, referenced by step index.
message PrimitiveArgument {
  // 0-based index identifying a step of which primitive is used as a value.
  int32 data = 1;
}

// A set of primitive instances passed as values, referenced by step indexes.
message PrimitiveArguments {
  // 0-based index identifying a step of which primitive is used as a value.
  repeated int32 data = 1;
}

// A constant value passed as an argument or hyper-parameter.
message ValueArgument {
  Value data = 1;
}

// An argument to a primitive step: either a container or a singleton step output.
message PrimitiveStepArgument {
  oneof argument {
    // A container data type as an argument.
    ContainerArgument container = 1;
    // A singleton output from another step as an argument.
    DataArgument data = 2;
  }
}

// A fixed hyper-parameter of a primitive step, in one of several forms.
message PrimitiveStepHyperparameter {
  oneof argument {
    // A container data type as a hyper-parameter.
    ContainerArgument container = 1;
    // A singleton output from another step as a hyper-parameter.
    DataArgument data = 2;
    // A primitive instance to be passed as a hyper-parameter.
    PrimitiveArgument primitive = 3;
    // A constant value of a hyper-parameter.
    ValueArgument value = 4;
    // A set of singleton outputs from other steps in a pipeline.
    DataArguments data_set = 5;
    // A set of primitive instances to be passed as a hyper-parameter.
    PrimitiveArguments primitives_set = 6;
  }
}

// An input to a pipeline step, given as a data reference.
message StepInput {
  // Data reference.
  string data = 1;
}

// An output of a pipeline step.
message StepOutput {
  // Name which becomes part of the data reference.
  string id = 1;
}

// Provenance of a pipeline or template.
message PipelineSource {
  // String representing name of the author, team.
  string name = 1;
  // An URI to contact the source.
  string contact = 2;
  // A list of pipeline IDs used to derive the pipeline.
  repeated string pipelines = 3;
}

// In which context a template or pipeline was made.
enum PipelineContext {
  // Default value. Not to be used.
  PIPELINE_CONTEXT_UNKNOWN = 0;

  // Pipeline was created during building/training of the system itself, e.g., during metalearning.
  PRETRAINING = 1;
  // Pipeline was created during development or testing of the system itself, e.g., during debugging.
  TESTING = 2;
  // Pipeline was created during evaluation of the system itself, e.g., NIST blind evaluation.
  EVALUATION = 3;
  // Pipeline was created during regular (production) operation of the system.
  PRODUCTION = 4;
}

// User associated with the creation of the template/pipeline, or selection of a primitive.
message PipelineDescriptionUser {
  // Globally unique ID for this user. It can be opaque, but it should identify the same user
  // across sessions. Consider using UUID variant 5 with namespace set to the name of your system
  // and name to an ID in your system's database. It does not have to map to any real ID, just
  // that it is possible to connect multiple pipelines/templates by the same user together,
  // if necessary.
  string id = 1;
  // A natural language description of what the user did to be on the list, e.g., "Picked
  // a pipeline from a list of pipelines.".
  string reason = 2;
  // A natural language description by the user of what the user did,
  // e.g., "I picked a pipeline because it looks short in comparison with others.".
  string rationale = 3;
}

// Possible input to the pipeline or template.
message PipelineDescriptionInput {
  // Human friendly name of the input.
  string name = 1;
}

// Available output of the pipeline or template.
message PipelineDescriptionOutput {
  // Human friendly name of the output.
  string name = 1;
  // Data reference, probably of an output of a step.
  string data = 2;
}

// A pipeline step which runs a primitive.
message PrimitivePipelineDescriptionStep {
  Primitive primitive = 1;
  // Arguments to the primitive. Constructor arguments should not be listed here, because they
  // can be automatically created from other information. All these arguments are listed as kind
  // "PIPELINE" in primitive's metadata.
  map<string, PrimitiveStepArgument> arguments = 2;
  // List of produce methods providing data. One can reference using data reference these outputs
  // then in arguments (inputs) in other steps or pipeline outputs.
  repeated StepOutput outputs = 3;
  // Some hyper-parameters are not really tunable and should be fixed as part of template/pipeline.
  // This can be done here. Hyper-parameters listed here cannot be tuned or overridden. Author of a
  // template/pipeline decides which hyper-parameters are which, probably based on their semantic type.
  // TA3 can specify a list of hyper-parameters to fix, and TA2 can add to the list additional
  // hyper-parameters in found pipelines.
  map<string, PrimitiveStepHyperparameter> hyperparams = 4;
  // List of users associated with selection of this primitive/arguments/hyper-parameters. Optional.
  repeated PipelineDescriptionUser users = 5;
}

// A pipeline step which runs another (sub-)pipeline.
message SubpipelinePipelineDescriptionStep {
  // Only "id" field is required in this case to reference another pipeline in the template.
  PipelineDescription pipeline = 1;
  // List of data references, probably of an output of a step or pipeline input,
  // mapped to sub-pipeline's inputs in order.
  repeated StepInput inputs = 2;
  // List of IDs to be used in data references, mapping sub-pipeline's outputs in order.
  repeated StepOutput outputs = 3;
}

// Used to represent a pipeline template which can be used to generate full pipelines.
// A placeholder is replaced with a pipeline step to form a pipeline. See README.md
// for restrictions on the number of them, their position, allowed inputs and outputs,
// etc.
message PlaceholderPipelineDescriptionStep {
  // List of inputs which can be used as inputs to resulting sub-pipeline. Resulting
  // sub-pipeline does not have to use all the inputs, but it cannot use any other inputs.
  repeated StepInput inputs = 1;
  // A list of outputs of the resulting sub-pipeline.
  repeated StepOutput outputs = 2;
}

// One step of a pipeline: a primitive, a sub-pipeline, or a placeholder.
message PipelineDescriptionStep {
  oneof step {
    PrimitivePipelineDescriptionStep primitive = 1;
    SubpipelinePipelineDescriptionStep pipeline = 2;
    PlaceholderPipelineDescriptionStep placeholder = 3;
  }
}

// Pipeline description matches the D3M pipeline description.
// It serves two purposes: describing found pipelines by TA2 to TA3, and communicating pipeline
// templates by TA3 to TA2. Because of this some fields are reasonable only in one of those uses.
// They are marked with "TA2" or "TA3" in the comment, for fields which are primarily to be set
// only by TA2 or only by TA3, respectively.
message PipelineDescription {
  // TA2: UUID of the pipeline. Templates do not have IDs. But TA3 might provide it for a fully
  // specified pipeline. It does not necessarily have to match "solution_id" from
  // "ListSolutionsResponse" and other related messages. Those IDs are about whole solutions
  // (pipeline, potentially fitted, with set hyper-parameters). This here ID is about this
  // particular pipeline description.
  string id = 1;
  // "schema" field is not needed because it is fixed by the TA2-TA3 protocol version.
  // System which generated a pipeline or a template. Optional.
  PipelineSource source = 2;
  // TA2: Timestamp when created. Templates do not have this timestamp. TA3 might provide it for
  // a fully specified pipeline.
  google.protobuf.Timestamp created = 3;
  // In which context a template or pipeline was made. This is helpful to distinguish evaluation
  // context from other contexts. The value should not really influence different behavior from
  // either system, but it is useful when recording metalearning information to understand this.
  PipelineContext context = 4;
  // Human friendly name of the pipeline. For templates it can be a hint to
  // TA2 how to name found pipelines. Optional.
  string name = 5;
  // Human friendly description of the pipeline. Optional.
  string description = 6;
  // List of users associated with the creation of the template and consequently of the pipeline.
  // TA2 can store this information into metalearning database. TA2 is not really expected to use
  // this information during pipeline search. TA2 should not have to understand TA3 users, mapping
  // between users and pipeline search IDs is something TA3 should handle. Optional.
  repeated PipelineDescriptionUser users = 7;
  // In most cases inputs are datasets. But if TA3 wants to just run a primitive, it can send a
  // template with only that primitive in the template, and then pass anything to its inputs during
  // execution. Here, we are describing possible inputs to the pipeline or template. Order matters.
  repeated PipelineDescriptionInput inputs = 8;
  // Available outputs of the pipeline or template.
  repeated PipelineDescriptionOutput outputs = 9;
  // Steps defining the pipeline.
  repeated PipelineDescriptionStep steps = 10;
}

ta2.ta3.core_servicer.camel_case(name)[source]
ta2.ta3.core_servicer.dt2ts(dt)[source]
ta2.ta3.core_servicer.recursivedict()[source]