# Source code for adanet.autoensemble.estimator

"""An estimator that learns to ensemble.

Copyright 2018 The AdaNet Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from adanet import core
from adanet.autoensemble.common import _GeneratorFromCandidatePool

import tensorflow.compat.v2 as tf


class AutoEnsembleEstimator(core.Estimator):  # pylint: disable=g-classes-have-attributes
  # pyformat: disable
  """A :class:`tf.estimator.Estimator` that learns to ensemble models.

  Specifically, it learns to ensemble models from a candidate pool using the
  Adanet algorithm.

  .. code-block:: python

      # A simple example of learning to ensemble linear and neural network
      # models.

      import adanet
      import tensorflow as tf

      feature_columns = ...

      head = MultiClassHead(n_classes=10)

      # Learn to ensemble linear and DNN models.
      estimator = adanet.AutoEnsembleEstimator(
          head=head,
          candidate_pool=lambda config: {
              "linear":
                  tf.estimator.LinearEstimator(
                      head=head,
                      feature_columns=feature_columns,
                      config=config,
                      optimizer=...),
              "dnn":
                  tf.estimator.DNNEstimator(
                      head=head,
                      feature_columns=feature_columns,
                      config=config,
                      optimizer=...,
                      hidden_units=[1000, 500, 100])},
          max_iteration_steps=50)

      # Input builders
      def input_fn_train():
        # Returns tf.data.Dataset of (x, y) tuple where y represents label's
        # class index.
        pass
      def input_fn_eval():
        # Returns tf.data.Dataset of (x, y) tuple where y represents label's
        # class index.
        pass
      def input_fn_predict():
        # Returns tf.data.Dataset of (x, None) tuple.
        pass
      estimator.train(input_fn=input_fn_train, steps=100)
      metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
      predictions = estimator.predict(input_fn=input_fn_predict)

  Or to train candidate subestimators on different training data subsets:

  .. code-block:: python

      train_data_files = [...]

      # Learn to ensemble linear and DNN models.
      estimator = adanet.AutoEnsembleEstimator(
          head=head,
          candidate_pool=lambda config: {
              "linear":
                  adanet.AutoEnsembleSubestimator(
                      tf.estimator.LinearEstimator(
                          head=head,
                          feature_columns=feature_columns,
                          config=config,
                          optimizer=...),
                      make_train_input_fn(train_data_files[:-1])),
              "dnn":
                  adanet.AutoEnsembleSubestimator(
                      tf.estimator.DNNEstimator(
                          head=head,
                          feature_columns=feature_columns,
                          config=config,
                          optimizer=...,
                          hidden_units=[1000, 500, 100]),
                      make_train_input_fn(train_data_files[0:]))},
          max_iteration_steps=50)

      estimator.train(input_fn=make_train_input_fn(train_data_files), steps=100)


  Args:
    head: A :class:`tf.contrib.estimator.Head` instance for computing loss and
      evaluation metrics for every candidate.
    candidate_pool: List of :class:`tf.estimator.Estimator` and
      :class:`AutoEnsembleSubestimator` objects, or dict of string name to
      :class:`tf.estimator.Estimator` and :class:`AutoEnsembleSubestimator`
      objects that are candidate subestimators to ensemble at each iteration.
      The order does not directly affect which candidates will be included in
      the final ensemble, but will affect the name of the candidate. When using
      a dict, the string key becomes the candidate subestimator's name.
      Alternatively, this argument can be a function that takes a `config`
      argument and returns the aforementioned values in case the
      objects need to be re-instantiated at each adanet iteration.
    max_iteration_steps: Total number of steps for which to train candidates per
      iteration. If `OutOfRange` or `StopIteration` occurs in the middle,
      training stops before `max_iteration_steps` steps.
    logits_fn: A function for fetching the subnetwork logits from a
      :class:`tf.estimator.EstimatorSpec`, which should obey the following
      signature:
        - `Args`: Can only have following argument:
          - estimator_spec: The candidate's :class:`tf.estimator.EstimatorSpec`.
        - `Returns`: Logits :class:`tf.Tensor` or dict of string to logits
          :class:`tf.Tensor` (for multi-head) for the candidate subnetwork
          extracted from the given `estimator_spec`. When `None`, it will
          default to returning `estimator_spec.predictions` when they are a
          :class:`tf.Tensor` or the :class:`tf.Tensor` for the key 'logits' when
          they are a dict of string to :class:`tf.Tensor`.
    last_layer_fn: An optional function for fetching the subnetwork last_layer
      from a :class:`tf.estimator.EstimatorSpec`, which should obey the
      following signature:
        - `Args`: Can only have following argument:
          - estimator_spec: The candidate's :class:`tf.estimator.EstimatorSpec`.
        - `Returns`: Last layer :class:`tf.Tensor` or dict of string to last
          layer :class:`tf.Tensor` (for multi-head) for the candidate subnetwork
          extracted from the given `estimator_spec`. The last_layer can be used
          for learning ensembles or exporting them as embeddings.
      When `None`, it will default to using the logits as the last_layer.
    ensemblers: See :class:`adanet.Estimator`.
    ensemble_strategies: See :class:`adanet.Estimator`.
    evaluator:  See :class:`adanet.Estimator`.
    metric_fn:  See :class:`adanet.Estimator`.
    force_grow:  See :class:`adanet.Estimator`.
    adanet_loss_decay: See :class:`adanet.Estimator`.
    worker_wait_timeout_secs: See :class:`adanet.Estimator`.
    model_dir: See :class:`adanet.Estimator`.
    config: See :class:`adanet.Estimator`.
    debug: See :class:`adanet.Estimator`.
    enable_ensemble_summaries: See :class:`adanet.Estimator`.
    enable_subnetwork_summaries: See :class:`adanet.Estimator`.
    global_step_combiner_fn: See :class:`adanet.Estimator`.
    max_iterations: See :class:`adanet.Estimator`.
    replay_config: See :class:`adanet.Estimator`.
    **kwargs: Extra keyword args passed to the parent.

  Returns:
    An :class:`adanet.AutoEnsembleEstimator` instance.

  Raises:
    ValueError: If any of the candidates in `candidate_pool` are not
      :class:`tf.estimator.Estimator` instances.
  """
  # pyformat: enable

  def __init__(self,
               head,
               candidate_pool,
               max_iteration_steps,
               ensemblers=None,
               ensemble_strategies=None,
               logits_fn=None,
               last_layer_fn=None,
               evaluator=None,
               metric_fn=None,
               force_grow=False,
               adanet_loss_decay=.9,
               worker_wait_timeout_secs=7200,
               model_dir=None,
               config=None,
               debug=False,
               enable_ensemble_summaries=True,
               enable_subnetwork_summaries=True,
               global_step_combiner_fn=tf.math.reduce_mean,
               max_iterations=None,
               replay_config=None,
               **kwargs):
    """Initializes the estimator; see the class docstring for the arguments."""
    # The parent estimator takes a subnetwork generator rather than a pool of
    # estimators, so `candidate_pool`, `logits_fn` and `last_layer_fn` are
    # consumed here and are not forwarded to the parent constructor.
    subnetwork_generator = _GeneratorFromCandidatePool(candidate_pool,
                                                       logits_fn, last_layer_fn)
    super(AutoEnsembleEstimator, self).__init__(
        head=head,
        subnetwork_generator=subnetwork_generator,
        max_iteration_steps=max_iteration_steps,
        ensemblers=ensemblers,
        ensemble_strategies=ensemble_strategies,
        evaluator=evaluator,
        metric_fn=metric_fn,
        force_grow=force_grow,
        adanet_loss_decay=adanet_loss_decay,
        worker_wait_timeout_secs=worker_wait_timeout_secs,
        model_dir=model_dir,
        config=config,
        debug=debug,
        enable_ensemble_summaries=enable_ensemble_summaries,
        enable_subnetwork_summaries=enable_subnetwork_summaries,
        global_step_combiner_fn=global_step_combiner_fn,
        max_iterations=max_iterations,
        replay_config=replay_config,
        **kwargs)


class AutoEnsembleTPUEstimator(core.TPUEstimator):  # pylint: disable=g-classes-have-attributes
  # pyformat: disable
  """A :class:`tf.estimator.tpu.TPUEstimator` that learns to ensemble models.

  Specifically, it learns to ensemble models from a candidate pool using the
  Adanet algorithm.

  This estimator is capable of training and evaluating on TPU. It can ensemble
  both :class:`tf.estimator.tpu.TPUEstimator` candidates as well as regular
  :class:`tf.estimator.Estimator` candidates, as long as these candidates are
  TPU compatible.

  Note the following restrictions compared to AutoEnsembleEstimator:
    * All candidates must wrap their optimizers with a
      :class:`tf.tpu.CrossShardOptimizer`.
    * The `input_fn` must expose a `params` argument.
    * The `model_fn` of :class:`tf.estimator.tpu.TPUEstimator` candidates must
      also expose a `params` argument.

  WARNING: This Estimator is a work in progress and the API could change at any
  moment. May not support all AutoEnsembleEstimator features.

  .. code-block:: python

      # A simple example of learning to ensemble linear and neural network
      # models on TPU.

      import adanet
      import tensorflow as tf

      feature_columns = ...

      head = MultiClassHead(n_classes=10)

      # Learn to ensemble linear and DNN models.
      estimator = adanet.AutoEnsembleTPUEstimator(
          head=head,
          candidate_pool=lambda config: {
              "linear":
                  tf.estimator.LinearEstimator(
                      head=head,
                      feature_columns=feature_columns,
                      config=config,
                      optimizer=tf.tpu.CrossShardOptimizer(...)),
              "dnn":
                  tf.estimator.DNNEstimator(
                      head=head,
                      feature_columns=feature_columns,
                      config=config,
                      optimizer=tf.tpu.CrossShardOptimizer(...),
                      hidden_units=[1000, 500, 100])},
          max_iteration_steps=50)

      # Input builders
      def input_fn_train(params):
        # Returns tf.data.Dataset of (x, y) tuple where y represents label's
        # class index.
        pass
      def input_fn_eval(params):
        # Returns tf.data.Dataset of (x, y) tuple where y represents label's
        # class index.
        pass
      def input_fn_predict(params):
        # Returns tf.data.Dataset of (x, None) tuple.
        pass
      estimator.train(input_fn=input_fn_train, steps=100)
      metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
      predictions = estimator.predict(input_fn=input_fn_predict)

  Args:
    head: A :class:`tf.contrib.estimator.Head` instance for computing loss and
      evaluation metrics for every candidate.
    candidate_pool: List of :class:`tf.estimator.tpu.TPUEstimator` and
      :class:`AutoEnsembleSubestimator` objects, or dict of string name to
      :class:`tf.estimator.tpu.TPUEstimator` and
      :class:`AutoEnsembleSubestimator` objects that are candidate subestimators
      to ensemble at each iteration. The order does not directly affect which
      candidates will be included in the final ensemble, but will affect the
      name of the candidate. When using a dict, the string key becomes the
      candidate subestimator's name. Alternatively, this argument can be a
      function that takes a `config` argument and returns the aforementioned
      values in case the objects need to be re-instantiated at each adanet
      iteration.
    max_iteration_steps: See :class:`adanet.Estimator`.
    logits_fn: A function for fetching the subnetwork logits from a
      :class:`tf.estimator.EstimatorSpec`, which should obey the following
      signature:
        - `Args`: Can only have following argument:
          - estimator_spec: The candidate's :class:`tf.estimator.EstimatorSpec`.
        - `Returns`: Logits :class:`tf.Tensor` or dict of string to logits
          :class:`tf.Tensor` (for multi-head) for the candidate subnetwork
          extracted from the given `estimator_spec`. When `None`, it will
          default to returning `estimator_spec.predictions` when they are a
          :class:`tf.Tensor` or the :class:`tf.Tensor` for the key 'logits' when
          they are a dict of string to :class:`tf.Tensor`.
    last_layer_fn: An optional function for fetching the subnetwork last_layer
      from a :class:`tf.estimator.EstimatorSpec`, which should obey the
      following signature:
        - `Args`: Can only have following argument:
          - estimator_spec: The candidate's :class:`tf.estimator.EstimatorSpec`.
        - `Returns`: Last layer :class:`tf.Tensor` or dict of string to last
          layer :class:`tf.Tensor` (for multi-head) for the candidate subnetwork
          extracted from the given `estimator_spec`. The last_layer can be used
          for learning ensembles or exporting them as embeddings.
      When `None`, it will default to using the logits as the last_layer.
    ensemblers: See :class:`adanet.Estimator`.
    ensemble_strategies: See :class:`adanet.Estimator`.
    evaluator:  See :class:`adanet.Estimator`.
    metric_fn:  See :class:`adanet.Estimator`.
    force_grow:  See :class:`adanet.Estimator`.
    adanet_loss_decay: See :class:`adanet.Estimator`.
    model_dir: See :class:`adanet.Estimator`.
    config: See :class:`adanet.Estimator`.
    use_tpu: See :class:`adanet.TPUEstimator`.
    eval_on_tpu: See :class:`adanet.TPUEstimator`.
    export_to_tpu: See :class:`adanet.TPUEstimator`.
    train_batch_size: See :class:`adanet.TPUEstimator`.
    eval_batch_size: See :class:`adanet.TPUEstimator`.
    predict_batch_size: See :class:`adanet.TPUEstimator`.
    embedding_config_spec: See :class:`adanet.TPUEstimator`.
    debug: See :class:`adanet.Estimator`.
    enable_ensemble_summaries: See :class:`adanet.Estimator`.
    enable_subnetwork_summaries: See :class:`adanet.Estimator`.
    global_step_combiner_fn: See :class:`adanet.Estimator`.
    max_iterations: See :class:`adanet.Estimator`.
    replay_config: See :class:`adanet.Estimator`.
    **kwargs: Extra keyword args passed to the parent.

  Returns:
    An :class:`adanet.AutoEnsembleTPUEstimator` instance.

  Raises:
    ValueError: If any of the candidates in `candidate_pool` are not
      :class:`tf.estimator.Estimator` instances.
  """
  # pyformat: enable

  def __init__(self,
               head,
               candidate_pool,
               max_iteration_steps,
               ensemblers=None,
               ensemble_strategies=None,
               logits_fn=None,
               last_layer_fn=None,
               evaluator=None,
               metric_fn=None,
               force_grow=False,
               adanet_loss_decay=.9,
               model_dir=None,
               config=None,
               use_tpu=True,
               eval_on_tpu=True,
               export_to_tpu=True,
               train_batch_size=None,
               eval_batch_size=None,
               predict_batch_size=None,
               embedding_config_spec=None,
               debug=False,
               enable_ensemble_summaries=True,
               enable_subnetwork_summaries=True,
               global_step_combiner_fn=tf.math.reduce_mean,
               max_iterations=None,
               replay_config=None,
               **kwargs):
    """Initializes the estimator; see the class docstring for the arguments."""
    # The parent estimator takes a subnetwork generator rather than a pool of
    # estimators, so `candidate_pool`, `logits_fn` and `last_layer_fn` are
    # consumed here and are not forwarded to the parent constructor.
    subnetwork_generator = _GeneratorFromCandidatePool(candidate_pool,
                                                       logits_fn, last_layer_fn)
    super(AutoEnsembleTPUEstimator, self).__init__(
        head=head,
        subnetwork_generator=subnetwork_generator,
        max_iteration_steps=max_iteration_steps,
        ensemblers=ensemblers,
        ensemble_strategies=ensemble_strategies,
        evaluator=evaluator,
        metric_fn=metric_fn,
        force_grow=force_grow,
        adanet_loss_decay=adanet_loss_decay,
        model_dir=model_dir,
        config=config,
        use_tpu=use_tpu,
        eval_on_tpu=eval_on_tpu,
        export_to_tpu=export_to_tpu,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        predict_batch_size=predict_batch_size,
        embedding_config_spec=embedding_config_spec,
        debug=debug,
        enable_ensemble_summaries=enable_ensemble_summaries,
        enable_subnetwork_summaries=enable_subnetwork_summaries,
        global_step_combiner_fn=global_step_combiner_fn,
        max_iterations=max_iterations,
        replay_config=replay_config,
        **kwargs)