tensorflow/tensorflow/python/eager/function.py at master · feihugis/tensorflow

History

3831 lines (3311 loc) · 159 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# ==============================================================================

# pylint: disable=unidiomatic-typecheck

"""Defun decorator for defining graph-mode functions."""

from __future__ import absolute_import

from __future__ import division

from __future__ import print_function

import collections

import functools

import itertools

import pprint

import threading

import types as types_lib

import weakref

import numpy as np

import six

from six.moves import map

from tensorflow.core.framework import attr_value_pb2

from tensorflow.core.framework import function_pb2

from tensorflow.python import _pywrap_utils

from tensorflow.python import pywrap_tfe

from tensorflow.python.client import pywrap_tf_session

from tensorflow.python.eager import backprop

from tensorflow.python.eager import backprop_util

from tensorflow.python.eager import context

from tensorflow.python.eager import execute

from tensorflow.python.eager import forwardprop_util

from tensorflow.python.eager import tape

from tensorflow.python.eager.graph_only_ops import graph_placeholder

from tensorflow.python.framework import c_api_util

from tensorflow.python.framework import composite_tensor

from tensorflow.python.framework import constant_op

from tensorflow.python.framework import device as pydev

from tensorflow.python.framework import dtypes

from tensorflow.python.framework import error_interpolation

from tensorflow.python.framework import errors

from tensorflow.python.framework import func_graph as func_graph_module

from tensorflow.python.framework import ops

from tensorflow.python.framework import tensor_shape

from tensorflow.python.framework import tensor_spec

from tensorflow.python.framework import tensor_util

from tensorflow.python.framework import type_spec

from tensorflow.python.ops import array_ops

from tensorflow.python.ops import control_flow_ops

from tensorflow.python.ops import custom_gradient

from tensorflow.python.ops import default_gradient

from tensorflow.python.ops import functional_ops

from tensorflow.python.ops import gradients_util

from tensorflow.python.ops import resource_variable_ops

from tensorflow.python.platform import tf_logging as logging

from tensorflow.python.profiler import trace

from tensorflow.python.util import compat

from tensorflow.python.util import function_utils

from tensorflow.python.util import lazy_loader

from tensorflow.python.util import memory

from tensorflow.python.util import nest

from tensorflow.python.util import object_identity

from tensorflow.python.util import tf_decorator

from tensorflow.python.util import tf_inspect

# Loaded lazily due to a circular dependency (roughly

# tf.function->autograph->->dataset->tf.function).

# TODO(b/133251390): Use a regular import.

# Handle for the autograph `ag_ctx` module, loaded lazily because of the
# circular dependency described in the comment above.
ag_ctx = lazy_loader.LazyLoader(
    "ag_ctx", globals(),
    "tensorflow.python.autograph.core.ag_ctx")
# Handle for the `numpy_ops.np_arrays` module, also created via LazyLoader.
# NOTE(review): presumably deferred for the same import-cycle reason -- confirm.
np_arrays = lazy_loader.LazyLoader(
    "np_arrays", globals(),
    "tensorflow.python.ops.numpy_ops.np_arrays")

# Function attribute keys. In `_construct_forward_backward` the backward
# function is tagged with FORWARD_FUNCTION_ATTRIBUTE_NAME (value: the forward
# function's name) and the forward function with
# BACKWARD_FUNCTION_ATTRIBUTE_NAME (value: the backward function's name).
FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name"
BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name"
# Attribute key that is removed from the attributes copied onto generated
# forward/backward functions (see `_construct_forward_backward`).
IMPLEMENTS_ATTRIBUTE_NAME = "_implements"
# NOTE(review): not referenced in the visible part of this file; usage to be
# confirmed against the rest of the module.
SHARED_RENDEZVOUS_ATTRIBUTE_NAME = "shared_rendezvous"

def _make_input_signature_hashable(elem):

"""Rewrite input signature to be hashable.

We replace nested variables in the input signature with TensorSpec in order to

be hashable.

Args:

elem: Input signature element

Returns:

A hashable object for the requested input signature

"""

# TODO(slebedev): consider using nest.

if isinstance(elem, tuple):

return tuple(map(_make_input_signature_hashable, elem))

try:

hash(elem)

except TypeError:

# TFE_Py_EncodeArg weakrefs arguments it does not recognize, and we expect

# all recognized types to be hashable.

assert isinstance(elem, weakref.ReferenceType)

v = elem()

if resource_variable_ops.is_resource_variable(v):

# We special case variables here to use unique_id as the cache key. This

# ensures we have to retrace whenever a different variable is passed in.

# This is needed to support cases where the user may use the id of a

# variable in the function perhaps as a lookup in a dictionary.

# This choice leads to more retracing when we could have possibly used the

# shape and dtype instead. However, we expect the number of variables in a

# program to be bounded, and correspondingly the number of retraces.

# Note we also include the class name to avoid collisions with strings.

return v.__class__, v._unique_id # pylint: disable=protected-access

if _is_ndarray(v):

# Numpy arrays are not hashable, but when calling functions we treat them

# in the same way as tf.Tensors.

if not hasattr(v, "shape") or not hasattr(v, "dtype"):

# TODO(tomhennigan) De-dup with _as_ndarray in _convert_numpy_inputs.

v = _as_ndarray(v)

return tensor_spec.TensorSpec(v.shape, v.dtype)

raise ValueError("Arguments to a tf.function must be Tensors, Variables, "

"or hashable Python objects (or nested structures of "

"these types).\nGot type: %s" % type(v).__name__)

return elem

# Immutable record with six named fields.
# NOTE(review): presumably identifies one traced function in the function
# cache; the code that builds and looks up these keys is outside this chunk.
CacheKey = collections.namedtuple("CacheKey", [
    "input_signature",
    "parent_graph",
    "device_functions",
    "colocation_stack",
    "in_cross_replica_context",
    "xla_context_id",
])

def _type_spec_for(x):
  """Returns a TypeSpec for `x`, or `None` if `x` doesn't have a TypeSpec.

  Tensors map to a `TensorSpec` built from the tensor, `TypeSpec` instances are
  returned as-is, and composite tensors yield their own type spec.
  """
  if isinstance(x, ops.Tensor):
    return tensor_spec.TensorSpec.from_tensor(x)
  if isinstance(x, type_spec.TypeSpec):
    return x
  if isinstance(x, composite_tensor.CompositeTensor):
    return x._type_spec  # pylint: disable=protected-access
  return None

def _is_type_subset(a, b):

"""Returns true if TypeSpec `b` is a subset of type `a` (or if a is None.)"""

if a is None:

return True

else:

return a.most_specific_compatible_type(b) == a

def _shape_relaxed_type_for_composite_tensor(x):
  """Returns a shape-relaxed TypeSpec for x (if composite) or x (if not).

  Non-composite values pass through untouched; composite tensors are replaced
  by their type spec with `_with_tensor_ranks_only()` applied.
  """
  if not isinstance(x, composite_tensor.CompositeTensor):
    return x
  # pylint: disable=protected-access
  relaxed_spec = x._type_spec._with_tensor_ranks_only()
  # pylint: enable=protected-access
  return relaxed_spec

def common_shape(x, y):
  """Find a `TensorShape` that is compatible with both `x` and `y`.

  Args:
    x: A `TensorShape`, or `None` when the associated input was not a Tensor.
    y: A `TensorShape`, or `None` when the associated input was not a Tensor.

  Returns:
    `None` if both arguments are `None`. Otherwise a `TensorShape`: of unknown
    rank if the ranks differ or are unknown, else one whose dimensions keep the
    static size where `x` and `y` agree on a known size and are `None`
    elsewhere.

  Raises:
    RuntimeError: If exactly one of `x` and `y` is `None`.
    TypeError: If a non-`None` argument is not a `TensorShape`.
  """
  # The parentheses are required: `x is None != y is None` is parsed as the
  # chained comparison `(x is None) and (None != y) and (y is None)`, which can
  # never be true, so the mismatch would go undetected.
  if (x is None) != (y is None):
    raise RuntimeError(
        "Cannot find a common shape when LHS shape is None but RHS shape "
        "is not (or vice versa): %s vs. %s" % (x, y))
  if x is None:
    return None  # The associated input was not a Tensor, no shape generated.
  if not isinstance(x, tensor_shape.TensorShape):
    raise TypeError("Expected x to be a TensorShape but saw %s" % (x,))
  if not isinstance(y, tensor_shape.TensorShape):
    raise TypeError("Expected y to be a TensorShape but saw %s" % (y,))
  if x.rank != y.rank or x.rank is None:
    # Nothing can be said per dimension; fall back to a fully unknown shape.
    return tensor_shape.TensorShape(None)
  dims = []
  for dim_x, dim_y in zip(x.dims, y.dims):
    if (dim_x != dim_y
        or tensor_shape.dimension_value(dim_x) is None
        or tensor_shape.dimension_value(dim_y) is None):
      # Disagreeing or unknown sizes relax to an unknown dimension.
      dims.append(None)
    else:
      dims.append(tensor_shape.dimension_value(dim_x))
  return tensor_shape.TensorShape(dims)

def is_same_structure(structure1,
                      structure2,
                      check_values=False):
  """Check two structures for equality, optionally of types and of values.

  Args:
    structure1: First nested structure.
    structure2: Second nested structure.
    check_values: If True, the flattened leaves must additionally have
      pairwise identical types and compare equal.

  Returns:
    True if the structures match (and, when requested, their leaves do too),
    False otherwise.
  """
  try:
    nest.assert_same_structure(structure1, structure2, expand_composites=True)
  except (ValueError, TypeError):
    return False

  if not check_values:
    return True

  leaves1 = nest.flatten(structure1, expand_composites=True)
  leaves2 = nest.flatten(structure2, expand_composites=True)
  # First check the types to avoid AttributeErrors.
  for leaf1, leaf2 in zip(leaves1, leaves2):
    if type(leaf1) != type(leaf2):
      return False
  return leaves1 == leaves2

def _parse_func_attrs(attributes):

"""Convert the keyword arguments into function_def attributes.

Currently only support primitive types: bool, int, float and string.

Args:

attributes: the dictionary of attributes.

Returns:

A dict of attributes where the key is the name of attribute and the value

is the AttrValue proto.

Raises:

ValueError: If the kwargs contains unwhitelisted name or unsupported value

types.

"""

attrs = {}

for key, value in attributes.items():

if isinstance(value, attr_value_pb2.AttrValue):

attrs[key] = value

# bool type check has to happen before int since bool is a subclass of int.

elif isinstance(value, bool):

attrs[key] = attr_value_pb2.AttrValue(b=value)

elif isinstance(value, int):

attrs[key] = attr_value_pb2.AttrValue(i=value)

elif isinstance(value, float):

attrs[key] = attr_value_pb2.AttrValue(f=value)

elif isinstance(value, (str, bytes, six.text_type)):

attrs[key] = attr_value_pb2.AttrValue(s=compat.as_bytes(value))

else:

raise ValueError("Unsupported attribute type for %s with type %s" %

(key, type(value)))

return attrs

class _InterpolateFunctionError(object):

"""Context Manager that interpolates the exception from 'top_level_func'."""

def __init__(self, top_level_func):

self._func = top_level_func

def __enter__(self):

pass

def __exit__(self, typ, exc, tb):

if not exc or not isinstance(exc, errors.OpError):

return False

message = compat.as_text(exc.message)

_, tags = error_interpolation.parse_message(message)

g = None

func_stack = []

for t in tags:

if t.type == "function_node":

# TODO(mdan): Tests should cover this.

if t.name == compat.as_str(self._func.name):

g = self._func.graph

elif g:

next_func = g._get_function(t.name) # pylint: disable=protected-access

if next_func is not None and isinstance(next_func,

_EagerDefinedFunction):

g = next_func.graph

if g:

func_stack.append(g.name)

else:

func_stack.append("<unknown>")

if g:

message = error_interpolation.interpolate(message, g)

message += "\n\nFunction call stack:\n"

message += " -> ".join(func_stack)

message += "\n"

exc._message = message # pylint: disable=protected-access

return False

# Callbacks invoked with every newly created `_EagerDefinedFunction`.
_function_callbacks = set()


def add_function_callback(function_callback):
  """Registers a callback to run whenever a Function is created.

  The callback must accept a single argument:

    `def function_callback(function):`

  where `function` is the `_EagerDefinedFunction` that was just created. It is
  called right after construction of each new `_EagerDefinedFunction`, and
  whatever it returns is ignored.

  Registering the same callback more than once has no additional effect.

  A registered callback can later be removed with
  `remove_function_callback()`.

  Args:
    function_callback: The callback to add.
  """
  _function_callbacks.add(function_callback)

def remove_function_callback(function_callback):
  """Unregisters a callback previously passed to `add_function_callback()`.

  See the doc string of `add_function_callback()` for more information.

  Args:
    function_callback: The callback to remove.

  Raises:
    KeyError: If `function_callback` is not currently registered.
  """
  _function_callbacks.remove(function_callback)

def clear_function_callbacks():
  """Clear all function callbacks, if any have been registered."""
  _function_callbacks.clear()

# Name prefixes used by `_forward_name`, `_backward_name` and `_inference_name`
# when generating names for forward, backward and inference functions.
_FORWARD_PREFIX = "__forward_"
_BACKWARD_PREFIX = "__backward_"
_INFERENCE_PREFIX = "__inference_"

def _forward_name(n):
  """Builds the name of a generated forward defun for the function named n.

  The result is the forward prefix, then `n`, then `_` and a fresh `ops.uid()`.
  """
  unique_suffix = ops.uid()
  return "%s%s_%s" % (_FORWARD_PREFIX, n, unique_suffix)

def _backward_name(n):
  """Builds the name of a generated backward defun for the function named n.

  The result is the backward prefix, then `n`, then `_` and a fresh
  `ops.uid()`.
  """
  unique_suffix = ops.uid()
  return "%s%s_%s" % (_BACKWARD_PREFIX, n, unique_suffix)

def _inference_name(n):
  """Builds the name of a forward-but-no-gradient defun for the name n.

  The result is the inference prefix, then `n`, then `_` and a fresh
  `ops.uid()`.
  """
  unique_suffix = ops.uid()
  return "%s%s_%s" % (_INFERENCE_PREFIX, n, unique_suffix)

def _enclosing_xla_context():
  """Returns the XLAControlFlowContext, which exists inside a tpu.rewrite().

  Starting at the default graph, each graph's control flow context chain is
  searched outwards; if no XLA context is found the search moves on to the
  graph's `outer_graph`. Returns `None` when no graph in the chain has one.
  """
  current_graph = ops.get_default_graph()
  while True:
    if current_graph is None:
      return None
    # pylint: disable=protected-access
    flow_context = current_graph._get_control_flow_context()
    # pylint: enable=protected-access
    while flow_context is not None:
      if isinstance(flow_context, control_flow_ops.XLAControlFlowContext):
        return flow_context
      flow_context = flow_context.outer_context
    # This may be a FuncGraph due to defuns or v2 control flow. We need to
    # find the original graph with the XLAControlFlowContext.
    current_graph = getattr(current_graph, "outer_graph", None)

class _EagerDefinedFunctionDeleter(object):
  """Unregister function from eager context."""

  def __init__(self, name):
    """Remembers the `name` of the function to remove on deletion."""
    self.name = name

  def __del__(self):
    """Removes the function from the context, ignoring teardown errors."""
    try:
      context.remove_function(self.name)
    except (TypeError, AttributeError):
      # Both are suppressed mainly for the case when we're running on module
      # deletion. Things that can go wrong include the context module already
      # being unloaded, self._handle._handle_data no longer being valid, and
      # so on. Printing warnings in these cases is silly (exceptions raised
      # from __del__ are printed as warnings to stderr).
      #
      # TypeError: 'NoneType' object is not callable when the handle has been
      # partially unloaded.
      # AttributeError: 'NoneType' object has no attribute 'eager_mode' when
      # context has been unloaded. Will catch other module unloads as well.
      pass

# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction

# so it doesn't have the definition-generating logic and is just a container for

# an already-defined function.

class _EagerDefinedFunction(object):
  """Callable with the interface of `framework.function._DefinedFunction`.

  `_EagerDefinedFunction` encapsulates a function definition and its properties,
  and it provides a method for calling the encapsulated function. Some Ops
  take functions as attributes, which have type `func`; an instance of this
  class may be provided as the value of these `func` attributes.
  """

  def __init__(self, name, graph, inputs, outputs, attrs):
    """Initializes an eager defined function.

    Converts `graph` into a function through the C API, attaches `attrs`,
    parses the resulting FunctionDef and, when executing eagerly, registers
    the function with the eager context. Finally every callback registered
    via `add_function_callback` is invoked with the new instance.

    Args:
      name: str, the name for the created function.
      graph: Graph, the graph containing the operations in the function
      inputs: the tensors in the graph to be used as inputs to the function
      outputs: the tensors in the graph which will be outputs to the function
      attrs: dict mapping names of attributes to their AttrValue values
    """
    # The function body is every op of `graph` except the ops that produce
    # `inputs`.
    input_ops = set(arg.op for arg in inputs)
    operations = [op for op in graph.get_operations() if op not in input_ops]

    # Use the graph's own output names only if every output has one and the
    # names are unique; otherwise pass an empty list (auto-naming).
    graph_output_names = graph._output_names  # pylint: disable=protected-access
    if (graph_output_names is not None and
        all(ops.tensor_id(t) in graph_output_names for t in outputs)):
      output_names = [
          compat.as_bytes(graph_output_names[ops.tensor_id(t)]) for t in outputs
      ]
      if len(set(output_names)) != len(output_names):
        # There are duplicate names for some reason, probably an invalid
        # signature. Revert to auto-naming.
        output_names = []
    else:
      output_names = []
    fn = pywrap_tf_session.TF_GraphToFunction_wrapper(
        graph._c_graph,  # pylint: disable=protected-access
        compat.as_str(name),
        False,
        [o._c_op for o in operations],  # pylint: disable=protected-access
        [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
        [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
        output_names,
        [o._c_op for o in graph.control_outputs],  # pylint: disable=protected-access
        [],  # control_output_names
        None,
        compat.as_str(""))

    # NOTE: this loop rebinds the `name` parameter; `name` is not read again
    # below (the stored name comes from the parsed FunctionDef signature).
    for name, attr_value in attrs.items():
      serialized = attr_value.SerializeToString()
      # TODO(iga): this creates and deletes a new TF_Status for every attr.
      # It might be worth creating a convenient way to re-use status.
      pywrap_tf_session.TF_FunctionSetAttrValueProto(fn, compat.as_str(name),
                                                     serialized)

    # TODO(apassos) avoid creating a FunctionDef (specially to grab the
    # signature, but also in general it's nice not to depend on it.
    with c_api_util.tf_buffer() as buffer_:
      pywrap_tf_session.TF_FunctionToFunctionDef(fn, buffer_)
      proto_data = pywrap_tf_session.TF_GetBuffer(buffer_)
    function_def = function_pb2.FunctionDef()
    function_def.ParseFromString(compat.as_bytes(proto_data))
    self._name = compat.as_bytes(function_def.signature.name)
    with ops.init_scope():
      if context.executing_eagerly():
        context.ensure_initialized()
        context.add_function(fn)
        # The deleter removes the function from the context again when it is
        # garbage collected. Both attributes below are only set on this path.
        self._function_deleter = _EagerDefinedFunctionDeleter(self.name)
        self._registered_on_context = True
    self.definition = function_def
    self.signature = function_def.signature
    self._num_outputs = len(self.signature.output_arg)
    self._output_types = [o.type for o in self.signature.output_arg]
    self._output_shapes = [o.shape for o in outputs]
    self._control_captures = graph.control_captures
    # Shallow copy outputs since ConcreteFunction may mutate it.
    self._func_graph_outputs = list(outputs)
    self.grad_func_name = None
    self.python_grad_func = None
    self._c_func = c_api_util.ScopedTFFunction(fn)
    self._grad_func = None
    self.graph = graph
    self._stateful_ops = tuple(op for op in operations if op._is_stateful)  # pylint: disable=protected-access

    for function_callback in _function_callbacks:
      function_callback(self)

  def add_to_graph(self, g=None):
    """Adds this function's definition to the eager context or to graph `g`.

    If `g` is falsy and we are executing eagerly, the FunctionDef is added to
    the eager context. Otherwise this function, followed by every function in
    `self.graph._functions`, is added to `g` unless `g` already has a function
    of that name.

    NOTE(review): with a falsy `g` outside of eager execution the `else`
    branch dereferences `g`; callers presumably always pass a graph there.

    Args:
      g: Optional graph to add the function to.
    """
    # pylint: disable=protected-access
    if not g and context.executing_eagerly():
      context.context().add_function_def(self.definition)
    else:
      if not g._is_function(self.name):
        g._add_function(self)
      for f in self.graph._functions.values():
        if not g._is_function(f.name):
          g._add_function(f)
    # pylint: enable=protected-access

  @property
  def name(self):
    """The function name as bytes, taken from the FunctionDef signature."""
    return self._name

  @property
  def stateful_ops(self):
    """Tuple of the function's operations whose `_is_stateful` is true."""
    return self._stateful_ops

  def call(self, ctx, args, cancellation_manager=None):
    """Calls this function with `args` as inputs.

    `ConcreteFunction` execution respects device annotations only if the
    function won't be compiled with xla.

    Args:
      ctx: a Context object
      args: a list of arguments to supply this function with.
      cancellation_manager: a `CancellationManager` object that can be used to
        cancel function execution.

    Returns:
      The outputs of the function call.

    Raises:
      ValueError: if the number of arguments is incorrect.
    """
    if len(args) != len(self.signature.input_arg):
      raise ValueError(
          "Arguments and signature arguments do not match. "
          "got: %s, expected: %s " %
          (len(args), len(list(self.signature.input_arg))))

    # Fall back to the disabled-rewriter config when the context has no
    # serialized config proto of its own.
    function_call_options = ctx.function_call_options
    if function_call_options.config_proto_serialized is None:
      config = function_utils.get_disabled_rewriter_config()
    else:
      config = function_call_options.config_proto_serialized
    executor_type = function_call_options.executor_type or ""

    executing_eagerly = ctx.executing_eagerly()
    attrs = ("executor_type", executor_type, "config_proto", config)
    if executing_eagerly:
      # Eager path: execute the function by its signature name.
      with _InterpolateFunctionError(self):
        if cancellation_manager is None:
          outputs = execute.execute(
              str(self.signature.name),
              num_outputs=self._num_outputs,
              inputs=args,
              attrs=attrs,
              ctx=ctx)
        else:
          outputs = execute.execute_with_cancellation(
              str(self.signature.name),
              num_outputs=self._num_outputs,
              inputs=args,
              attrs=attrs,
              ctx=ctx,
              cancellation_manager=cancellation_manager)
      # Replace empty list with None
      outputs = outputs or None
    else:
      # Graph path: emit a partitioned call op.
      # TODO(akshayka): Either remove this if the FunctionLibraryRuntime
      # creates `PartitionedCallOp` kernels by default, or remove the previous
      # branch if a TPU kernel is registered for `PartitionedCall`.
      with _InterpolateFunctionError(self):
        with ops.control_dependencies(self._control_captures):
          # The caller must use record_operation to record this operation in the
          # eager case, so we enforce the same requirement for the non-eager
          # case by explicitly pausing recording. We don't have a gradient
          # registered for PartitionedCall, so recording this operation confuses
          # forwardprop code (GradientTape manages to ignore it).
          with tape.stop_recording():
            outputs = functional_ops.partitioned_call(
                args=args,
                f=self,
                tout=self._output_types,
                executing_eagerly=executing_eagerly,
                config=config,
                executor_type=executor_type)

    if executing_eagerly:
      return outputs
    else:
      # TODO(b/128924522): This additional set_shape should not be
      # necessary. ShapeRefiner likely needs to inspect handle_data. Remove this
      # once that's done.
      for i, shape in enumerate(self._output_shapes):
        outputs[i].set_shape(shape)
      # Copy handle data from the function graph's outputs onto the call
      # outputs.
      for i, func_graph_output in enumerate(self._func_graph_outputs):
        custom_gradient.copy_handle_data(func_graph_output, outputs[i])
      return outputs

class _DelayedRewriteGradientFunctions(object):

"""Caches forward/backward functions with a delayed forward rewrite."""

def __init__(self, func_graph, attrs, func_graph_deleter):
  """Construct an inference function and initialize caches.

  Args:
    func_graph: The FuncGraph to build the inference function from.
    attrs: Attributes passed on to the inference `_EagerDefinedFunction`.
    func_graph_deleter: Object stored on the instance alongside the graph.
  """
  # Cache of (forward, backward) function pairs, keyed by the number of
  # forward function outputs with accepted gradients. It caches non-tape
  # backward function generation.
  self._cached_function_pairs = {}

  self._func_graph = func_graph
  self._attrs = attrs
  self._gradient_name = None
  self._func_graph_deleter = func_graph_deleter

  self._inference_function = _EagerDefinedFunction(
      _inference_name(func_graph.name), func_graph,
      func_graph.inputs, func_graph.outputs, attrs)

  # Note that the FuncGraph is mutated later, so we need to inspect it now to
  # figure out the user-specified outputs of the inference function.
  self._num_inference_outputs = len(func_graph.outputs)

def forward_backward(self, num_doutputs=None):

"""A possibly-cached pair of forward and backward functions."""

if num_doutputs is None:

num_doutputs = self._num_inference_outputs

forward_backward = self._cached_function_pairs.get(num_doutputs)

if forward_backward is not None:

return forward_backward

forward, backward = self._construct_forward_backward(num_doutputs)

self._cached_function_pairs[num_doutputs] = (forward, backward)

return forward, backward

def _construct_forward_backward(self, num_doutputs):
  """Constructs a pair of forward and backward functions.

  As a side effect, tensors of the forward graph that the backward graph
  captures are appended to `self._func_graph.outputs` (if not already there),
  so that the forward function exposes them as additional outputs.

  Args:
    num_doutputs: The constructed backprop function will take output gradients
      for the first `num_doutputs` outputs of the forward function. Defaults
      to the number of outputs for the inference function, but when
      higher-order gradients are computed this will increase to include side
      outputs.

  Returns:
    A pair of (forward_function, backward_function):
      forward_function: A re-generated inference function (an
        _EagerDefinedFunction) to account for new side outputs, if any extra
        were required when building the backward pass.
      backward_function: A ConcreteFunction that Takes `num_doutputs`
        arguments and returns gradients with respect to inputs of the forward
        function.
  """
  # Only trainable outputs among the first `num_doutputs` receive gradients.
  trainable_outputs = [
      output for output in self._func_graph.outputs[:num_doutputs]
      if backprop_util.IsTrainable(output)]

  # One TensorSpec per trainable output: the signature of the backward
  # function's gradient arguments.
  signature = []
  for t in trainable_outputs:
    signature.append(
        tensor_spec.TensorSpec(*default_gradient.shape_and_dtype(t)))

  def _backprop_function(*grad_ys):
    # Computes gradients of the trainable outputs with respect to the forward
    # graph's inputs, given the output gradients `grad_ys`.
    with ops.device(None):
      return gradients_util._GradientsHelper(  # pylint: disable=protected-access
          trainable_outputs,
          self._func_graph.inputs,
          grad_ys=grad_ys,
          src_graph=self._func_graph)

  with self._func_graph.as_default():
    # Trace `_backprop_function` into its own FuncGraph.
    backwards_graph = func_graph_module.FuncGraph(
        _backward_name(self._func_graph.name))
    func_graph_module.func_graph_from_py_func(
        name=backwards_graph.name,
        python_func=_backprop_function,
        args=[], kwargs={},
        signature=signature,
        func_graph=backwards_graph)
    # Captures that are symbolic tensors of the forward graph must become
    # outputs of the forward function.
    backwards_graph_captures = backwards_graph.external_captures
    captures_from_forward = [
        c for c in backwards_graph_captures if
        not isinstance(c, ops.EagerTensor) and c.graph is self._func_graph]

    forward_function_name = _forward_name(self._func_graph.name)

    # NB: forward and backward function have their "_implements"
    # attribute set to None if it was present. This is because we don't
    # support replacing those functions. If we do want for those functions
    # to have implements function we need to provide a mechanism that
    # would allow to identify all functions that call this one
    # and trace and update their signatures as well. At the moment
    # we disable this, until the tooling for doing this becomes available.
    # See:
    # https://github.com/tensorflow/community/blob/master/rfcs/20190610-standardizing-composite_ops.md#appendix-future-support-for-optimizing-gradient-functions
    common_attributes = dict(self._attrs)
    common_attributes.pop(IMPLEMENTS_ATTRIBUTE_NAME, None)

    # Append each forward capture to the forward graph's outputs exactly once
    # (membership is tested by object identity).
    existing_outputs = object_identity.ObjectIdentitySet(
        self._func_graph.outputs)
    for capture in captures_from_forward:
      if capture not in existing_outputs:
        existing_outputs.add(capture)
        self._func_graph.outputs.append(capture)
    # The two functions reference each other by name through attributes.
    backward_function_attr = _parse_func_attrs(
        {FORWARD_FUNCTION_ATTRIBUTE_NAME: forward_function_name})
    backward_function_attr.update(common_attributes)

    backward_function = ConcreteFunction(
        backwards_graph, attrs=backward_function_attr)
    forward_function_attr = _parse_func_attrs({
        BACKWARD_FUNCTION_ATTRIBUTE_NAME:
        backward_function.name})
    forward_function_attr.update(common_attributes)

    # Re-generate the forward function from the (now extended) outputs.
    forward_function = _EagerDefinedFunction(
        forward_function_name, self._func_graph, self._func_graph.inputs,
        self._func_graph.outputs, forward_function_attr)
    return forward_function, backward_function

def _rewrite_forward_and_call_backward(self, op, *doutputs):
  """Add outputs to the forward call and feed them to the grad function.

  Args:
    op: The call operation to rewrite in place; its function attribute and
      output list are updated to those of the forward function.
    *doutputs: Gradients with respect to the outputs of `op`; entries may be
      `None` or `IndexedSlices`.

  Returns:
    The backward function's structured outputs if it has no outputs,
    otherwise the result of calling the backward function on the cleaned
    output gradients and the remapped captures.
  """
  forward_function, backwards_function = self.forward_backward(len(doutputs))
  if not backwards_function.outputs:
    # Nothing to compute; `op` is left untouched.
    return backwards_function.structured_outputs
  forward_function.add_to_graph(op.graph)

  # pylint: disable=protected-access
  # Rewrite an inference call op to be a forward call op
  op._set_func_attr("f", forward_function.name)
  op._set_type_list_attr("Tout", forward_function._output_types)
  # Only the outputs beyond those `op` already has are added.
  op._add_outputs(
      forward_function._output_types[len(op.outputs):],
      forward_function._output_shapes[len(op.outputs):])
  for i in range(len(op.outputs)):
    func_graph_output = forward_function._func_graph_outputs[i]
    custom_gradient.copy_handle_data(func_graph_output, op.outputs[i])
  # pylint: enable=protected-access

  # Map each forward-graph output tensor to the matching output of `op`, and
  # substitute captured forward-graph tensors with those op outputs. Captures
  # without a mapping are passed through unchanged.
  capture_mapping = dict(
      zip((ops.tensor_id(t) for t in self._func_graph.outputs), op.outputs))
  remapped_captures = [
      capture_mapping.get(ops.tensor_id(capture), capture)
      for capture in backwards_function.captured_inputs
  ]

  # Replace Nones with zeros since we're calling a graph function which
  # expects numeric inputs.
  cleaned_doutputs = []
  for doutput, placeholder in zip(doutputs, self._func_graph.outputs):
    # Gradients for non-trainable outputs are dropped entirely.
    if backprop_util.IsTrainable(placeholder):
      if isinstance(doutput, ops.IndexedSlices):
        # Gradient passed to a backward ConcreteFunction must be tf.Tensor,
        # so we convert tf.IndexedSlices to tf.Tensor.
        cleaned_doutputs.append(ops.convert_to_tensor(doutput))
      elif doutput is not None:
        cleaned_doutputs.append(doutput)
      else:
        cleaned_doutputs.append(default_gradient.zeros_like(placeholder))

  # Compute the gradients using the side outputs
  return backwards_function._call_flat(  # pylint: disable=protected-access
      cleaned_doutputs, remapped_captures)

def get_gradient_function(self):
  """Returns the callable that computes gradients for the call op.

  The returned callable is `self._rewrite_forward_and_call_backward`. It turns
  an inference call op into a forward call op (leaving an already rewritten
  forward call op alone) and then computes the gradient from the gradients of
  the outputs together with the side outputs of the forward op.
  """
  return self._rewrite_forward_and_call_backward

def forward(self, inference_args=None, input_tangents=None):
  """Returns the inference function, which has only user-specified outputs.

  The call operation for the returned function can later be rewritten into a
  forward function. That only happens if the backward function (from the
  `backward` method) ends up being used to compute gradients, which avoids
  constructing unnecessary graphs. The approach only works when this function
  is called while not executing eagerly.

  Args:
    inference_args: A flat list of Tensors, arguments to the inference
      function. Ignored; accepted for compatibility with
      _TapeGradientFunctions.
    input_tangents: A flat list of Tensors, jvps associated with
      `inference_args`. Must be empty or None; tape functions must be used
      when tangents are required.

  Returns:
    `self._inference_function`.

  Raises:
    AssertionError: If `input_tangents` is non-empty.
  """
  del inference_args  # unused
  if not input_tangents:
    return self._inference_function
  # This class has no special-cased forwardprop support; the parameters exist
  # only for signature compatibility with _TapeGradientFunctions.
  raise AssertionError(
      "Internal error: unexpectedly got forwardprop information in a class "
      "that does not support forwardprop.")

def _backward(self, outputs):
  """Builds a backward callable for `outputs` of the forward function.

  Args:
    outputs: Outputs of the function call; the op that produced the first
      one is looked up when the returned callable is invoked.

  Returns:
    A pair `(backward callable, outputs)`. The callable forwards its
    arguments to `self._rewrite_forward_and_call_backward` together with the
    op of `outputs[0]`.
  """
  def _backward_function(*grads):
    # Resolved lazily, at gradient time rather than when the pair is built.
    producing_op = outputs[0].op
    return self._rewrite_forward_and_call_backward(producing_op, *grads)

  return _backward_function, outputs

def record(self, flat_outputs, inference_args, input_tangents):
  """Records the function call operation on the tape.

  _DelayedRewriteGradientFunctions supports only first-order backprop tape
  gradients (and then only when graph building). Higher-order tape gradients
  and forward autodiff are not supported, but higher-order symbolic gradients
  (tf.gradients) are.

  Args:
    flat_outputs: The result of running `forward`.
    inference_args: A flat list of Tensors with inference inputs to the
      operation.
    input_tangents: A flat list of Tensors with input tangents consumed by the
      operation.
  """
  backward_fn, recorded_outputs = self._backward(flat_outputs)
  op_name = self._inference_function.signature.name
  # Inference inputs come first, followed by the tangents.
  recorded_inputs = inference_args + input_tangents
  tape.record_operation(
      op_name, recorded_outputs, recorded_inputs, backward_fn)

# Describes a forward function that has been wrapped to also compute jvps.
_ForwardWrapper = collections.namedtuple(
    "_ForwardWrapper",
    [
        # The wrapper Graph.
        "graph",
        # Flat list of the non-tangent Tensor outputs of the wrapped forward
        # function.
        "outputs",
        # Indices for output tangents, in the same format as
        # forwardprop_util.pack_tangents.
        "output_indices",
        # Flat list of tangents for `outputs`.
        "output_tangents",
    ])

class _TapeGradientFunctions(object):

"""Caches forward and backward functions compatible with eager gradients.

In contrast to the delayed-rewrite approach in

`_DelayedRewriteGradientFunctions` which only works with delayed execution,

the forward function generated by this class has a fixed set of outputs which

may be preserved by a tape in order to compute gradients later.

This class is abstract; its child classes differ in how many side outputs of

the forward function their backward function accepts gradients for, which

determines whether higher-order tape gradients are possible.

"""

def __init__(self, func_graph, attrs, func_graph_deleter,
             forwardprop_input_indices, delayed_rewrite_functions,
             need_gradients_for_jvps):
  """Stores the constructor arguments and counts the graph's outputs.

  No forward or backward function is built here; the corresponding
  attributes start out as `None`.

  Args:
    func_graph: The graph whose `outputs` are counted; stored as
      `self._func_graph`.
    attrs: Stored as `self._attrs`.
    func_graph_deleter: Stored as `self._func_graph_deleter`.
    forwardprop_input_indices: Stored as `self._forwardprop_input_indices`.
    delayed_rewrite_functions: Stored as `self._delayed_rewrite_functions`.
    need_gradients_for_jvps: Stored as `self._need_gradients_for_jvps`.
  """
  # Constructor arguments, kept verbatim.
  self._func_graph = func_graph
  self._attrs = attrs
  self._func_graph_deleter = func_graph_deleter
  self._forwardprop_input_indices = forwardprop_input_indices
  self._delayed_rewrite_functions = delayed_rewrite_functions
  self._need_gradients_for_jvps = need_gradients_for_jvps

  # State that is not populated by the constructor.
  self._forward_graph = None
  self._forward = None
  self._backward = None
  self._forwardprop_output_indices = None
  self._num_forwardprop_outputs = 0

  # Output counts taken from the graph as it is at construction time.
  graph_outputs = func_graph.outputs
  output_count = len(graph_outputs)
  self._num_outputs = output_count
  self._num_inference_outputs = output_count
  self._num_trainable_inference_outputs = sum(
      1 for tensor in graph_outputs if backprop_util.IsTrainable(tensor))

def _build_functions_for_outputs(
    self, outputs, inference_args, input_tangents):
  """Forward+backward functions where the backward function sees `outputs`.

  Builds a backward graph that accepts a gradient for every trainable entry
  of `outputs`, appends any tensors the backward graph captured from
  `self._func_graph` to that graph's outputs (mutating it), and creates the
  forward/backward function pair. When `input_tangents` is non-empty both
  functions are additionally wrapped for forwardprop.

  Args:
    outputs: Tensors from which the trainable ones are selected; the backward
      function receives a gradient placeholder for each of those.
    inference_args: Arguments of the inference function. Only used when
      `input_tangents` is non-empty.
    input_tangents: Tangents for `inference_args`. If empty, no forwardprop
      wrapping is done.

  Returns:
    A 5-tuple `(forward_function, forward_graph, backward_function,
    forwardprop_output_indices, num_forwardprop_outputs)`. The last two are
    `None` and `0` when `input_tangents` is empty.

  Raises:
    AssertionError: If the wrapped forward graph does not have exactly
      `len(inference_args) + len(input_tangents)` inputs.
  """
  # First figure out which of `outputs` are trainable. We'll accept gradients
  # for each of these in the backward function.
  handles_to_variables = self._func_graph.variable_captures
  trainable_outputs = []
  for output in outputs:
    if backprop_util.IsTrainable(output):
      # Swap in the Variable object for resource handles if we can so
      # sparse gradients work.
      output = handles_to_variables.get(id(output), output)
      trainable_outputs.append(output)

  backwards_graph = func_graph_module.FuncGraph(
      _backward_name(self._func_graph.name))
  with backwards_graph.as_default():
    gradients_wrt_outputs = []
    for output in trainable_outputs:
      gradient_shape, gradient_dtype = default_gradient.shape_and_dtype(
          output)
      gradients_wrt_outputs.append(
          graph_placeholder(gradient_dtype, gradient_shape))
    with ops.device(None):
      gradients_wrt_inputs = gradients_util._GradientsHelper(  # pylint: disable=protected-access
          trainable_outputs,
          self._func_graph.inputs,
          grad_ys=gradients_wrt_outputs,
          src_graph=self._func_graph)

    # Tensors of the forward graph that the backward graph captured become
    # additional (side) outputs of the forward graph, each added only once.
    captures_from_forward = [
        c for c in backwards_graph.external_captures
        if not isinstance(c, ops.EagerTensor) and c.graph is self._func_graph
    ]
    existing_outputs = object_identity.ObjectIdentitySet(
        self._func_graph.outputs)
    for capture in captures_from_forward:
      if capture not in existing_outputs:
        existing_outputs.add(capture)
        self._func_graph.outputs.append(capture)

  forward_function_name = _forward_name(self._func_graph.name)
  backward_function_attr = _parse_func_attrs(
      {FORWARD_FUNCTION_ATTRIBUTE_NAME: forward_function_name})
  backward_function_attr.update(self._attrs)

  # The ordering of `backwards_graph.inputs` is important: inputs of
  # `backward_function` correspond to outputs (including
  # side outputs) of `self._tape_forward_function`.
  backwards_graph.inputs = (
      gradients_wrt_outputs + backwards_graph.internal_captures)
  backwards_graph.outputs.extend(
      grad
      for grad in nest.flatten(gradients_wrt_inputs, expand_composites=True)
      if grad is not None)
  backwards_graph.structured_outputs = gradients_wrt_inputs
  backward_function = ConcreteFunction(
      backwards_graph, attrs=backward_function_attr)

  forward_function_attr = _parse_func_attrs({
      BACKWARD_FUNCTION_ATTRIBUTE_NAME:
      backward_function.name})
  forward_function_attr.update(self._attrs)

  forward_function = _EagerDefinedFunction(
      forward_function_name, self._func_graph, self._func_graph.inputs,
      self._func_graph.outputs,
      forward_function_attr)

  if not input_tangents:
    # There is no need to special-case forwardprop, so we can return the
    # forward+backward pair we've created without further wrapping.
    return (forward_function, self._func_graph, backward_function,
            # No forwardprop outputs.
            None, 0)
  forward_wrapper = self._wrap_forward_function_with_jvps(
      forward_function, backward_function, inference_args, input_tangents)
  (wrapped_backwards_graph,
   forward_wrapper) = self._wrap_backward_function_with_jvp_backprop(
       backward_function, gradients_wrt_outputs, forward_wrapper)
  # Now that we've added new captures, we need to make sure forward outputs
  # are in the same order the backward function expects them to be in:
  # [inference outputs] + [jvps] + [side outputs] + [captures].
  forward_wrapper = self._shuffle_forward_outputs(forward_wrapper)
  wrapped_forward_function = _EagerDefinedFunction(
      _forward_name(self._func_graph.name), forward_wrapper.graph,
      forward_wrapper.graph.inputs, forward_wrapper.graph.outputs,
      forward_function_attr)
  wrapped_backward_function = ConcreteFunction(
      wrapped_backwards_graph, attrs=backward_function_attr)
  if (len(inference_args) + len(input_tangents)
      != len(forward_wrapper.graph.inputs)):
    # Report the actual number of inputs; the count itself is an int and
    # must not be passed through `len` again.
    raise AssertionError(
        ("Internal error: the forward graph had {} inputs, but we expected"
         " {} ({} inference inputs and {} input tangents)")
        .format(len(forward_wrapper.graph.inputs),
                len(inference_args) + len(input_tangents),
                len(inference_args), len(input_tangents)))
  return (wrapped_forward_function, forward_wrapper.graph,
          wrapped_backward_function, forward_wrapper.output_indices,
          len(forward_wrapper.output_tangents))

def _wrap_forward_function_with_jvps(

self, forward_function, backward_function,

inference_args, input_tangents):

"""Adds inline JVP computation to a forward function."""

forward_wrapper_graph = func_graph_module.FuncGraph(

_forward_name(self._func_graph.name))

with forward_wrapper_graph.as_default():

# Tell forward accumulators to free up space for new JVP computations,

# since one may be in the process of computing a JVP (if that computation

# triggered this function building).

# We'll make symbolic versions of input JVPs, run the forward function

# under forward accumulators to get symbolic output JVPs, then set those

# as outputs of the new wrapped forward function.

with forwardprop_util.push_forwardprop_state():

forward_captures = {

ops.tensor_id(internal): external

for external, internal in self._func_graph.captures}

for input_index, real_input in enumerate(self._func_graph.inputs):

# This loop is more or less equivalent to running tf.identity on each

# of self._func_graph.inputs. However, doing that also captures jvps

# for resource handles, which confuses the jvp capturing code below

# (since primal inputs are interwoven with jvp inputs).

input_placeholder = array_ops.placeholder(

dtype=real_input.dtype,

shape=real_input.shape)

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

function.py

Latest commit

History

function.py

File metadata and controls