README.md 17 KB

AST Types

This module provides an efficient, modular, Esprima-compatible implementation of the abstract syntax tree type hierarchy pioneered by the Mozilla Parser API.

Build Status

Installation

From NPM:

npm install ast-types

From GitHub:

cd path/to/node_modules
git clone git://github.com/benjamn/ast-types.git
cd ast-types
npm install .

Basic Usage

var assert = require("assert");
var n = require("ast-types").namedTypes;
var b = require("ast-types").builders;

var fooId = b.identifier("foo");
var ifFoo = b.ifStatement(fooId, b.blockStatement([
    b.expressionStatement(b.callExpression(fooId, []))
]));

assert.ok(n.IfStatement.check(ifFoo));
assert.ok(n.Statement.check(ifFoo));
assert.ok(n.Node.check(ifFoo));

assert.ok(n.BlockStatement.check(ifFoo.consequent));
assert.strictEqual(
    ifFoo.consequent.body[0].expression.arguments.length,
    0);

assert.strictEqual(ifFoo.test, fooId);
assert.ok(n.Expression.check(ifFoo.test));
assert.ok(n.Identifier.check(ifFoo.test));
assert.ok(!n.Statement.check(ifFoo.test));

AST Traversal

Because it understands the AST type system so thoroughly, this library is able to provide excellent node iteration and traversal mechanisms.

If you want complete control over the traversal, and all you need is a way of enumerating the known fields of your AST nodes and getting their values, you may be interested in the primitives getFieldNames and getFieldValue:

var types = require("ast-types");
var partialFunExpr = { type: "FunctionExpression" };

// Even though partialFunExpr doesn't actually contain all the fields that
// are expected for a FunctionExpression, types.getFieldNames knows:
console.log(types.getFieldNames(partialFunExpr));
// [ 'type', 'id', 'params', 'body', 'generator', 'expression',
//   'defaults', 'rest', 'async' ]

// For fields that have default values, types.getFieldValue will return
// the default if the field is not actually defined.
console.log(types.getFieldValue(partialFunExpr, "generator"));
// false

Two more low-level helper functions, eachField and someField, are defined in terms of getFieldNames and getFieldValue:

// Iterate over all defined fields of an object, including those missing
// or undefined, passing each field name and effective value (as returned
// by getFieldValue) to the callback. If the object has no corresponding
// Def, the callback will never be called.
exports.eachField = function(object, callback, context) {
    getFieldNames(object).forEach(function(name) {
        callback.call(this, name, getFieldValue(object, name));
    }, context);
};

// Similar to eachField, except that iteration stops as soon as the
// callback returns a truthy value. Like Array.prototype.some, the final
// result is either true or false to indicates whether the callback
// returned true for any element or not.
exports.someField = function(object, callback, context) {
    return getFieldNames(object).some(function(name) {
        return callback.call(this, name, getFieldValue(object, name));
    }, context);
};

So here's how you might make a copy of an AST node:

var copy = {};
require("ast-types").eachField(node, function(name, value) {
    // Note that undefined fields will be visited too, according to
    // the rules associated with node.type, and default field values
    // will be substituted if appropriate.
    copy[name] = value;
})

But that's not all! You can also easily visit entire syntax trees using the powerful types.visit abstraction.

Here's a trivial example of how you might assert that arguments.callee is never used in ast:

var assert = require("assert");
var types = require("ast-types");
var n = types.namedTypes;

types.visit(ast, {
    // This method will be called for any node with .type "MemberExpression":
    visitMemberExpression: function(path) {
        // Visitor methods receive a single argument, a NodePath object
        // wrapping the node of interest.
        var node = path.node;

        if (n.Identifier.check(node.object) &&
            node.object.name === "arguments" &&
            n.Identifier.check(node.property)) {
            assert.notStrictEqual(node.property.name, "callee");
        }

        // It's your responsibility to call this.traverse with some
        // NodePath object (usually the one passed into the visitor
        // method) before the visitor method returns, or return false to
        // indicate that the traversal need not continue any further down
        // this subtree.
        this.traverse(path);
    }
});

Here's a slightly more involved example of transforming ...rest parameters into browser-runnable ES5 JavaScript:

var b = types.builders;

// Reuse the same AST structure for Array.prototype.slice.call.
var sliceExpr = b.memberExpression(
    b.memberExpression(
        b.memberExpression(
            b.identifier("Array"),
            b.identifier("prototype"),
            false
        ),
        b.identifier("slice"),
        false
    ),
    b.identifier("call"),
    false
);

types.visit(ast, {
    // This method will be called for any node whose type is a subtype of
    // Function (e.g., FunctionDeclaration, FunctionExpression, and
    // ArrowFunctionExpression). Note that types.visit precomputes a
    // lookup table from every known type to the appropriate visitor
    // method to call for nodes of that type, so the dispatch takes
    // constant time.
    visitFunction: function(path) {
        // Visitor methods receive a single argument, a NodePath object
        // wrapping the node of interest.
        var node = path.node;

        // It's your responsibility to call this.traverse with some
        // NodePath object (usually the one passed into the visitor
        // method) before the visitor method returns, or return false to
        // indicate that the traversal need not continue any further down
        // this subtree. An assertion will fail if you forget, which is
        // awesome, because it means you will never again make the
        // disastrous mistake of forgetting to traverse a subtree. Also
        // cool: because you can call this method at any point in the
        // visitor method, it's up to you whether your traversal is
        // pre-order, post-order, or both!
        this.traverse(path);

        // This traversal is only concerned with Function nodes that have
        // rest parameters.
        if (!node.rest) {
            return;
        }

        // For the purposes of this example, we won't worry about functions
        // with Expression bodies.
        n.BlockStatement.assert(node.body);

        // Use types.builders to build a variable declaration of the form
        //
        //   var rest = Array.prototype.slice.call(arguments, n);
        //
        // where `rest` is the name of the rest parameter, and `n` is a
        // numeric literal specifying the number of named parameters the
        // function takes.
        var restVarDecl = b.variableDeclaration("var", [
            b.variableDeclarator(
                node.rest,
                b.callExpression(sliceExpr, [
                    b.identifier("arguments"),
                    b.literal(node.params.length)
                ])
            )
        ]);

        // Similar to doing node.body.body.unshift(restVarDecl), except
        // that the other NodePath objects wrapping body statements will
        // have their indexes updated to accommodate the new statement.
        path.get("body", "body").unshift(restVarDecl);

        // Nullify node.rest now that we have simulated the behavior of
        // the rest parameter using ordinary JavaScript.
        path.get("rest").replace(null);

        // There's nothing wrong with doing node.rest = null, but I wanted
        // to point out that the above statement has the same effect.
        assert.strictEqual(node.rest, null);
    }
});

Here's how you might use types.visit to implement a function that determines if a given function node refers to this:

function usesThis(funcNode) {
    n.Function.assert(funcNode);
    var result = false;

    types.visit(funcNode, {
        visitThisExpression: function(path) {
            result = true;

            // The quickest way to terminate the traversal is to call
            // this.abort(), which throws a special exception (instanceof
            // this.AbortRequest) that will be caught in the top-level
            // types.visit method, so you don't have to worry about
            // catching the exception yourself.
            this.abort();
        },

        visitFunction: function(path) {
            // ThisExpression nodes in nested scopes don't count as `this`
            // references for the original function node, so we can safely
            // avoid traversing this subtree.
            return false;
        },

        visitCallExpression: function(path) {
            var node = path.node;

            // If the function contains CallExpression nodes involving
            // super, those expressions will implicitly depend on the
            // value of `this`, even though they do not explicitly contain
            // any ThisExpression nodes.
            if (this.isSuperCallExpression(node)) {
                result = true;
                this.abort(); // Throws AbortRequest exception.
            }

            this.traverse(path);
        },

        // Yes, you can define arbitrary helper methods.
        isSuperCallExpression: function(callExpr) {
            n.CallExpression.assert(callExpr);
            return this.isSuperIdentifier(callExpr.callee)
                || this.isSuperMemberExpression(callExpr.callee);
        },

        // And even helper helper methods!
        isSuperIdentifier: function(node) {
            return n.Identifier.check(node.callee)
                && node.callee.name === "super";
        },

        isSuperMemberExpression: function(node) {
            return n.MemberExpression.check(node.callee)
                && n.Identifier.check(node.callee.object)
                && node.callee.object.name === "super";
        }
    });

    return result;
}

As you might guess, when an AbortRequest is thrown from a subtree, the exception will propagate from the corresponding calls to this.traverse in the ancestor visitor methods. If you decide you want to cancel the request, simply catch the exception and call its .cancel() method. The rest of the subtree beneath the try-catch block will be abandoned, but the remaining siblings of the ancestor node will still be visited.

NodePath

The NodePath object passed to visitor methods is a wrapper around an AST node, and it serves to provide access to the chain of ancestor objects (all the way back to the root of the AST) and scope information.

In general, path.node refers to the wrapped node, path.parent.node refers to the nearest Node ancestor, path.parent.parent.node to the grandparent, and so on.

Note that path.node may not be a direct property value of path.parent.node; for instance, it might be the case that path.node is an element of an array that is a direct child of the parent node:

path.node === path.parent.node.elements[3]

in which case you should know that path.parentPath provides finer-grained access to the complete path of objects (not just the Node ones) from the root of the AST:

// In reality, path.parent is the grandparent of path:
path.parentPath.parentPath === path.parent

// The path.parentPath object wraps the elements array (note that we use
// .value because the elements array is not a Node):
path.parentPath.value === path.parent.node.elements

// The path.node object is the fourth element in that array:
path.parentPath.value[3] === path.node

// Unlike path.node and path.value, which are synonyms because path.node
// is a Node object, path.parentPath.node is distinct from
// path.parentPath.value, because the elements array is not a
// Node. Instead, path.parentPath.node refers to the closest ancestor
// Node, which happens to be the same as path.parent.node:
path.parentPath.node === path.parent.node

// The path is named for its index in the elements array:
path.name === 3

// Likewise, path.parentPath is named for the property by which
// path.parent.node refers to it:
path.parentPath.name === "elements"

// Putting it all together, we can follow the chain of object references
// from path.parent.node all the way to path.node by accessing each
// property by name:
path.parent.node[path.parentPath.name][path.name] === path.node

These NodePath objects are created during the traversal without modifying the AST nodes themselves, so it's not a problem if the same node appears more than once in the AST (like Array.prototype.slice.call in the example above), because it will be visited with a distict NodePath each time it appears.

Child NodePath objects are created lazily, by calling the .get method of a parent NodePath object:

// If a NodePath object for the elements array has never been created
// before, it will be created here and cached in the future:
path.get("elements").get(3).value === path.value.elements[3]

// Alternatively, you can pass multiple property names to .get instead of
// chaining multiple .get calls:
path.get("elements", 0).value === path.value.elements[0]

NodePath objects support a number of useful methods:

// Replace one node with another node:
var fifth = path.get("elements", 4);
fifth.replace(newNode);

// Now do some stuff that might rearrange the list, and this replacement
// remains safe:
fifth.replace(newerNode);

// Replace the third element in an array with two new nodes:
path.get("elements", 2).replace(
    b.identifier("foo"),
    b.thisExpression()
);

// Remove a node and its parent if it would leave a redundant AST node:
//e.g. var t = 1, y =2; removing the `t` and `y` declarators results in `var undefined`.
path.prune(); //returns the closest parent `NodePath`.

// Remove a node from a list of nodes:
path.get("elements", 3).replace();

// Add three new nodes to the beginning of a list of nodes:
path.get("elements").unshift(a, b, c);

// Remove and return the first node in a list of nodes:
path.get("elements").shift();

// Push two new nodes onto the end of a list of nodes:
path.get("elements").push(d, e);

// Remove and return the last node in a list of nodes:
path.get("elements").pop();

// Insert a new node before/after the seventh node in a list of nodes:
var seventh = path.get("elements", 6);
seventh.insertBefore(newNode);
seventh.insertAfter(newNode);

// Insert a new element at index 5 in a list of nodes:
path.get("elements").insertAt(5, newNode);

Scope

The object exposed as path.scope during AST traversals provides information about variable and function declarations in the scope that contains path.node. See scope.js for its public interface, which currently includes .isGlobal, .getGlobalScope(), .depth, .declares(name), .lookup(name), and .getBindings().

Custom AST Node Types

The ast-types module was designed to be extended. To that end, it provides a readable, declarative syntax for specifying new AST node types, based primarily upon the require("ast-types").Type.def function:

var types = require("ast-types");
var def = types.Type.def;
var string = types.builtInTypes.string;
var b = types.builders;

// Suppose you need a named File type to wrap your Programs.
def("File")
    .bases("Node")
    .build("name", "program")
    .field("name", string)
    .field("program", def("Program"));

// Prevent further modifications to the File type (and any other
// types newly introduced by def(...)).
types.finalize();

// The b.file builder function is now available. It expects two
// arguments, as named by .build("name", "program") above.
var main = b.file("main.js", b.program([
    // Pointless program contents included for extra color.
    b.functionDeclaration(b.identifier("succ"), [
        b.identifier("x")
    ], b.blockStatement([
        b.returnStatement(
            b.binaryExpression(
                "+", b.identifier("x"), b.literal(1)
            )
        )
    ]))
]));

assert.strictEqual(main.name, "main.js");
assert.strictEqual(main.program.body[0].params[0].name, "x");
// etc.

// If you pass the wrong type of arguments, or fail to pass enough
// arguments, an AssertionError will be thrown.

b.file(b.blockStatement([]));
// ==> AssertionError: {"body":[],"type":"BlockStatement","loc":null} does not match type string

b.file("lib/types.js", b.thisExpression());
// ==> AssertionError: {"type":"ThisExpression","loc":null} does not match type Program

The def syntax is used to define all the default AST node types found in core.js, e4x.js, es6.js, es7.js, flow.js, and jsx.js, so you have no shortage of examples to learn from.