AliasStrings.java
/*
* Copyright 2007 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp;
import com.google.javascript.rhino.IR;
import com.google.javascript.rhino.Node;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A compiler pass for aliasing strings. String declarations
* contribute to garbage collection, which becomes a problem in large
* applications. Strings that should be aliased occur many times in the code,
* or occur on codepaths that get executed frequently.
*
* 2017/09/17 Notes:
* - Turning on this pass usually hurts code size after gzip.
* - It was originally written to deal with performance problems on some
* older browser VMs.
* - However, projects that make heavy use of jslayout may need to enable
* this pass even for modern browsers, because jslayout generates so many
* duplicate strings.
*
*/
class AliasStrings implements CompilerPass, NodeTraversal.Callback {
private static final Logger logger =
Logger.getLogger(AliasStrings.class.getName());
/** Prefix for variable names for the aliased strings */
private static final String STRING_ALIAS_PREFIX = "$$S_";
private final AbstractCompiler compiler;
private final JSModuleGraph moduleGraph;
// Regular expression matcher for a blacklisting strings in aliasing.
private Matcher blacklist = null;
/**
* Strings that can be aliased, or null if all strings except 'undefined'
* should be aliased
*/
private final Set<String> aliasableStrings;
private final boolean outputStringUsage;
private final SortedMap<String, StringInfo> stringInfoMap = new TreeMap<>();
private final Set<String> usedHashedAliases = new LinkedHashSet<>();
/**
* Map from module to the node in that module that should parent any string
* variable declarations that have to be moved into that module
*/
private final Map<JSModule, Node> moduleVarParentMap =
new HashMap<>();
/** package private. This value is AND-ed with the hash function to allow
* unit tests to reduce the range of hash values to test collision cases */
int unitTestHashReductionMask = ~0;
/**
* Creates an instance.
*
* @param compiler The compiler
* @param moduleGraph The module graph, or null if there are no modules
* @param strings Set of strings to be aliased. If null, all strings except
* 'undefined' will be aliased.
* @param blacklistRegex The regex to blacklist words in aliasing strings.
* @param outputStringUsage Outputs all strings and the number of times they
* were used in the application to the server log.
*/
AliasStrings(AbstractCompiler compiler,
JSModuleGraph moduleGraph,
Set<String> strings,
String blacklistRegex,
boolean outputStringUsage) {
this.compiler = compiler;
this.moduleGraph = moduleGraph;
this.aliasableStrings = strings;
if (blacklistRegex.length() != 0) {
this.blacklist = Pattern.compile(blacklistRegex).matcher("");
} else {
this.blacklist = null;
}
this.outputStringUsage = outputStringUsage;
}
@Override
public void process(Node externs, Node root) {
logger.fine("Aliasing common strings");
// Traverse the tree and collect strings
NodeTraversal.traverseEs6(compiler, root, this);
// 1st edit pass: replace some strings with aliases
replaceStringsWithAliases();
// 2nd edit pass: add variable declarations for aliased strings.
addAliasDeclarationNodes();
if (outputStringUsage) {
outputStringUsage();
}
}
@Override
public boolean shouldTraverse(NodeTraversal nodeTraversal, Node n, Node parent) {
switch (n.getToken()) {
case TEMPLATELIT:
case TAGGED_TEMPLATELIT:
case TEMPLATELIT_SUB: // technically redundant, since it must be a child of the others
// TODO(bradfordcsmith): Consider replacing long and/or frequently occurring substrings
// within template literals with template substitutions.
return false;
default:
return true;
}
}
@Override
public void visit(NodeTraversal t, Node n, Node parent) {
if (n.isString() && !parent.isGetProp() && !parent.isRegExp()) {
String str = n.getString();
// "undefined" is special-cased, since it needs to be used when JS code
// is unloading and therefore variable references aren't available.
// This is because of a bug in Firefox.
if ("undefined".equals(str)) {
return;
}
if (blacklist != null && blacklist.reset(str).find()) {
return;
}
if (aliasableStrings == null || aliasableStrings.contains(str)) {
StringOccurrence occurrence = new StringOccurrence(n, parent);
StringInfo info = getOrCreateStringInfo(str);
info.occurrences.add(occurrence);
info.numOccurrences++;
// The current module.
JSModule module = t.getModule();
if (info.numOccurrences != 1) {
// Check whether the current module depends on the module containing
// the declaration.
if (module != null
&& info.moduleToContainDecl != null
&& module != info.moduleToContainDecl) {
// We need to declare this string in the deepest module in the
// module dependency graph that both of these modules depend on.
module =
moduleGraph.getDeepestCommonDependencyInclusive(module, info.moduleToContainDecl);
} else {
// use the previously saved insertion location.
return;
}
}
Node varParent = moduleVarParentMap.get(module);
if (varParent == null) {
varParent = compiler.getNodeForCodeInsertion(module);
moduleVarParentMap.put(module, varParent);
}
info.moduleToContainDecl = module;
info.parentForNewVarDecl = varParent;
info.siblingToInsertVarDeclBefore = varParent.getFirstChild();
}
}
}
/**
* Looks up the {@link StringInfo} object for a JavaScript string. Creates
* it if necessary.
*/
private StringInfo getOrCreateStringInfo(String string) {
StringInfo info = stringInfoMap.get(string);
if (info == null) {
info = new StringInfo(stringInfoMap.size());
stringInfoMap.put(string, info);
}
return info;
}
/**
* Replace strings with references to alias variables.
*/
private void replaceStringsWithAliases() {
for (Entry<String, StringInfo> entry : stringInfoMap.entrySet()) {
String literal = entry.getKey();
StringInfo info = entry.getValue();
if (shouldReplaceWithAlias(literal, info)) {
for (StringOccurrence occurrence : info.occurrences) {
replaceStringWithAliasName(
occurrence, info.getVariableName(literal), info);
}
}
}
}
/**
* Creates a var declaration for each aliased string. Var declarations are
* inserted as close to the first use of the string as possible.
*/
private void addAliasDeclarationNodes() {
for (Entry<String, StringInfo> entry : stringInfoMap.entrySet()) {
StringInfo info = entry.getValue();
if (!info.isAliased) {
continue;
}
String alias = info.getVariableName(entry.getKey());
Node var = IR.var(IR.name(alias), IR.string(entry.getKey()));
var.useSourceInfoFromForTree(info.parentForNewVarDecl);
if (info.siblingToInsertVarDeclBefore == null) {
info.parentForNewVarDecl.addChildToFront(var);
} else {
info.parentForNewVarDecl.addChildBefore(
var, info.siblingToInsertVarDeclBefore);
}
compiler.reportChangeToEnclosingScope(var);
}
}
/**
* Dictates the policy for replacing a string with an alias.
*
* @param str The string literal
* @param info Accumulated information about a string
*/
private static boolean shouldReplaceWithAlias(String str, StringInfo info) {
// Optimize for code size. Are aliases smaller than strings?
//
// This logic optimizes for the size of uncompressed code, but it tends to
// get good results for the size of the gzipped code too.
//
// gzip actually prefers that strings are not aliased - it compresses N
// string literals better than 1 string literal and N+1 short variable
// names, provided each string is within 32k of the previous copy. We
// follow the uncompressed logic as insurance against there being multiple
// strings more than 32k apart.
int sizeOfLiteral = 2 + str.length();
int sizeOfStrings = info.numOccurrences * sizeOfLiteral;
int sizeOfVariable = 3;
// '6' comes from: 'var =;' in var XXX="...";
int sizeOfAliases = 6 + sizeOfVariable + sizeOfLiteral // declaration
+ info.numOccurrences * sizeOfVariable; // + uses
return sizeOfAliases < sizeOfStrings;
}
/**
* Replaces a string literal with a reference to the string's alias variable.
*/
private void replaceStringWithAliasName(StringOccurrence occurrence,
String name,
StringInfo info) {
Node nameNode = IR.name(name);
occurrence.parent.replaceChild(occurrence.node,
nameNode);
info.isAliased = true;
compiler.reportChangeToEnclosingScope(nameNode);
}
/**
* Outputs a log of all strings used more than once in the code.
*/
private void outputStringUsage() {
StringBuilder sb = new StringBuilder("Strings used more than once:\n");
for (Entry<String, StringInfo> stringInfoEntry : stringInfoMap.entrySet()) {
StringInfo info = stringInfoEntry.getValue();
if (info.numOccurrences > 1) {
sb.append(info.numOccurrences);
sb.append(": ");
sb.append(stringInfoEntry.getKey());
sb.append('\n');
}
}
// TODO(user): Make this save to file OR output to the application
logger.fine(sb.toString());
}
// -------------------------------------------------------------------------
/**
* A class that holds the location of a single JavaScript string literal
*/
private static final class StringOccurrence {
final Node node;
final Node parent;
StringOccurrence(Node node, Node parent) {
this.node = node;
this.parent = parent;
}
}
/**
* A class that holds information about a JavaScript string that might become
* aliased.
*/
private final class StringInfo {
final int id;
boolean isAliased; // set to 'true' when reference to alias created
final List<StringOccurrence> occurrences;
int numOccurrences;
JSModule moduleToContainDecl;
Node parentForNewVarDecl;
Node siblingToInsertVarDeclBefore;
String aliasName;
StringInfo(int id) {
this.id = id;
this.occurrences = new ArrayList<>();
this.isAliased = false;
}
/** Returns the JS variable name to be substituted for this string. */
String getVariableName(String stringLiteral) {
if (aliasName == null) {
aliasName =
encodeStringAsIdentifier(STRING_ALIAS_PREFIX, stringLiteral);
}
return aliasName;
}
/**
* Returns a legal identifier that uniquely characterizes string 's'.
*
* We want the identifier to be a function of the string value because that
* makes the identifiers stable as the program is changed.
*
* The digits of a good hash function would be adequate, but for short
* strings the following algorithm is easier to work with for unit tests.
*
* ASCII alphanumerics are mapped to themselves. Other characters are
* mapped to $XXX or $XXX_ where XXX is a variable number of hex digits.
* The underscore is inserted as necessary to avoid ambiguity when the
* character following is a hex digit. E.g. '\n1' maps to '$a_1',
* distinguished by the underscore from '\u00A1' which maps to '$a1'.
*
* If the string is short enough, this is sufficient. Longer strings are
* truncated after encoding an initial prefix and appended with a hash
* value.
*/
String encodeStringAsIdentifier(String prefix, String s) {
// Limit to avoid generating very long identifiers
final int maxLimit = 20;
final int length = s.length();
final int limit = Math.min(length, maxLimit);
StringBuilder sb = new StringBuilder();
sb.append(prefix);
boolean protectHex = false;
for (int i = 0; i < limit; i++) {
char ch = s.charAt(i);
if (protectHex) {
if ((ch >= '0' && ch <= '9') ||
(ch >= 'a' && ch <= 'f')) { // toHexString generate lowercase
sb.append('_');
}
protectHex = false;
}
if ((ch >= '0' && ch <= '9') ||
(ch >= 'A' && ch <= 'Z') ||
(ch >= 'a' && ch <= 'z')) {
sb.append(ch);
} else {
sb.append('$');
sb.append(Integer.toHexString(ch));
protectHex = true;
}
}
if (length == limit) {
return sb.toString();
}
// The identifier is not unique because we omitted part, so add a
// checksum as a hashcode.
int hash = s.hashCode() & unitTestHashReductionMask;
sb.append('_');
sb.append(Integer.toHexString(hash));
String encoded = sb.toString();
if (!usedHashedAliases.add(encoded)) {
// A collision has been detected (which is very rare). Use the sequence
// id to break the tie. This means that the name is no longer invariant
// across source code changes and recompilations.
encoded += "_" + id;
}
return encoded;
}
}
}