// JsMessage.java
/*
* Copyright 2006 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.javascript.jscomp.parsing.parser.util.format.SimpleFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* A representation of a translatable message in JavaScript source code.
*
* <p>Instances are created using a {@link JsMessage.Builder},
* like this:
* <pre>
* JsMessage m = new JsMessage.Builder(key)
* .appendStringPart("Hi ")
* .appendPlaceholderReference("firstName")
* .appendStringPart("!")
* .setDesc("A welcome message")
* .build();
* </pre>
*
* @author anatol@google.com (Anatol Pomazau)
*/
public final class JsMessage {
/**
 * Message style that can be used when parsing JS code.
 * The constants are declared from the most relaxed style to the most restricted one.
 */
public enum Style {
  /** All legacy code is accepted. */
  LEGACY,
  /** Legacy code is allowed, but it is reported as a warning. */
  RELAX,
  /** Any legacy code is prohibited. */
  CLOSURE;

  /**
   * Picks the message {@link Style} that corresponds to the given flags.
   *
   * @param useClosure whether the closure style should be used at all
   * @param allowLegacyMessages whether legacy messages are tolerated; only consulted when
   *     {@code useClosure} is true
   * @return {@link #LEGACY} when closure style is off, otherwise {@link #RELAX} if legacy
   *     messages are allowed and {@link #CLOSURE} if they are not
   */
  static Style getFromParams(boolean useClosure, boolean allowLegacyMessages) {
    if (!useClosure) {
      return LEGACY;
    }
    if (allowLegacyMessages) {
      return RELAX;
    }
    return CLOSURE;
  }
}
/** Format used to render a placeholder reference as text; {@code %s} is the placeholder name. */
private static final String MESSAGE_REPRESENTATION_FORMAT = "{$%s}";
/** Key that identifies this message in sources (e.g. {@code "MSG_HELLO"}); never null. */
private final String key;
/** Id that identifies this message in the bundle; never null. */
private final String id;
/** Read-only view of the message parts: plain strings and placeholder references. */
private final List<CharSequence> parts;
/** Read-only view of the names of the placeholders registered for this message. */
private final Set<String> placeholders;
/** Description intended for translators; may be null. */
private final String desc;
/** Whether the message should be hidden from volunteer translators. */
private final boolean hidden;
/** User-specified meaning; null when no explicit meaning was given. */
private final String meaning;
/** The message's source name; may be null (equality treats two nulls as equal). */
private final String sourceName;
/** Whether the builder was given no key and generated one from the message content. */
private final boolean isAnonymous;
/** Whether the builder recognized the key as an external message identifier. */
private final boolean isExternal;
/**
 * Creates an instance. Client code should use a {@link JsMessage.Builder}.
 *
 * <p>The {@code parts} and {@code placeholders} collections are copied before being wrapped
 * as read-only. Wrapping alone would only create a view, so a caller that kept appending to
 * its own collections (for example a reused builder) would silently change this message and
 * its hash code after construction.
 *
 * @param sourceName the message's source name; may be null
 * @param key a key that should identify this message in sources; typically
 *     it is the message's name (e.g. {@code "MSG_HELLO"}). Must not be null.
 * @param isAnonymous whether the key was generated rather than supplied
 * @param isExternal whether the key was recognized as an external message identifier
 * @param id an id that *uniquely* identifies the message in the bundle.
 *     It could be either the message name or id generated from the message
 *     content. Must not be null.
 * @param parts the message parts, in order; copied
 * @param placeholders the names of the registered placeholders; copied
 * @param desc the description for translators; may be null
 * @param hidden whether the message should be hidden from volunteer translators
 * @param meaning The user-specified meaning of the message. May be null if
 *     the user did not specify an explicit meaning.
 * @throws IllegalStateException if {@code key} or {@code id} is null
 */
private JsMessage(String sourceName, String key,
    boolean isAnonymous, boolean isExternal,
    String id, List<CharSequence> parts, Set<String> placeholders,
    String desc, boolean hidden, String meaning) {
  checkState(key != null);
  checkState(id != null);
  this.key = key;
  this.id = id;
  // Defensive copies: unmodifiableList/unmodifiableSet are views, not snapshots.
  this.parts = Collections.unmodifiableList(new ArrayList<CharSequence>(parts));
  this.placeholders = Collections.unmodifiableSet(new HashSet<String>(placeholders));
  this.desc = desc;
  this.hidden = hidden;
  this.meaning = meaning;
  this.sourceName = sourceName;
  this.isAnonymous = isAnonymous;
  this.isExternal = isExternal;
}
/** Returns the source name recorded for this message. */
public String getSourceName() {
  return this.sourceName;
}
/** Returns the key (name) of this message, e.g. {@code "MSG_HELLO"}. */
public String getKey() {
  return this.key;
}
/** Returns the anonymous flag this message was constructed with. */
public boolean isAnonymous() {
  return this.isAnonymous;
}
/** Returns the external flag this message was constructed with. */
public boolean isExternal() {
  return this.isExternal;
}
/** Returns the id of this message (e.g. {@code "92430284230902938293"}). */
public String getId() {
  return this.id;
}
/**
 * Returns the description attached to this message to help translators,
 * or null if the message has no description.
 */
public String getDesc() {
  return this.desc;
}
/**
 * Returns the meaning annotated to this message, which is intended to force
 * different translations.
 */
public String getMeaning() {
  return this.meaning;
}
/**
 * Returns whether this message should be hidden from volunteer translators
 * (to reduce the chances of a new feature leak).
 */
public boolean isHidden() {
  return this.hidden;
}
/**
 * Returns the read-only list of this message's parts. Each part is either a
 * {@link String} or a {@link PlaceholderReference}.
 */
public List<CharSequence> parts() {
  return this.parts;
}
/** Returns the read-only set of placeholder names registered in this message. */
public Set<String> placeholders() {
  return this.placeholders;
}
/** Renders the message by concatenating the string form of every part, in order. */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  for (int i = 0; i < parts.size(); i++) {
    // toString() is called explicitly so each part contributes its rendered form.
    text.append(parts.get(i).toString());
  }
  return text.toString();
}
/**
 * Returns whether this message has no content.
 *
 * <p>Emptiness is judged by {@link CharSequence#length()} of each part; for a
 * {@link PlaceholderReference} that is the length of the placeholder name.
 *
 * @return true iff the message has no parts or every part has zero length
 */
public boolean isEmpty() {
  for (CharSequence part : parts) {
    if (part.length() > 0) {
      return false;
    }
  }
  return true;
}
/**
 * Compares this message with another object. Two messages are equal when their id, key,
 * anonymous flag, parts, meaning, placeholders, description, source name and hidden flag
 * all match; the nullable fields are equal when both are null.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof JsMessage)) {
    return false;
  }
  JsMessage that = (JsMessage) o;
  if (!id.equals(that.id)
      || !key.equals(that.key)
      || isAnonymous != that.isAnonymous
      || !parts.equals(that.parts)) {
    return false;
  }
  if (meaning == null ? that.meaning != null : !meaning.equals(that.meaning)) {
    return false;
  }
  if (!placeholders.equals(that.placeholders)) {
    return false;
  }
  if (desc == null ? that.desc != null : !desc.equals(that.desc)) {
    return false;
  }
  if (sourceName == null ? that.sourceName != null : !sourceName.equals(that.sourceName)) {
    return false;
  }
  return hidden == that.hidden;
}
/**
 * Computes a hash from the key, anonymous flag, id, parts, description, hidden flag and
 * source name, combined with the usual multiply-by-31 scheme.
 */
@Override
public int hashCode() {
  int[] components = {
    key.hashCode(),
    isAnonymous ? 1 : 0,
    id.hashCode(),
    parts.hashCode(),
    desc == null ? 0 : desc.hashCode(),
    hidden ? 1 : 0,
    sourceName == null ? 0 : sourceName.hashCode()
  };
  // Starting from 0 makes the first step yield key.hashCode() unchanged.
  int result = 0;
  for (int component : components) {
    result = 31 * result + component;
  }
  return result;
}
/**
 * A reference to a placeholder in a translatable message.
 *
 * <p>The {@link CharSequence} methods ({@link #length()}, {@link #charAt(int)},
 * {@link #subSequence(int, int)}) operate on the bare placeholder name, whereas
 * {@link #toString()} returns the name wrapped in the message representation format.
 */
public static class PlaceholderReference implements CharSequence {
  private final String name;

  PlaceholderReference(String name) {
    this.name = name;
  }

  /** Returns the bare placeholder name. */
  public String getName() {
    return name;
  }

  @Override
  public int length() {
    return name.length();
  }

  @Override
  public char charAt(int index) {
    return name.charAt(index);
  }

  @Override
  public CharSequence subSequence(int start, int end) {
    return name.subSequence(start, end);
  }

  /** Returns the placeholder rendered with the message representation format. */
  @Override
  public String toString() {
    return SimpleFormat.format(MESSAGE_REPRESENTATION_FORMAT, name);
  }

  /** Two references are equal when they carry the same name. */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof PlaceholderReference)) {
      return false;
    }
    PlaceholderReference other = (PlaceholderReference) o;
    return name.equals(other.name);
  }

  @Override
  public int hashCode() {
    return name.hashCode() * 31;
  }
}
/**
 * Builds {@link JsMessage} instances. Generates authoritative keys and fingerprints for a
 * message that must stay constant over time.
 *
 * <p>Unnamed messages are supported: they receive a generated key that looks like
 * {@code MSG_<fingerprint value>}.
 */
@GwtIncompatible("java.util.regex")
public static class Builder {
  // Arbitrary suffixes are accepted so that local variables can be disambiguated.
  private static final Pattern MSG_EXTERNAL_PATTERN = Pattern.compile("MSG_EXTERNAL_(\\d+).*");

  private String key;
  private String meaning;
  private String desc;
  private boolean hidden;
  private String sourceName;
  private final List<CharSequence> parts = new ArrayList<>();
  private final Set<String> placeholders = new HashSet<>();

  /** Creates a builder for an unnamed message (null key). */
  public Builder() {
    this(null);
  }

  /** Creates a builder for a message with the given key. */
  public Builder(String key) {
    this.key = key;
  }

  /**
   * Extracts the external message id from an identifier.
   *
   * @return the digits that follow {@code MSG_EXTERNAL_}, or null if the identifier is not
   *     an external message identifier
   */
  private static String getExternalMessageId(String identifier) {
    Matcher matcher = MSG_EXTERNAL_PATTERN.matcher(identifier);
    if (!matcher.matches()) {
      return null;
    }
    return matcher.group(1);
  }

  /** Returns the message's key (e.g. {@code "MSG_HELLO"}). */
  public String getKey() {
    return this.key;
  }

  /**
   * Sets the message's key.
   *
   * @param key a key that should uniquely identify this message; typically
   *     it is the message's name (e.g. {@code "MSG_HELLO"}).
   */
  public Builder setKey(String key) {
    this.key = key;
    return this;
  }

  /**
   * Sets the message's source name.
   *
   * @param sourceName The message's sourceName.
   */
  public Builder setSourceName(String sourceName) {
    this.sourceName = sourceName;
    return this;
  }

  /** Appends a placeholder reference to the message and registers the placeholder name. */
  public Builder appendPlaceholderReference(String name) {
    checkNotNull(name, "Placeholder name could not be null");
    PlaceholderReference reference = new PlaceholderReference(name);
    parts.add(reference);
    placeholders.add(name);
    return this;
  }

  /** Appends a translatable string literal to the message. */
  public Builder appendStringPart(String part) {
    checkNotNull(part, "String part of the message could not be null");
    parts.add(part);
    return this;
  }

  /** Returns the set of placeholder names registered so far (the builder's own set). */
  public Set<String> getPlaceholders() {
    return this.placeholders;
  }

  /** Sets the description of the message, which helps translators. */
  public Builder setDesc(String desc) {
    this.desc = desc;
    return this;
  }

  /**
   * Sets the programmer-specified meaning of this message, which
   * forces this message to translate differently.
   */
  public Builder setMeaning(String meaning) {
    this.meaning = meaning;
    return this;
  }

  /** Sets whether the message should be hidden from volunteer translators. */
  public Builder setIsHidden(boolean hidden) {
    this.hidden = hidden;
    return this;
  }

  /** Returns whether at least one part has been appended. */
  public boolean hasParts() {
    return parts.size() > 0;
  }

  /** Returns the parts appended so far (the builder's own list). */
  public List<CharSequence> getParts() {
    return this.parts;
  }

  /** Builds the message without an id generator; see {@link #build(IdGenerator)}. */
  public JsMessage build() {
    return build(null);
  }

  /**
   * Builds the message.
   *
   * <p>If no key has been set, a key is generated from the fingerprint of the parts, stored
   * in this builder, and the message is marked anonymous. A named message whose key is an
   * external message identifier is marked external and uses the embedded number as its id.
   * Otherwise the id is derived from the meaning (falling back to the key): it is passed
   * through {@code idGenerator} when one is given, or used directly when it is null.
   *
   * @param idGenerator generator for the message id; may be null
   */
  public JsMessage build(IdGenerator idGenerator) {
    boolean isAnonymous = getKey() == null;
    if (isAnonymous) {
      // An unnamed message must be given a unique name before it is constructed.
      key = JsMessageVisitor.MSG_PREFIX + fingerprint(getParts());
    }

    // Only explicitly named messages are checked for the external pattern.
    String externalId = isAnonymous ? null : getExternalMessageId(key);
    boolean isExternal = externalId != null;

    String id;
    if (isExternal) {
      id = externalId;
    } else {
      String defactoMeaning = (meaning == null) ? key : meaning;
      if (idGenerator == null) {
        id = defactoMeaning;
      } else {
        id = idGenerator.generateId(defactoMeaning, parts);
      }
    }

    return new JsMessage(
        sourceName, key, isAnonymous, isExternal, id, parts, placeholders, desc, hidden, meaning);
  }

  /**
   * Generates a compact uppercase alphanumeric text representation of a
   * 63-bit fingerprint of the content parts of a message.
   */
  private static String fingerprint(List<CharSequence> messageParts) {
    StringBuilder content = new StringBuilder();
    for (CharSequence part : messageParts) {
      // A placeholder contributes its rendered form, so it is converted to a string
      // explicitly; appending it as a CharSequence would copy only the bare name.
      boolean isPlaceholder = part instanceof JsMessage.PlaceholderReference;
      content.append(isPlaceholder ? part.toString() : part);
    }
    // Clearing the sign bit leaves a non-negative 63-bit value.
    long nonnegativeHash = Hash.hash64(content.toString()) & Long.MAX_VALUE;
    String base36 = Long.toString(nonnegativeHash, 36);
    return Ascii.toUpperCase(base36);
  }
}
/**
 * Routines for hashing.
 *
 * <p>The hash takes a byte array representing arbitrary data and turns it into a small,
 * hopefully unique, number. The only convenience entry point kept in this class hashes a
 * {@link String} through its UTF-8 bytes.
 *
 * <p><b>Note</b>: this hash has weaknesses in the two most-significant key bits and in the
 * three least-significant seed bits. The weaknesses are small and, practically speaking,
 * will not affect the distribution of hash values. Still, it would be good practice not to
 * choose seeds 0, 1, 2, 3, ..., n to yield n independent hash functions. Use pseudo-random
 * seeds instead.
 *
 * <p>This code is based on the work of Craig Silverstein and Sanjay Ghemawat, and was then
 * forked from com.google.common.
 *
 * <p>The original code for the hash function is courtesy
 * <a href="http://burtleburtle.net/bob/hash/evahash.html">Bob Jenkins</a>.
 *
 * <p>TODO(anatol): Add stream hashing functionality.
 */
static final class Hash {
  /** Default hash seed (64 bit). */
  private static final long SEED64 = 0x2b992ddfa23249d6L; // part of pi, arbitrary

  /** Hash constant (64 bit). */
  private static final long CONSTANT64 = 0xe08c1d668b756f82L; // part of golden ratio, arbitrary

  private Hash() {}

  /**
   * Hashes a string to a 64 bit value using the default seed (digits of pi).
   *
   * @param value the string to hash; may be null
   * @return 64 bit hash value
   */
  static long hash64(@Nullable String value) {
    return hash64(value, SEED64);
  }

  /**
   * Hashes a string to a 64 bit value using the supplied seed. A null string is hashed
   * as zero bytes; any other string is hashed through its UTF-8 encoding.
   *
   * @param value the string to hash; may be null
   * @param seed the seed
   * @return 64 bit hash value
   */
  private static long hash64(@Nullable String value, long seed) {
    if (value != null) {
      byte[] utf8 = value.getBytes(UTF_8);
      return hash64(utf8, seed);
    }
    return hash64(null, 0, 0, seed);
  }

  /**
   * Hashes a whole byte array to a 64 bit value using the supplied seed.
   *
   * @param value the bytes to hash; null is treated as zero bytes
   * @param seed the seed
   * @return 64 bit hash value
   */
  private static long hash64(byte[] value, long seed) {
    int length = (value == null) ? 0 : value.length;
    return hash64(value, 0, length, seed);
  }

  /**
   * Hashes a range of a byte array to a 64 bit value using the supplied seed.
   *
   * @param value the bytes to hash
   * @param offset the starting position of value where bytes are
   *     used for the hash computation
   * @param length number of bytes of value that are used for the
   *     hash computation
   * @param seed the seed
   * @return 64 bit hash value
   */
  private static long hash64(byte[] value, int offset, int length, long seed) {
    long a = CONSTANT64;
    long b = CONSTANT64;
    long c = seed;

    // Consume the input in full 24-byte blocks: one 64-bit word per accumulator.
    int pos = offset;
    int remaining = length;
    while (remaining >= 24) {
      a += word64At(value, pos);
      b += word64At(value, pos + 8);
      c += word64At(value, pos + 16);

      // Mix
      a -= b; a -= c; a ^= c >>> 43;
      b -= c; b -= a; b ^= a << 9;
      c -= a; c -= b; c ^= b >>> 8;
      a -= b; a -= c; a ^= c >>> 38;
      b -= c; b -= a; b ^= a << 23;
      c -= a; c -= b; c ^= b >>> 5;
      a -= b; a -= c; a ^= c >>> 35;
      b -= c; b -= a; b ^= a << 49;
      c -= a; c -= b; c ^= b >>> 11;
      a -= b; a -= c; a ^= c >>> 12;
      b -= c; b -= a; b ^= a << 18;
      c -= a; c -= b; c ^= b >>> 22;

      pos += 24;
      remaining -= 24;
    }

    c += length;

    // Fold in the 0..23 leftover bytes: a takes bytes 0-7, b takes bytes 8-15 and
    // c takes bytes 16-22.
    if (remaining >= 16) {
      // The first byte of c is reserved for the length, so packing starts at bit 8.
      c += partialWord(value, pos + 16, remaining - 16, 8);
      b += word64At(value, pos + 8);
      a += word64At(value, pos);
    } else if (remaining >= 8) {
      b += partialWord(value, pos + 8, remaining - 8, 0);
      a += word64At(value, pos);
    } else {
      // With nothing left over there is nothing to add.
      a += partialWord(value, pos, remaining, 0);
    }
    return mix64(a, b, c);
  }

  /** Reads eight bytes starting at {@code offset} as a little-endian 64-bit word. */
  private static long word64At(byte[] bytes, int offset) {
    return partialWord(bytes, offset, 8, 0);
  }

  /**
   * Packs {@code count} bytes starting at {@code start} into a long, little-endian: the
   * first byte is shifted left by {@code firstShift} bits and each following byte by eight
   * bits more. Returns 0 when {@code count} is not positive.
   */
  private static long partialWord(byte[] bytes, int start, int count, int firstShift) {
    long word = 0;
    for (int i = 0; i < count; i++) {
      word += (bytes[start + i] & 0xffL) << (firstShift + 8 * i);
    }
    return word;
  }

  /**
   * Mixes longs a, b, and c, and returns the final value of c.
   */
  private static long mix64(long a, long b, long c) {
    a -= b; a -= c; a ^= c >>> 43;
    b -= c; b -= a; b ^= a << 9;
    c -= a; c -= b; c ^= b >>> 8;
    a -= b; a -= c; a ^= c >>> 38;
    b -= c; b -= a; b ^= a << 23;
    c -= a; c -= b; c ^= b >>> 5;
    a -= b; a -= c; a ^= c >>> 35;
    b -= c; b -= a; b ^= a << 49;
    c -= a; c -= b; c ^= b >>> 11;
    a -= b; a -= c; a ^= c >>> 12;
    b -= c; b -= a; b ^= a << 18;
    c -= a; c -= b; c ^= b >>> 22;
    return c;
  }
}
/** Strategy for generating the id of a message from its meaning and its parts. */
public interface IdGenerator {
/**
* Generates the ID for the message. Messages with the same messageParts
* and meaning will get the same id. Messages with the same id
* will get the same translation.
*
* @param meaning The programmer-specified meaning. If no {@code @meaning}
* annotation appears, we will use the name of the variable it's
* assigned to. If the variable is unnamed, then we will just
* use a fingerprint of the message.
* @param messageParts The parts of the message, including the main
* message text.
* @return the generated id for the message
*/
String generateId(String meaning, List<CharSequence> messageParts);
}
}