SourceFile.java

/*
 * Copyright 2009 The Closure Compiler Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.javascript.jscomp;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.GwtIncompatible;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.CharStreams;
import com.google.common.io.Resources;
import com.google.javascript.rhino.StaticSourceFile;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Objects;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * An abstract representation of a source file that provides access to language-neutral features.
 * The source file can be loaded from various locations, such as from disk or from a preloaded
 * string.
 *
 * @author nicksantos@google.com (Nick Santos)
 */
public class SourceFile implements StaticSourceFile, Serializable {
  /** Serialization version identifier for this class. */
  private static final long serialVersionUID = 1L;
  /**
   * The byte-order mark as it appears in decoded text. It is removed from the start of the code
   * by {@link #setCode(String, boolean)} when that method is asked to do so.
   */
  private static final String UTF8_BOM = "\uFEFF";

  /**
   * Supplies JavaScript source code on demand.
   *
   * <p>Implementations should cache the value they return so that the source text stays
   * consistent throughout a single compile.
   */
  public interface Generator {
    /** Returns the source code produced by this generator. */
    String getCode();
  }

  /**
   * Maximum number of lines in the region returned by {@link #getRegion(int)}; fewer lines are
   * returned when the source runs out first.
   * This length must be odd.
   */
  private static final int SOURCE_EXCERPT_REGION_LENGTH = 5;

  // Name reported by getName() and toString(). The constructor rewrites
  // platform-specific separators to "/".
  private final String fileName;
  // Whether this file is an extern; see isExtern() and setIsExtern(boolean).
  private boolean isExternFile = false;

  // The fileName may not always identify the original file - for example,
  // supersourced Java inputs, or Java inputs that come from Jar files. This
  // is an optional field that the creator of an AST or SourceFile can set.
  // It could be a path to the original file, or in case this SourceFile came
  // from a Jar, it could be the path to the Jar.
  private String originalPath = null;

  // Source Line Information
  // lineOffsets[i] is the character offset at which line i + 1 starts. Built
  // lazily by findLineOffsets() and reset to null by setCode(...).
  private transient int[] lineOffsets = null;

  // Source text held in memory, or null when nothing is loaded. Transient, so
  // default serialization does not write it.
  private transient String code = null;

  /**
   * Warning diagnostic for two zip entries that share the same relative path. The message takes
   * the two entries as arguments {0} and {1}.
   */
  static final DiagnosticType DUPLICATE_ZIP_CONTENTS = DiagnosticType.warning(
      "JSC_DUPLICATE_ZIP_CONTENTS",
      "Two zip entries containing the same relative path.\n"
      + "Entry 1: {0}\n"
      + "Entry 2: {1}");

  /**
   * Creates a source file with the given name and no code loaded.
   *
   * @param fileName The name of the source file. It need not correspond to a real path, but it
   *     should be unique, since it appears in warning messages emitted by the compiler.
   *     Platform-specific separators are rewritten to {@code /}.
   * @throws IllegalArgumentException if {@code fileName} is null or empty.
   */
  public SourceFile(String fileName) {
    checkArgument(!isNullOrEmpty(fileName), "a source must have a name");

    // Only rewrite separators on platforms where the separator is not already "/".
    this.fileName =
        "/".equals(File.separator) ? fileName : fileName.replace(File.separator, "/");
  }

  /**
   * Returns the character offset at which the given line starts.
   *
   * @param lineno the line number, 1 being the first line of the file.
   * @throws IllegalArgumentException if {@code lineno} is not between 1 and the number of lines.
   */
  @Override
  public int getLineOffset(int lineno) {
    findLineOffsets();
    int lineCount = lineOffsets.length;
    boolean inRange = lineno >= 1 && lineno <= lineCount;
    if (!inRange) {
      throw new IllegalArgumentException(
          "Expected line number between 1 and " + lineCount + "\nActual: " + lineno);
    }
    return lineOffsets[lineno - 1];
  }

  /** Returns how many lines this source file has, computing the line offsets first if needed. */
  int getNumLines() {
    this.findLineOffsets();
    return this.lineOffsets.length;
  }

  /**
   * Lazily fills {@code lineOffsets} with the start offset of every line in the code.
   *
   * <p>Does nothing when the offsets are already known. If the code cannot be read, the file is
   * treated as a single line starting at offset 0.
   */
  private void findLineOffsets() {
    if (lineOffsets != null) {
      return;
    }
    try {
      // A limit of -1 keeps trailing empty strings, so a trailing newline yields a final line.
      String[] lines = getCode().split("\n", -1);
      int[] offsets = new int[lines.length];
      int nextStart = 0;
      for (int i = 0; i < lines.length; i++) {
        offsets[i] = nextStart;
        // Skip over the line's text plus the newline that terminated it.
        nextStart += lines[i].length() + 1;
      }
      lineOffsets = offsets;
    } catch (IOException e) {
      lineOffsets = new int[] {0};
    }
  }

  /** Discards the computed line offsets so that they are rebuilt on next use. */
  private void resetLineOffsets() {
    this.lineOffsets = null;
  }

  //////////////////////////////////////////////////////////////////////////////
  // Implementation

  /**
   * Returns all the code in this source file.
   *
   * <p>This base implementation returns the code held in memory, which may be {@code null}.
   *
   * @throws IOException declared so that overriding implementations may load the code lazily.
   */
  public String getCode() throws IOException {
    return this.code;
  }


  /**
   * Returns a reader over the text produced by {@link #getCode()}.
   *
   * @throws IOException if the code cannot be obtained.
   */
  @GwtIncompatible("java.io.Reader")
  public Reader getCodeReader() throws IOException {
    String source = getCode();
    return new StringReader(source);
  }

  /** Returns the code currently held in memory without attempting to load it. */
  @VisibleForTesting
  String getCodeNoCache() {
    return this.code;
  }

  /** Replaces the in-memory code, leaving any leading byte-order mark in place. */
  void setCode(String sourceCode) {
    setCode(sourceCode, false);
  }

  /**
   * Replaces the in-memory code and invalidates the computed line offsets.
   *
   * @param sourceCode the new code; may be {@code null} to drop the in-memory copy.
   * @param removeUtf8Bom whether a leading byte-order mark should be stripped from the code.
   */
  void setCode(String sourceCode, boolean removeUtf8Bom) {
    boolean stripBom =
        removeUtf8Bom && sourceCode != null && sourceCode.startsWith(UTF8_BOM);
    code = stripBom ? sourceCode.substring(UTF8_BOM.length()) : sourceCode;
    resetLineOffsets();
  }

  /** Returns the original path if one was set, and the file name otherwise. */
  public String getOriginalPath() {
    if (originalPath == null) {
      return fileName;
    }
    return originalPath;
  }

  /**
   * Sets the original path of this source file.
   *
   * @param originalPath the original path, or {@code null} to fall back to the file name.
   */
  public void setOriginalPath(String originalPath) {
    this.originalPath = originalPath;
  }

  /**
   * Flushes the cached source for SourceFile types that can easily regenerate it.
   *
   * <p>The cache exists mostly to speed up generating source when displaying error messages, so
   * dumping the file contents after the compile is a fine thing to do.
   */
  public void clearCachedSource() {
    // Intentionally empty: not every kind of SourceFile can regenerate its code.
  }

  /** Returns whether the code of this file is currently held in memory. */
  boolean hasSourceInMemory() {
    return this.code != null;
  }

  /** Returns the name of this source file, which is expected to be unique. */
  @Override
  public String getName() {
    return this.fileName;
  }

  /** Returns {@code true} if this source file has been marked as an extern. */
  @Override
  public boolean isExtern() {
    return this.isExternFile;
  }

  /**
   * Marks or unmarks this source file as an extern.
   *
   * @param newVal whether the file should be treated as an extern.
   */
  void setIsExtern(boolean newVal) {
    this.isExternFile = newVal;
  }

  /**
   * Returns the 1-based number of the line that contains the given character offset.
   *
   * <p>Offsets past the start of the last line map to the last line. An offset smaller than the
   * first line's offset yields 0.
   */
  @Override
  public int getLineOfOffset(int offset) {
    findLineOffsets();
    int index = Arrays.binarySearch(lineOffsets, offset);
    if (index < 0) {
      // Not an exact line start: the containing line is the one just before the insertion point.
      int insertionPoint = -(index + 1);
      index = Math.min(insertionPoint - 1, lineOffsets.length - 1);
    }
    return index + 1; // lines are 1-based.
  }

  /** Returns the 0-based column of the given character offset within its line. */
  @Override
  public int getColumnOfOffset(int offset) {
    int lineIndex = getLineOfOffset(offset) - 1;
    int lineStart = lineOffsets[lineIndex];
    return offset - lineStart;
  }

  /**
   * Returns the text of the indicated source line.
   *
   * @param lineNumber the line number, 1 being the first line of the file. Values below 1 are
   *     treated as 1.
   * @return The line indicated, without its terminating newline. Returns {@code null} if the
   *     line number is past the last line, if the line starts at the end of the code, or if
   *     there was an IO exception.
   */
  public String getLine(int lineNumber) {
    findLineOffsets();
    if (lineNumber > lineOffsets.length) {
      return null;
    }

    int start = lineOffsets[Math.max(lineNumber, 1) - 1];

    String js;
    try {
      // NOTE(nicksantos): Right now, this is optimized for few warnings.
      // This is probably the right trade-off, but will be slow if there
      // are lots of warnings in one file.
      js = getCode();
    } catch (IOException e) {
      return null;
    }

    int newline = js.indexOf('\n', start);
    if (newline != -1) {
      return js.substring(start, newline);
    }
    // No further newline: either the line is empty because we are already at the end of the
    // file, or the line runs up to the end of the file.
    return start >= js.length() ? null : js.substring(start);
  }

  /**
   * Gets a region of up to {@code SOURCE_EXCERPT_REGION_LENGTH} lines around the indicated line
   * number. The region starts two lines before the indicated line, or at the first line of the
   * file if that is later. When the region runs to the end of the file, a single trailing newline
   * is not included in it.
   *
   * @param lineNumber the line number, 1 being the first line of the file.
   * @return The region containing the indicated line. Returns {@code null} if the line is not
   *     covered by the region, or if there was an IO exception.
   */
  public Region getRegion(int lineNumber) {
    String js;
    try {
      js = getCode();
    } catch (IOException e) {
      return null;
    }

    int startLine = Math.max(1, lineNumber - (SOURCE_EXCERPT_REGION_LENGTH + 1) / 2 + 1);

    // Walk forward to the character offset at which startLine begins.
    int regionStart = 0;
    for (int skipped = 1; skipped < startLine; skipped++) {
      int newline = js.indexOf('\n', regionStart);
      if (newline == -1) {
        break;
      }
      regionStart = newline + 1;
    }

    // Consume up to SOURCE_EXCERPT_REGION_LENGTH newline-terminated lines. endLine advances only
    // for lines that were terminated by a newline; regionEnd is -1 if the code ran out first.
    int regionEnd = regionStart;
    int endLine = startLine;
    for (int taken = 0; taken < SOURCE_EXCERPT_REGION_LENGTH; taken++) {
      regionEnd = js.indexOf('\n', regionEnd);
      if (regionEnd == -1) {
        break;
      }
      regionEnd++;
      endLine++;
    }

    if (lineNumber >= endLine) {
      return null;
    }

    String excerpt;
    if (regionEnd != -1) {
      excerpt = js.substring(regionStart, regionEnd);
    } else if (js.endsWith("\n")) {
      // endsWith is safe on empty code, unlike charAt(length - 1), which used to throw when the
      // code was empty and lineNumber was below 1.
      excerpt = js.substring(regionStart, js.length() - 1);
    } else {
      excerpt = js.substring(regionStart);
    }
    return new SimpleRegion(startLine, endLine, excerpt);
  }

  /** Returns the file name of this source file. */
  @Override
  public String toString() {
    return this.fileName;
  }

  /**
   * Creates one source file for every entry of the given zip file whose name ends in
   * {@code .js}. All other entries are skipped.
   *
   * @param zipName path of the zip file.
   * @param inputCharset charset to use when the entries are read.
   * @throws IOException if the zip file cannot be opened or an entry URL cannot be formed.
   */
  @GwtIncompatible("java.util.zip.ZipFile")
  public static List<SourceFile> fromZipFile(String zipName, Charset inputCharset)
      throws IOException {
    String absoluteZipPath = new File(zipName).getAbsolutePath();
    List<SourceFile> result = new ArrayList<>();

    try (ZipFile zip = new ZipFile(absoluteZipPath)) {
      for (Enumeration<? extends ZipEntry> entries = zip.entries();
          entries.hasMoreElements(); ) {
        String entryName = entries.nextElement().getName();
        if (entryName.endsWith(".js")) { // Only accept js files
          result.add(fromZipEntry(zipName, absoluteZipPath, entryName, inputCharset));
        }
      }
    }
    return result;
  }

  /** Separator between the path of an archive and the path of an entry inside it. */
  static final String BANG_SLASH = "!/";
  /** Prefix prepended to an absolute zip path when forming the URL of a zip entry. */
  static final String JAR_URL_PREFIX = "jar:file:";

  /**
   * Returns whether the path names a {@code .js} or {@code .js.map} entry inside a {@code .zip}
   * archive, i.e. it contains {@code .zip!/} and has one of those two endings.
   */
  private static boolean isZipEntry(String path) {
    if (!path.contains(".zip!/")) {
      return false;
    }
    return path.endsWith(".js") || path.endsWith(".js.map");
  }

  /**
   * Creates a source file from a combined {@code <zip path>!/<entry path>} string.
   *
   * @param zipURL a path accepted by {@code isZipEntry}.
   * @param inputCharset charset to use when the entry is read.
   * @throws IllegalArgumentException if {@code zipURL} is not a zip entry path.
   * @throws RuntimeException wrapping the {@link MalformedURLException} if no URL can be formed.
   */
  @GwtIncompatible("java.io.File")
  private static SourceFile fromZipEntry(String zipURL, Charset inputCharset) {
    checkArgument(isZipEntry(zipURL));
    String[] parts = zipURL.split(BANG_SLASH);
    String zipPath = parts[0];
    String relativePath = parts[1];
    try {
      // The zip path serves both as the original and as the absolute path.
      return fromZipEntry(zipPath, zipPath, relativePath, inputCharset);
    } catch (MalformedURLException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Creates a source file for a single entry of a zip file, addressed through a
   * {@code jar:file:} URL.
   *
   * @param originalZipPath zip path used to build the original path of the source file.
   * @param absoluteZipPath zip path used to build the URL the entry is read from.
   * @param entryPath path of the entry inside the zip file.
   * @param inputCharset charset to use when the entry is read.
   * @throws MalformedURLException if the entry URL cannot be parsed.
   */
  @GwtIncompatible("java.net.URL")
  public static SourceFile fromZipEntry(
      String originalZipPath, String absoluteZipPath, String entryPath, Charset inputCharset)
      throws MalformedURLException {
    URL zipEntryUrl = new URL(JAR_URL_PREFIX + absoluteZipPath + BANG_SLASH + entryPath);
    String originalEntryPath = originalZipPath + BANG_SLASH + entryPath;

    Builder sourceBuilder = builder().withCharset(inputCharset).withOriginalPath(originalEntryPath);
    return sourceBuilder.buildFromUrl(zipEntryUrl);
  }

  /** Creates a source file for the named file, to be read with the given charset. */
  @GwtIncompatible("java.io.File")
  public static SourceFile fromFile(String fileName, Charset charset) {
    Builder sourceBuilder = builder().withCharset(charset);
    return sourceBuilder.buildFromFile(fileName);
  }

  /** Creates a source file for the named file, to be read as UTF-8. */
  @GwtIncompatible("java.io.File")
  public static SourceFile fromFile(String fileName) {
    return builder().withCharset(UTF_8).buildFromFile(fileName);
  }

  /** @deprecated Use {@link SourceFile#fromPath(Path, Charset)} */
  @Deprecated
  @GwtIncompatible("java.io.File")
  public static SourceFile fromFile(File file, Charset c) {
    Builder sourceBuilder = builder().withCharset(c);
    return sourceBuilder.buildFromFile(file);
  }

  /** @deprecated Use {@link #fromPath(Path, Charset)} */
  @Deprecated
  @GwtIncompatible("java.io.File")
  public static SourceFile fromFile(File file) {
    return builder().withCharset(UTF_8).buildFromFile(file);
  }

  /** Creates a source file for the given path, to be read with the given charset. */
  @GwtIncompatible("java.io.File")
  public static SourceFile fromPath(Path path, Charset c) {
    Builder sourceBuilder = builder().withCharset(c);
    return sourceBuilder.buildFromPath(path);
  }

  /** Creates a source file with the given name whose code is already in memory. */
  public static SourceFile fromCode(String fileName, String code) {
    Builder sourceBuilder = builder();
    return sourceBuilder.buildFromCode(fileName, code);
  }

  /**
   * Creates a source file by reading the whole stream with the builder's default charset.
   *
   * @deprecated Use {@link #fromInputStream(String, InputStream, Charset)}
   */
  @Deprecated
  @GwtIncompatible("java.io.InputStream")
  public static SourceFile fromInputStream(String fileName, InputStream s)
      throws IOException {
    Builder sourceBuilder = builder();
    return sourceBuilder.buildFromInputStream(fileName, s);
  }

  /**
   * Creates a source file by reading the whole stream with the given charset.
   *
   * @throws IOException if reading the stream fails.
   */
  @GwtIncompatible("java.io.InputStream")
  public static SourceFile fromInputStream(String fileName, InputStream s,
      Charset charset) throws IOException {
    Builder sourceBuilder = builder().withCharset(charset);
    return sourceBuilder.buildFromInputStream(fileName, s);
  }

  /**
   * Creates a source file by reading all the characters of the given reader.
   *
   * @throws IOException if reading fails.
   */
  @GwtIncompatible("java.io.Reader")
  public static SourceFile fromReader(String fileName, Reader r)
      throws IOException {
    Builder sourceBuilder = builder();
    return sourceBuilder.buildFromReader(fileName, r);
  }

  /** Creates a source file whose code is obtained from the given generator. */
  public static SourceFile fromGenerator(String fileName, Generator generator) {
    Builder sourceBuilder = builder();
    return sourceBuilder.buildFromGenerator(fileName, generator);
  }

  /** Returns a fresh {@link Builder} with default settings. */
  public static Builder builder() {
    Builder freshBuilder = new Builder();
    return freshBuilder;
  }

  /**
   * A builder for source files.
   *
   * <p>Lets callers customize the charset (UTF-8 unless changed) and the original path of the
   * source file (if it differs from the path on disk) before picking where the code comes from.
   */
  public static class Builder {
    private Charset charset = UTF_8;
    private String originalPath = null;

    public Builder() {}

    /** Sets the charset to use when reading from an input stream, file, path or URL. */
    public Builder withCharset(Charset charset) {
      this.charset = charset;
      return this;
    }

    /** Sets the original path recorded on the source files built afterwards. */
    public Builder withOriginalPath(String originalPath) {
      this.originalPath = originalPath;
      return this;
    }

    /** Builds a source file for the named file. */
    @GwtIncompatible("java.io.File")
    public SourceFile buildFromFile(String fileName) {
      File file = new File(fileName);
      return buildFromFile(file);
    }

    /**
     * Builds a source file for the given file.
     *
     * @deprecated Use {@link #buildFromPath(Path path)}
     */
    @GwtIncompatible("java.io.File")
    @Deprecated
    public SourceFile buildFromFile(File file) {
      Path path = file.toPath();
      return buildFromPath(path);
    }

    /**
     * Builds a source file for the given path. Paths that point into a zip file are read through
     * the zip entry; every other path is read from disk on demand.
     */
    @GwtIncompatible("java.io.File")
    public SourceFile buildFromPath(Path path) {
      String pathName = path.toString();
      return isZipEntry(pathName)
          ? fromZipEntry(pathName, charset)
          : new OnDisk(path, originalPath, charset);
    }

    /** Builds a source file whose code is read from the given URL on demand. */
    @GwtIncompatible("java.net.URL")
    public SourceFile buildFromUrl(URL url) {
      return new AtUrl(url, originalPath, charset);
    }

    /** Builds a source file whose code is already in memory. */
    public SourceFile buildFromCode(String fileName, String code) {
      return new Preloaded(fileName, originalPath, code);
    }

    /**
     * Builds a source file by reading the whole stream with the configured charset.
     *
     * @throws IOException if reading the stream fails.
     */
    @GwtIncompatible("java.io.InputStream")
    public SourceFile buildFromInputStream(String fileName, InputStream s) throws IOException {
      String contents = CharStreams.toString(new InputStreamReader(s, charset));
      return buildFromCode(fileName, contents);
    }

    /**
     * Builds a source file by reading all the characters of the given reader.
     *
     * @throws IOException if reading fails.
     */
    @GwtIncompatible("java.io.Reader")
    public SourceFile buildFromReader(String fileName, Reader r) throws IOException {
      String contents = CharStreams.toString(r);
      return buildFromCode(fileName, contents);
    }

    /** Builds a source file whose code is obtained from the given generator. */
    public SourceFile buildFromGenerator(String fileName, Generator generator) {
      return new Generated(fileName, originalPath, generator);
    }
  }


  //////////////////////////////////////////////////////////////////////////////
  // Implementations

  /**
   * A source file whose code is handed over at construction time and kept in memory.
   */
  static class Preloaded extends SourceFile {
    private static final long serialVersionUID = 1L;

    /**
     * @param fileName name of the source file.
     * @param originalPath original path of the file; may be {@code null}.
     * @param code the code of the file.
     */
    Preloaded(String fileName, String originalPath, String code) {
      super(fileName);
      super.setCode(code);
      super.setOriginalPath(originalPath);
    }
  }

  /**
   * A source file whose code is produced on first use by the injected {@link Generator} and then
   * cached.
   */
  static class Generated extends SourceFile {
    // Avoid serializing the generator, which also removes the burden of making classes that
    // implement Generator serializable. There should be no need to obtain generated source in the
    // second stage of compilation. Making the generator transient relies on not clearing the
    // code cache for these classes upon serialization, which might be quite wasteful.
    private transient Generator generator;

    // Not private, so that LazyInput can extend it.
    Generated(String fileName, String originalPath, Generator generator) {
      super(fileName);
      super.setOriginalPath(originalPath);
      this.generator = generator;
    }

    /** Returns the cached code, asking the generator for it if nothing is cached yet. */
    @Override
    public synchronized String getCode() throws IOException {
      String result = super.getCode();
      if (result != null) {
        return result;
      }

      result = generator.getCode();
      super.setCode(result);
      return result;
    }

    /**
     * Drops the generated code when finished with a compile; it can be regenerated if it is ever
     * needed again.
     */
    @Override
    public void clearCachedSource() {
      super.setCode(null);
    }

    /** Restores the code and the generator from another {@code Generated} source file. */
    @Override
    public void restoreFrom(SourceFile sourceFile) {
      super.restoreFrom(sourceFile);
      Generated other = (Generated) sourceFile;
      this.generator = other.generator;
    }
  }

  /**
   * A source file where the code is only read into memory if absolutely necessary. We will try to
   * delay loading the code into memory as long as possible.
   */
  @GwtIncompatible("java.io.File")
  static class OnDisk extends SourceFile {
    private static final long serialVersionUID = 1L;
    // Both fields are transient; writeObject/readObject persist them as a URI and a charset name.
    private transient Path path;
    private transient Charset inputCharset = UTF_8;

    /**
     * @param path location of the file; its string form becomes the file name.
     * @param originalPath original path of the file; may be {@code null}.
     * @param c charset to read the file with; {@code null} keeps the UTF-8 default.
     */
    OnDisk(Path path, String originalPath, Charset c) {
      super(path.toString());
      this.path = path;
      setOriginalPath(originalPath);
      if (c != null) {
        this.setCharset(c);
      }
    }

    /**
     * Returns the code of the file, reading and caching it on first use.
     *
     * @throws IOException if the file cannot be read or cannot be decoded with the charset.
     */
    @Override
    public synchronized String getCode() throws IOException {
      String cachedCode = super.getCode();

      if (cachedCode == null) {
        try (Reader r = getCodeReader()) {
          cachedCode = CharStreams.toString(r);
        } catch (java.nio.charset.MalformedInputException e) {
          throw new IOException("Failed to read: " + path + ", is this input UTF-8 encoded?", e);
        }

        // Strip the byte-order mark only when the file was read as UTF-8. The charset must be
        // compared against UTF-8 itself; comparing it against inputCharset is always true.
        super.setCode(cachedCode, Objects.equals(this.getCharset(), StandardCharsets.UTF_8));
        // Byte Order Mark can be removed by setCode
        cachedCode = super.getCode();
      }
      return cachedCode;
    }

    /**
     * Gets a reader for the code in this source file.
     */
    @Override
    public Reader getCodeReader() throws IOException {
      if (hasSourceInMemory()) {
        return super.getCodeReader();
      } else {
        // If we haven't pulled the code into memory yet, don't.
        return Files.newBufferedReader(path, inputCharset);
      }
    }

    // Flush the cached code after the compile; we can read it off disk
    // if we need it again.
    @Override
    public void clearCachedSource() {
      super.setCode(null);
    }

    /**
     * Sets the Charset specifying how the file is decoded when it is read off disk.
     *
     * @param c charset to use when reading the input.
     */
    public void setCharset(Charset c) {
      inputCharset = c;
    }

    /**
     * Gets the Charset specifying how the file is decoded when it is read off disk.
     *
     * @return Charset object representing charset to use.
     */
    public Charset getCharset() {
      return inputCharset;
    }

    /**
     * Writes the default serialized form followed by the charset name and the path as a URI,
     * since {@code Charset} and {@code Path} fields are transient here. The cached code is not
     * written because the field holding it is transient as well.
     */
    @GwtIncompatible("ObjectOutputStream")
    private void writeObject(java.io.ObjectOutputStream out) throws Exception {
      out.defaultWriteObject();
      out.writeObject(inputCharset != null ? inputCharset.name() : null);
      out.writeObject(path != null ? path.toUri() : null);
    }

    /** Reads back what {@code writeObject} wrote, in the same order, and drops any code. */
    @GwtIncompatible("ObjectInputStream")
    private void readObject(java.io.ObjectInputStream in) throws Exception {
      in.defaultReadObject();
      String inputCharsetName = (String) in.readObject();
      inputCharset = inputCharsetName != null ? Charset.forName(inputCharsetName) : null;
      URI uri = (URI) in.readObject();
      path = uri != null ? Paths.get(uri) : null;

      // Code will be reread or restored.
      super.setCode(null);
    }
  }

  /**
   * A source file at a URL where the code is only read into memory if absolutely
   * necessary. We will try to delay loading the code into memory as long as
   * possible.
   * <p>
   * In practice this is used to load code in entries inside of zip files.
   */
  @GwtIncompatible("java.net.URL")
  static class AtUrl extends SourceFile {
    private static final long serialVersionUID = 1L;
    private final URL url;

    // This is stored as a String, but passed in and out as a Charset so that
    // we can serialize the class.
    // Default input file format for the compiler has always been UTF_8.
    private String inputCharset = UTF_8.name();

    /**
     * @param url location the code is read from.
     * @param originalPath original path of the file; it is also used as the file name.
     * @param c charset to read the URL with; {@code null} keeps the UTF-8 default.
     */
    AtUrl(URL url, String originalPath, Charset c) {
      super(originalPath);
      this.url = url;
      super.setOriginalPath(originalPath);
      if (c != null) {
        this.setCharset(c);
      }
    }

    /**
     * Returns the code at the URL, reading and caching it on first use.
     *
     * @throws IOException if the URL cannot be opened or read.
     */
    @Override
    public synchronized String getCode() throws IOException {
      String cachedCode = super.getCode();

      if (cachedCode == null) {
        URLConnection urlConnection = url.openConnection();
        // Perform the read through the URL connection while making sure that it does not internally
        // cache, because its default internal caching would defeat our own cache management.
        urlConnection.setUseCaches(false);
        // Must close the stream or else the cache won't be cleared. try-with-resources closes it
        // even when reading fails part-way.
        try (InputStream inputStream = urlConnection.getInputStream();
            Reader reader = new InputStreamReader(inputStream, this.getCharset())) {
          cachedCode = CharStreams.toString(reader);
        }

        super.setCode(cachedCode, Objects.equals(this.getCharset(), StandardCharsets.UTF_8));
        // Byte Order Mark can be removed by setCode
        cachedCode = super.getCode();
      }
      return cachedCode;
    }

    /**
     * Gets a reader for the code at this URL.
     */
    @Override
    public Reader getCodeReader() throws IOException {
      if (hasSourceInMemory()) {
        return super.getCodeReader();
      } else {
        // If we haven't pulled the code into memory yet, don't. Decode with the configured
        // charset so that the reader agrees with what getCode() would return.
        return Resources.asCharSource(url, this.getCharset()).openStream();
      }
    }

    // Flush the cached code after the compile; we can read it from the URL
    // if we need it again.
    @Override
    public void clearCachedSource() {
      super.setCode(null);
    }

    /**
     * Store the Charset specification as the string version of the name,
     * rather than the Charset itself.  This allows us to serialize the
     * SourceFile class.
     * @param c charset to use when reading the input.
     */
    public void setCharset(Charset c) {
      inputCharset = c.name();
    }

    /**
     * Get the Charset specifying how we're supposed to read the URL
     * into UTF-16.  This is stored as a string to allow SourceFile to be
     * serialized.
     * @return Charset object representing charset to use.
     */
    public Charset getCharset() {
      return Charset.forName(inputCharset);
    }

    /** Restores the default serialized form and drops any code. */
    @GwtIncompatible("ObjectInputStream")
    private void readObject(java.io.ObjectInputStream in) throws Exception {
      in.defaultReadObject();
      // Code will be reread or restored.
      super.setCode(null);
    }
  }

  /**
   * Takes over the in-memory code of another source file.
   *
   * @param sourceFile the source file whose code is copied into this one.
   */
  public void restoreFrom(SourceFile sourceFile) {
    String restoredCode = sourceFile.code;
    this.code = restoredCode;
  }

  /**
   * Restores the default serialized form. The code field is transient, so it is set to a
   * placeholder value afterwards.
   */
  @GwtIncompatible("ObjectInputStream")
  private void readObject(java.io.ObjectInputStream in) throws Exception {
    in.defaultReadObject();
    this.code = "<UNAVAILABLE>";
  }
}