-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[WIP] fix encoding issues #1494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2a2f314
e104be1
6bce106
32819e2
46b3253
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ import java.io.IOException | |
import Chars._ | ||
import ScriptSourceFile._ | ||
import Positions._ | ||
import scala.io.Codec | ||
|
||
import java.util.Optional | ||
|
||
|
@@ -36,9 +37,9 @@ object ScriptSourceFile { | |
|
||
case class SourceFile(file: AbstractFile, content: Array[Char]) extends interfaces.SourceFile { | ||
|
||
def this(_file: AbstractFile) = this(_file, _file.toCharArray) | ||
def this(sourceName: String, cs: Seq[Char]) = this(new VirtualFile(sourceName), cs.toArray) | ||
def this(file: AbstractFile, cs: Seq[Char]) = this(file, cs.toArray) | ||
def this(_file: AbstractFile, codec: Codec) = this(_file, new String(_file.toByteArray, codec.charSet).toCharArray) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure using I suggest directly using |
||
def this(sourceName: String, cs: Seq[Char]) = this(new VirtualFile(sourceName), cs.toArray) | ||
def this(file: AbstractFile, cs: Seq[Char]) = this(file, cs.toArray) | ||
|
||
/** Tab increment; can be overridden */ | ||
def tabInc = 8 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -228,7 +228,7 @@ abstract class CompilerTest { | |
|
||
private def expectedErrors(filePath: String): List[ErrorsInFile] = expectedErrors(List(filePath)) | ||
|
||
private def isNegTest(testPath: String) = testPath.contains(JFile.separator + "neg" + JFile.separator) | ||
private def isNegTest(testPath: String) = testPath.contains("/neg/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this change have been part of another commit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 32819e2 should probably be broken up in just the part that fixes dotty not honoring default and passed in -encoding arguments (which is an actual bug in dotty) and the parts that get the tests running on Windows (which is "just" a set of test-suite bugs). WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That sounds good :) |
||
|
||
private def compileArgs(args: Array[String], expectedErrorsPerFile: List[ErrorsInFile]) | ||
(implicit defaultOptions: List[String]): Unit = { | ||
|
@@ -413,7 +413,8 @@ abstract class CompilerTest { | |
val flags = oldFlags.map(f => if (f == oldOutput) partestOutput else f) ++ | ||
List(s"-classpath $partestOutput") // Required for separate compilation tests | ||
|
||
getExisting(dest).isDifferent(source, flags, nerr) match { | ||
val difference = getExisting(dest).isDifferent(source, flags, nerr) | ||
difference match { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Useless diff? |
||
case NotExists => copyFiles(source, dest, partestOutput, flags, nerr, kind) | ||
case ExistsSame => // nothing else to do | ||
case ExistsDifferent => | ||
|
@@ -449,11 +450,37 @@ abstract class CompilerTest { | |
/** Recursively copy over source files and directories, excluding extensions | ||
* that aren't in extensionsToCopy. */ | ||
private def recCopyFiles(sourceFile: Path, dest: Path): Unit = { | ||
processFileDir(sourceFile, { sf => | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Useless diff? |
||
def copyfile(file: SFile, bytewise: Boolean): Unit = { | ||
if (bytewise) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of having a boolean param, you should have two separate |
||
val in = file.inputStream() | ||
val out = SFile(dest).outputStream() | ||
val buffer = new Array[Byte](1024) | ||
def loop(available: Int):Unit = { | ||
if (available < 0) {()} | ||
else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider just |
||
out.write(buffer, 0, available) | ||
val read = in.read(buffer) | ||
loop(read) | ||
} | ||
} | ||
loop(0) | ||
in.close() | ||
out.close() | ||
} else { | ||
try { | ||
SFile(dest)(scala.io.Codec.UTF8).writeAll((s"/* !!!!! WARNING: DO NOT MODIFY. Original is at: $file !!!!! */").replace("\\", "/"), file.slurp("UTF-8")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. some path parts start with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah ah! |
||
} catch { | ||
case unmappable: java.nio.charset.MalformedInputException => | ||
copyfile(file, true) //there are bytes that can't be mapped with UTF-8. Bail and just do a straight byte-wise copy without the warning header. | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this whole thing anyway? Files should always be copied bytewise, shouldn't they? They have an encoding before, and they should have the exact same encoding after. There is no need to decode and re-encode them. The comment should be pure-ASCII anyway, so it doesn't really matter what encoding is used. It could actually be encoded as such (with US_ASCII) using a That would be much simpler, not to mention faster. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the comment is pure-ASCII, but you can't write it to file if you don't know the encoding of the file; the encoding of the comment in UTF-16 or UTF-32 is unequal to the encoding in UTF-8. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True. I stupidly did not think about UTF-16. That said, this code only applies to files in this repo, doesn't it? We shouldn't have UTF-16-encoded files in this repo, so it should be fine. It would be a different matter if this code were applied to arbitrary files in users' repositories, of course. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is one in there now, namely the UTF-16 test - which tests whether the passed in -encoding compiler flag is honored. I didn't see any way to test that other than this. I could see if I can resort to tricks, finding an encoding/file that is both valid UTF-8, but doesn't compile as UTF-8, but is also a valid different encoding and does compile as that encoding. A third alternative is not having a test for it. None of the alternatives (this ugly thing which is only needed for one test, unclear tricks with a polyglot file encoding, or not having a test) are really appealing IMO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Easier: use a latin1 test instead of a UTF-16 test. Use a French There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it won't be valid UTF-8, then the file can't be slurped with UTF-8, and the file copy fails There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My comment was to use that in the potential variant where files are never slurped to being with. They are always copied byte-wise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I think I get what you mean: like this: martijnhoekstra@42fc87a ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, like that. |
||
} | ||
|
||
processFileDir(sourceFile, { sf => | ||
if (extensionsToCopy.contains(sf.extension)) { | ||
dest.parent.jfile.mkdirs | ||
dest.toFile.writeAll("/* !!!!! WARNING: DO NOT MODIFY. Original is at: $sf !!!!! */", | ||
sf.slurp()) | ||
copyfile(sf, false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indentation issue (tabs) |
||
} else { | ||
log(s"WARNING: ignoring $sf") | ||
} | ||
|
@@ -465,7 +492,7 @@ abstract class CompilerTest { | |
|
||
/** Reads the existing files for the given test source if any. */ | ||
private def getExisting(dest: Path): ExistingFiles = { | ||
val content: Option[Option[String]] = processFileDir(dest, f => f.safeSlurp, d => Some("")) | ||
val content: Option[Option[String]] = processFileDir(dest, f => try Some(f.slurp("UTF8")) catch {case io: java.io.IOException => Some(io.toString())}, d => Some("")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This ugly thing definitely need be factored out (also used a couple lines below). What does this do, anyway? Shouldn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That said, it can still go from this line. |
||
if (content.isDefined && content.get.isDefined) { | ||
val flags = (dest changeExtension "flags").toFile.safeSlurp | ||
val nerr = (dest changeExtension "nerr").toFile.safeSlurp | ||
|
@@ -479,7 +506,7 @@ abstract class CompilerTest { | |
if (!genSrc.isDefined) { | ||
NotExists | ||
} else { | ||
val source = processFileDir(sourceFile, { f => f.safeSlurp }, { d => Some("") }, | ||
val source = processFileDir(sourceFile, { f => try Some(f.slurp("UTF8")) catch {case _: java.io.IOException => None} }, { d => Some("") }, | ||
Some("DPCompilerTest sourceFile doesn't exist: " + sourceFile)).get | ||
if (source == genSrc) { | ||
nerr match { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ import scala.collection.mutable.ListBuffer | |
*/ | ||
class InterfaceEntryPointTest { | ||
@Test def runCompilerFromInterface = { | ||
val sources = List("./tests/pos/HelloWorld.scala") | ||
val sources = List("./tests/pos/HelloWorld.scala").map(p => new java.io.File(p).getPath()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be good to factor out this incantation as |
||
val args = sources ++ List("-d", "./out/") | ||
|
||
val mainClass = Class.forName("dotty.tools.dotc.Main") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
package test | ||
|
||
import scala.reflect.io._ | ||
import scala.io.Codec | ||
import dotty.tools.dotc.util._ | ||
import dotty.tools.dotc.parsing._ | ||
import Tokens._, Scanners._ | ||
|
@@ -16,8 +17,8 @@ class ScannerTest extends DottyTest { | |
def scan(name: String): Unit = scan(new PlainFile(name)) | ||
|
||
def scan(file: PlainFile): Unit = { | ||
println("***** scanning " + file) | ||
val source = new SourceFile(file) | ||
//println("***** scanning " + file) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment seems irrelevant to the PR. |
||
val source = new SourceFile(file, Codec.UTF8) | ||
val scanner = new Scanner(source) | ||
var i = 0 | ||
while (scanner.token != EOF) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ class TestREPL(script: String) extends REPL { | |
while (lines.hasNext && lines.head.startsWith(continuationPrompt)) { | ||
val continued = lines.next | ||
output.println(continued) | ||
buf append "\n" | ||
buf append System.lineSeparator() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this still needed, despite the comparison through |
||
buf append continued.drop(continuationPrompt.length) | ||
} | ||
buf.toString | ||
|
@@ -49,7 +49,7 @@ class TestREPL(script: String) extends REPL { | |
out.close() | ||
val printed = out.toString | ||
val transcript = printed.drop(printed.indexOf(config.prompt)) | ||
if (transcript.toString != script) { | ||
if (transcript.toString.lines.toList != script.lines.toList) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can avoid the reification as if (transcript.toString.lines.sameElements(script.lines)) |
||
println("input differs from transcript:") | ||
println(transcript) | ||
assert(false) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
//this file is saved as UTF-8 | ||
object Test { | ||
def main(args: Array[String]): Unit = { | ||
val testchar = '⇒' | ||
println(testchar == '\u21D2') | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change seems irrelevant to the PR/commit. It shouldn't appear in the commit.