Skip to content

Added option of Byte encoding for New-AzDataLakeStoreItem, Add-AzDataLakeStoreItemContent, Get-AzDataLakeStoreItemContent #11355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void TestAdlsFileSystem()
{
var workingPath = Path.GetDirectoryName(new Uri(Assembly.GetExecutingAssembly().CodeBase).AbsolutePath);
var testLocation = Path.Combine(workingPath, "ScenarioTests", (this.GetType().Name + ".ps1"));
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.ResourceGroupLocation));
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.TestFileSystemResourceGroupLocation));
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,13 @@ function Test-DataLakeStoreFileSystem
Assert-AreEqual 3 $headTailResult[0]
Assert-AreEqual 4 $headTailResult[1]

#Create a file with byte and read it
$byteDataFile="/byteData/filetest.txt"
[byte[]] $byteData = 1,2,3,4,5
New-AdlStoreItem -Account $accountName -Path $byteDataFile -Force -Value $byteData -Encoding Byte
$result = Get-AdlStoreItemContent -Account $accountName -path $byteDataFile -Encoding Byte
Assert-True {@(Compare-Object $byteData $result -SyncWindow 0).Length -eq 0}

# Import and get file
$localFileInfo = Get-ChildItem $fileToCopy
$result = Import-AdlStoreItem -Account $accountName -Path $fileToCopy -Destination $importFile
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ public void TestAdlsAccountTiers()
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreAccountTiers -location '{0}'", AdlsTestsBase.ResourceGroupLocation));
}

[Fact(Skip="This case has been unstable on Linux for approximately 20% of failure. Needs investigation.")]
[Fact(Skip = "This case has been unstable on Linux for approximately 20% of failure. Needs investigation.")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test case was previously skipped because it fails randomly in the Linux environment. Details are in #10667.
Please enable it and fix the error. Thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@isra-fel I did see that you skipped it; however, I could not reproduce the failure in my Linux VM. So I still do not understand the exact root cause, although I have a hunch about why it might be happening. The thing is, the failure is in AdlsAliasTest, but you skipped AdlsTest.

As you can see, there are two versions of the test: AdlsAliasTest and AdlsTest. Functionally they are exact replicas; the only difference is that one of them uses the alias cmdlet names. In most of the logs I see "Export-AdlStoreChildItemProperties" failing for the alias version. As part of this cmdlet we write output to a local path, so I suspect AdlsTest and AdlsAliasTest are interfering with each other. The reason I think so is that AdlsAliasTest was the one failing, but after you disabled AdlsTest it started passing more often. I still do not know why they are interfering.

What I will do is try commenting out "Export-AdlStoreChildItemProperties" in the AdlsAliasTest file system test (since that's the one failing) and enable the AdlsTest file system test. But I don't think this PR is the correct place to fix this, since it addresses a different bug.

I will send out a different PR after this with the proposed change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for all your explanation, it makes sense to me.

[Trait(Category.AcceptanceType, Category.CheckIn)]
public void TestAdlsFileSystem()
{
    // The scenario script is looked up in the "ScenarioTests" folder under the directory of the
    // executing assembly, and its file name is this test class's name plus ".ps1".
    var workingPath = Path.GetDirectoryName(new Uri(Assembly.GetExecutingAssembly().CodeBase).AbsolutePath);
    var testLocation = Path.Combine(workingPath, "ScenarioTests", (this.GetType().Name + ".ps1"));

    // Run the file system scenario exactly once, using the location reserved for the file system
    // test. The earlier call that passed AdlsTestsBase.ResourceGroupLocation was superseded by this
    // one and is intentionally not kept, so the scenario is not executed a second time.
    NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.TestFileSystemResourceGroupLocation));
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,13 @@ function Test-DataLakeStoreFileSystem
Assert-AreEqual 3 $headTailResult[0]
Assert-AreEqual 4 $headTailResult[1]

#Create a file with byte and read it
$byteDataFile="/byteData/filetest.txt"
[byte[]] $byteData = 1,2,3,4,5
New-AzDataLakeStoreItem -Account $accountName -Path $byteDataFile -Force -Value $byteData -Encoding Byte
$result = Get-AzDataLakeStoreItemContent -Account $accountName -path $byteDataFile -Encoding Byte
Assert-True {@(Compare-Object $byteData $result -SyncWindow 0).Length -eq 0}

# Import and get file
$localFileInfo = Get-ChildItem $fileToCopy
$result = Import-AzDataLakeStoreItem -Account $accountName -Path $fileToCopy -Destination $importFile
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class AdlsTestsBase : RMTestBase
private readonly EnvironmentSetupHelper _helper;

internal const string ResourceGroupLocation = "westus";
internal const string TestFileSystemResourceGroupLocation = "eastus2";

public NewResourceManagementClient NewResourceManagementClient { get; private set; }

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/DataLakeStore/DataLakeStore/ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Additional information about change #1
-->
## Upcoming Release
* Added option of Byte encoding for New-AzDataLakeStoreItem, Add-AzDataLakeStoreItemContent, Get-AzDataLakeStoreItemContent

## Version 1.2.7
* Added reference to System.Buffers explicitly in csproj and psd1.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class AddAzureDataLakeStoreItemContent : DataLakeStoreFileSystemCmdletBas
HelpMessage =
"Optionally indicates the encoding for the content being uploaded as part of 'Value'. Default is UTF8")]
[ArgumentToEncodingTransformation]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]
public Encoding Encoding { get; set; } = Encoding.UTF8;

public override void ExecuteCmdlet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public class GetAzureDataLakeStoreContent : DataLakeStoreFileSystemCmdletBase
Mandatory = false,
HelpMessage = "Optionally indicates the encoding for the content being downloaded. Default is UTF8")]
[ArgumentToEncodingTransformation]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]

public Encoding Encoding { get; set; } = Encoding.UTF8;

Expand All @@ -97,6 +97,10 @@ public class GetAzureDataLakeStoreContent : DataLakeStoreFileSystemCmdletBase

public override void ExecuteCmdlet()
{
var useByteEncoding = UsingByteEncoding(Encoding);
// Byte encoding is not possible to use for reading rows from the stream. It has to be a defined encoding.
// Previously also we used to fall back to UTF8 in case of reading head rows and tail rows.
var fallBackEncoding = useByteEncoding ? Encoding.UTF8 : Encoding;
if (ParameterSetName.Equals(BaseParameterSetName, StringComparison.OrdinalIgnoreCase))
{
ConfirmAction(
Expand Down Expand Up @@ -137,19 +141,25 @@ public override void ExecuteCmdlet()
{
Array.Resize(ref byteArray, (int)totalLengthRead);
}

WriteObject(BytesToString(byteArray, Encoding));
if (useByteEncoding)
{
WriteObject(byteArray);
}
else
{
WriteObject(BytesToString(byteArray, Encoding));
}

}
});
}
else if (ParameterSetName.Equals(HeadRowParameterSetName, StringComparison.OrdinalIgnoreCase))
{
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Head, Encoding), true);
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Head, fallBackEncoding), true);
}
else
{
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Tail, Encoding, true), true);
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Tail, fallBackEncoding, true), true);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class NewAzureDataLakeStoreItem : DataLakeStoreFileSystemCmdletBase
HelpMessage =
"Optionally indicates the encoding for the content being uploaded as part of 'Value'. Default is UTF8")]
[ArgumentToEncodingTransformation]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]
public Encoding Encoding { get; set; } = Encoding.UTF8;

[Parameter(ValueFromPipelineByPropertyName = true, Position = 3, Mandatory = false,
Expand Down
38 changes: 38 additions & 0 deletions src/DataLakeStore/DataLakeStore/DataPlaneModels/ByteEncoding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using System;
using System.Text;

namespace Microsoft.Azure.Commands.DataLakeStore.Models
{
    /// <summary>
    /// Marker <see cref="Encoding"/> that stands for "raw bytes, no text encoding".
    /// It is never used to convert between characters and bytes: code checks for this
    /// type and then reads or writes the byte content directly. Every conversion member
    /// therefore throws <see cref="NotSupportedException"/>.
    /// </summary>
    internal sealed class ByteEncoding : Encoding
    {
        // Single message shared by all conversion members so the failure reason is explicit.
        private const string ConversionNotSupportedMessage =
            "ByteEncoding represents raw byte content and cannot convert between characters and bytes.";

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }

        /// <summary>Not supported; this type only marks raw byte content.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            throw new NotSupportedException(ConversionNotSupportedMessage);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,15 @@ protected override void StopProcessing()
}

#region cmdlet helpers from the FilesystemProvider

/// <summary>
/// Checks whether the supplied encoding is the <see cref="ByteEncoding"/> marker,
/// i.e. whether content should be handled as raw bytes instead of text.
/// </summary>
/// <param name="encoding">The encoding to inspect; may be null.</param>
/// <returns>
/// True when <paramref name="encoding"/> is a <see cref="ByteEncoding"/> instance;
/// false otherwise, including when it is null.
/// </returns>
internal static bool UsingByteEncoding(Encoding encoding)
{
    // A type test (rather than encoding.GetType()) yields false for null instead of throwing.
    return encoding is ByteEncoding;
}
/// <summary>
/// Converts the stream type string into an Encoding
/// </summary>
Expand All @@ -85,13 +93,52 @@ private static byte[] GetBytes(string content, Encoding encoding)

/// <summary>
/// Converts content supplied to a cmdlet into the bytes to be written.
/// With byte encoding the content must already be bytes; with any other encoding
/// the content must be a string, which is encoded using that encoding.
/// </summary>
/// <param name="content">
/// For byte encoding: a byte[] or an object[] whose elements are all bytes.
/// For any other encoding: a string.
/// </param>
/// <param name="encoding">The encoding selected for the content.</param>
/// <returns>
/// The bytes for <paramref name="content"/>. When <paramref name="content"/> is already
/// a byte[] that same array instance is returned, not a copy.
/// </returns>
/// <exception cref="CloudException">
/// Thrown with Resources.InvalidEncoding when byte encoding is selected but the content is
/// neither a byte[] nor an object[] made up only of bytes, and with Resources.InvalidContent
/// when a text encoding is selected but the content is not a string.
/// </exception>
internal static byte[] GetBytes(object content, Encoding encoding)
{
    if (UsingByteEncoding(encoding))
    {
        // First attempt to use the content directly as a byte array.
        var byteArray = content as byte[];
        if (byteArray != null)
        {
            return byteArray;
        }

        /*
         * The content can also arrive as an object[] that holds bytes, for example:
         *   [byte[]] $byteData = 1,2,3,4,5
         *   $MyList = [System.Collections.Generic.List[object]]::new()
         *   $MyList.Add($byteData[0])
         *   $MyList.Add($byteData[1])
         * Passing $MyList.ToArray() hands this method an object[] containing bytes.
         */
        var contentArray = content as object[];
        if (contentArray == null)
        {
            throw new CloudException(Resources.InvalidEncoding);
        }

        // Every element must be a byte; the result size is known up front, so fill an array directly.
        var convertedBytes = new byte[contentArray.Length];
        for (var i = 0; i < contentArray.Length; i++)
        {
            var entry = contentArray[i];
            if (!(entry is byte))
            {
                throw new CloudException(Resources.InvalidEncoding);
            }

            convertedBytes[i] = (byte)entry;
        }

        return convertedBytes;
    }

    // Any other encoding only applies to textual content.
    var contentString = content as string;
    if (contentString == null)
    {
        throw new CloudException(Resources.InvalidContent);
    }

    return GetBytes(contentString, encoding);
}

internal static string BytesToString(byte[] content, Encoding encoding)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ internal static class EncodingUtils
internal const string Default = "default";
internal const string Oem = "oem";
internal const string BigEndianUtf32 = "bigendianutf32";


internal const string Byte = "byte";
}

internal sealed class ArgumentToEncodingTransformationAttribute : ArgumentTransformationAttribute
Expand Down Expand Up @@ -54,6 +53,8 @@ public override object Transform(EngineIntrinsics engineIntrinsics, object input
var oemCP = NativeMethods.GetOEMCP();
return Encoding.GetEncoding((int)oemCP);
}
case EncodingUtils.Byte:
return new ByteEncoding();
default:
// Unrecognized encoding names are rejected rather than mapped to a default encoding
throw new ArgumentException($"{encodingName} is not a supported Encoding type");
Expand Down