Skip to content

Commit 624fd79

Browse files
authored
Merge pull request #11355 from Azure/task-fix-byteencodingadls
Added option of Byte encoding for New-AzDataLakeStoreItem, Add-AzDataLakeStoreItemContent, Get-AzDataLakeStoreItemContent
2 parents 95d013d + 56a3c6b commit 624fd79

File tree

14 files changed

+2346
-2942
lines changed

14 files changed

+2346
-2942
lines changed

src/DataLakeStore/DataLakeStore.Test/ScenarioTests/AdlsAliasTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public void TestAdlsFileSystem()
7373
{
7474
var workingPath = Path.GetDirectoryName(new Uri(Assembly.GetExecutingAssembly().CodeBase).AbsolutePath);
7575
var testLocation = Path.Combine(workingPath, "ScenarioTests", (this.GetType().Name + ".ps1"));
76-
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.ResourceGroupLocation));
76+
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.TestFileSystemResourceGroupLocation));
7777
}
7878

7979
[Fact]

src/DataLakeStore/DataLakeStore.Test/ScenarioTests/AdlsAliasTests.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,13 @@ function Test-DataLakeStoreFileSystem
647647
Assert-AreEqual 3 $headTailResult[0]
648648
Assert-AreEqual 4 $headTailResult[1]
649649

650+
# Create a file from raw byte data and read it back
651+
$byteDataFile="/byteData/filetest.txt"
652+
[byte[]] $byteData = 1,2,3,4,5
653+
New-AdlStoreItem -Account $accountName -Path $byteDataFile -Force -Value $byteData -Encoding Byte
654+
$result = Get-AdlStoreItemContent -Account $accountName -path $byteDataFile -Encoding Byte
655+
Assert-True {@(Compare-Object $byteData $result -SyncWindow 0).Length -eq 0}
656+
650657
# Import and get file
651658
$localFileInfo = Get-ChildItem $fileToCopy
652659
$result = Import-AdlStoreItem -Account $accountName -Path $fileToCopy -Destination $importFile

src/DataLakeStore/DataLakeStore.Test/ScenarioTests/AdlsTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ public void TestAdlsAccountTiers()
6666
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreAccountTiers -location '{0}'", AdlsTestsBase.ResourceGroupLocation));
6767
}
6868

69-
[Fact(Skip="This case has been unstable on Linux for approximately 20% of failure. Needs investigation.")]
69+
[Fact(Skip = "This case has been unstable on Linux for approximately 20% of failure. Needs investigation.")]
7070
[Trait(Category.AcceptanceType, Category.CheckIn)]
7171
public void TestAdlsFileSystem()
7272
{
7373
var workingPath = Path.GetDirectoryName(new Uri(Assembly.GetExecutingAssembly().CodeBase).AbsolutePath);
7474
var testLocation = Path.Combine(workingPath, "ScenarioTests", (this.GetType().Name + ".ps1"));
75-
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.ResourceGroupLocation));
75+
NewInstance.RunPsTest(_logger, string.Format("Test-DataLakeStoreFileSystem -fileToCopy '{0}' -location '{1}'", testLocation, AdlsTestsBase.TestFileSystemResourceGroupLocation));
7676
}
7777

7878
[Fact]

src/DataLakeStore/DataLakeStore.Test/ScenarioTests/AdlsTests.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,13 @@ function Test-DataLakeStoreFileSystem
646646
Assert-AreEqual 3 $headTailResult[0]
647647
Assert-AreEqual 4 $headTailResult[1]
648648

649+
# Create a file from raw byte data and read it back
650+
$byteDataFile="/byteData/filetest.txt"
651+
[byte[]] $byteData = 1,2,3,4,5
652+
New-AzDataLakeStoreItem -Account $accountName -Path $byteDataFile -Force -Value $byteData -Encoding Byte
653+
$result = Get-AzDataLakeStoreItemContent -Account $accountName -path $byteDataFile -Encoding Byte
654+
Assert-True {@(Compare-Object $byteData $result -SyncWindow 0).Length -eq 0}
655+
649656
# Import and get file
650657
$localFileInfo = Get-ChildItem $fileToCopy
651658
$result = Import-AzDataLakeStoreItem -Account $accountName -Path $fileToCopy -Destination $importFile

src/DataLakeStore/DataLakeStore.Test/ScenarioTests/AdlsTestsBase.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public class AdlsTestsBase : RMTestBase
3838
private readonly EnvironmentSetupHelper _helper;
3939

4040
internal const string ResourceGroupLocation = "westus";
41+
internal const string TestFileSystemResourceGroupLocation = "eastus2";
4142

4243
public NewResourceManagementClient NewResourceManagementClient { get; private set; }
4344

src/DataLakeStore/DataLakeStore.Test/SessionRecords/Microsoft.Azure.Commands.DataLakeStore.Test.ScenarioTests.AdlsAliasTests/TestAdlsFileSystem.json

Lines changed: 1158 additions & 1308 deletions
Large diffs are not rendered by default.

src/DataLakeStore/DataLakeStore.Test/SessionRecords/Microsoft.Azure.Commands.DataLakeStore.Test.ScenarioTests.AdlsTests/TestAdlsFileSystem.json

Lines changed: 1059 additions & 1617 deletions
Large diffs are not rendered by default.

src/DataLakeStore/DataLakeStore/ChangeLog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
- Additional information about change #1
1919
-->
2020
## Upcoming Release
21+
* Added option of Byte encoding for New-AzDataLakeStoreItem, Add-AzDataLakeStoreItemContent, Get-AzDataLakeStoreItemContent
2122

2223
## Version 1.2.7
2324
* Added reference to System.Buffers explicitly in csproj and psd1.

src/DataLakeStore/DataLakeStore/DataPlaneCommands/AddAzureRmDataLakeStoreItemContent.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public class AddAzureDataLakeStoreItemContent : DataLakeStoreFileSystemCmdletBas
4747
HelpMessage =
4848
"Optionally indicates the encoding for the content being uploaded as part of 'Value'. Default is UTF8")]
4949
[ArgumentToEncodingTransformation]
50-
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
50+
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]
5151
public Encoding Encoding { get; set; } = Encoding.UTF8;
5252

5353
public override void ExecuteCmdlet()

src/DataLakeStore/DataLakeStore/DataPlaneCommands/GetAzureRmDataLakeStoreItemContent.cs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public class GetAzureDataLakeStoreContent : DataLakeStoreFileSystemCmdletBase
8787
Mandatory = false,
8888
HelpMessage = "Optionally indicates the encoding for the content being downloaded. Default is UTF8")]
8989
[ArgumentToEncodingTransformation]
90-
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
90+
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]
9191

9292
public Encoding Encoding { get; set; } = Encoding.UTF8;
9393

@@ -97,6 +97,10 @@ public class GetAzureDataLakeStoreContent : DataLakeStoreFileSystemCmdletBase
9797

9898
public override void ExecuteCmdlet()
9999
{
100+
var useByteEncoding = UsingByteEncoding(Encoding);
101+
// Byte encoding cannot be used to read rows from the stream; row reading needs a defined text encoding.
102+
// Fall back to UTF8 when reading head or tail rows, matching the fallback that was used previously.
103+
var fallBackEncoding = useByteEncoding ? Encoding.UTF8 : Encoding;
100104
if (ParameterSetName.Equals(BaseParameterSetName, StringComparison.OrdinalIgnoreCase))
101105
{
102106
ConfirmAction(
@@ -137,19 +141,25 @@ public override void ExecuteCmdlet()
137141
{
138142
Array.Resize(ref byteArray, (int)totalLengthRead);
139143
}
140-
141-
WriteObject(BytesToString(byteArray, Encoding));
144+
if (useByteEncoding)
145+
{
146+
WriteObject(byteArray);
147+
}
148+
else
149+
{
150+
WriteObject(BytesToString(byteArray, Encoding));
151+
}
142152

143153
}
144154
});
145155
}
146156
else if (ParameterSetName.Equals(HeadRowParameterSetName, StringComparison.OrdinalIgnoreCase))
147157
{
148-
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Head, Encoding), true);
158+
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Head, fallBackEncoding), true);
149159
}
150160
else
151161
{
152-
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Tail, Encoding, true), true);
162+
WriteObject(DataLakeStoreFileSystemClient.GetStreamRows(Path.TransformedPath, Account, Tail, fallBackEncoding, true), true);
153163
}
154164
}
155165
}

src/DataLakeStore/DataLakeStore/DataPlaneCommands/NewAzureRmDataLakeStoreItem.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public class NewAzureDataLakeStoreItem : DataLakeStoreFileSystemCmdletBase
4848
HelpMessage =
4949
"Optionally indicates the encoding for the content being uploaded as part of 'Value'. Default is UTF8")]
5050
[ArgumentToEncodingTransformation]
51-
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32)]
51+
[PSArgumentCompleter(EncodingUtils.Unknown, EncodingUtils.String, EncodingUtils.Unicode, EncodingUtils.BigEndianUnicode, EncodingUtils.Ascii, EncodingUtils.Utf8, EncodingUtils.Utf7, EncodingUtils.Utf32, EncodingUtils.Default, EncodingUtils.Oem, EncodingUtils.BigEndianUtf32, EncodingUtils.Byte)]
5252
public Encoding Encoding { get; set; } = Encoding.UTF8;
5353

5454
[Parameter(ValueFromPipelineByPropertyName = true, Position = 3, Mandatory = false,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
using System;
2+
using System.Text;
3+
4+
namespace Microsoft.Azure.Commands.DataLakeStore.Models
5+
{
6+
internal class ByteEncoding : Encoding
7+
{
8+
public override int GetByteCount(char[] chars, int index, int count)
9+
{
10+
throw new NotImplementedException();
11+
}
12+
13+
public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
14+
{
15+
throw new NotImplementedException();
16+
}
17+
18+
public override int GetCharCount(byte[] bytes, int index, int count)
19+
{
20+
throw new NotImplementedException();
21+
}
22+
23+
public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
24+
{
25+
throw new NotImplementedException();
26+
}
27+
28+
public override int GetMaxByteCount(int charCount)
29+
{
30+
throw new NotImplementedException();
31+
}
32+
33+
public override int GetMaxCharCount(int byteCount)
34+
{
35+
throw new NotImplementedException();
36+
}
37+
}
38+
}

src/DataLakeStore/DataLakeStore/DataPlaneModels/DataLakeStoreFileSystemCmdletBase.cs

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,15 @@ protected override void StopProcessing()
6161
}
6262

6363
#region cmdlet helpers from the FilesystemProvider
64-
64+
/// <summary>
65+
/// Checks whether type is ByteEncoding
66+
/// </summary>
67+
/// <param name="encoding"></param>
68+
/// <returns></returns>
69+
internal static bool UsingByteEncoding(Encoding encoding)
70+
{
71+
return encoding.GetType() == typeof(ByteEncoding);
72+
}
6573
/// <summary>
6674
/// Converts the stream type string into an Encoding
6775
/// </summary>
@@ -85,13 +93,52 @@ private static byte[] GetBytes(string content, Encoding encoding)
8593

8694
internal static byte[] GetBytes(object content, Encoding encoding)
8795
{
88-
var contentString = content as string;
89-
if (contentString == null)
96+
if (UsingByteEncoding(encoding))
9097
{
91-
throw new CloudException(Resources.InvalidContent);
98+
// first attempt to convert it directly into a byte array
99+
var byteArray = content as byte[];
100+
if (byteArray != null)
101+
{
102+
return byteArray;
103+
}
104+
/*
105+
* [byte[]] $byteData = 1,2,3,4,5
106+
* $MyList = [System.Collections.Generic.List[object]]::new()
107+
* $MyList.Add($byteData[0])
108+
* $MyList.Add($byteData[1])
109+
* Passing $MyList.ToArray() then supplies an object[] whose elements are bytes
110+
*/
111+
// attempt to convert the object into an object array
112+
var contentArray = content as object[];
113+
if (contentArray == null)
114+
{
115+
throw new CloudException(Resources.InvalidEncoding);
116+
}
117+
118+
// now, for each element in the content array, ensure it is of type byte
119+
var byteList = new List<byte>();
120+
foreach (var entry in contentArray)
121+
{
122+
if (!(entry is byte))
123+
{
124+
throw new CloudException(Resources.InvalidEncoding);
125+
}
126+
127+
byteList.Add((byte)entry);
128+
}
129+
130+
return byteList.ToArray();
92131
}
132+
else
133+
{
134+
var contentString = content as string;
135+
if (contentString == null)
136+
{
137+
throw new CloudException(Resources.InvalidContent);
138+
}
93139

94-
return GetBytes(contentString, encoding);
140+
return GetBytes(contentString, encoding);
141+
}
95142
}
96143

97144
internal static string BytesToString(byte[] content, Encoding encoding)

src/DataLakeStore/DataLakeStore/DataPlaneModels/EncodingUtils.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ internal static class EncodingUtils
1919
internal const string Default = "default";
2020
internal const string Oem = "oem";
2121
internal const string BigEndianUtf32 = "bigendianutf32";
22-
23-
22+
internal const string Byte = "byte";
2423
}
2524

2625
internal sealed class ArgumentToEncodingTransformationAttribute : ArgumentTransformationAttribute
@@ -54,6 +53,8 @@ public override object Transform(EngineIntrinsics engineIntrinsics, object input
5453
var oemCP = NativeMethods.GetOEMCP();
5554
return Encoding.GetEncoding((int)oemCP);
5655
}
56+
case EncodingUtils.Byte:
57+
return new ByteEncoding();
5758
default:
5859
// Unrecognized encoding name: reject it instead of silently falling back to a default
5960
throw new ArgumentException($"{encodingName} is not a supported Encoding type");

0 commit comments

Comments
 (0)