diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
index 7f17c24f07d..ae91ff0012c 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs
@@ -25,6 +25,14 @@ public partial class BasicHtmlWebResponseObject : WebResponseObject
///
public new string Content { get; private set; }
+ /// <summary>
+ /// Gets the Encoding that was used to decode the Content.
+ /// </summary>
+ /// <value>
+ /// The Encoding used to decode the Content; otherwise, a null reference if the content is not text.
+ /// </value>
+ public Encoding Encoding { get; private set; }
+
private WebCmdletElementCollection _inputFields;
///
@@ -217,14 +225,16 @@ private void ParseAttributes(string outerHtml, PSObject elementObject)
///
/// Reads the response content from the web response.
///
- private void InitializeContent()
+ protected void InitializeContent()
{
string contentType = ContentHelper.GetContentType(BaseResponse);
if (ContentHelper.IsText(contentType))
{
+ Encoding encoding = null;
// fill the Content buffer
string characterSet = WebResponseHelper.GetCharacterSet(BaseResponse);
- this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet);
+ this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet, out encoding);
+ this.Encoding = encoding;
}
else
{
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs
index 09879c6e3e3..c87d24a0d45 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs
@@ -49,8 +49,41 @@ internal HtmlWebResponseObject(HttpResponseMessage response, Stream contentStrea
#endregion Constructors
+ #region Properties
+
+ /// <summary>
+ /// Gets the Encoding that was used to decode the Content.
+ /// </summary>
+ /// <value>
+ /// The Encoding used to decode the Content; otherwise, a null reference if the content is not text.
+ /// </value>
+ public Encoding Encoding { get; private set; }
+
+ #endregion Properties
+
#region Methods
+ // Decodes RawContentStream into Content when the response content type is text,
+ // recording the Encoding that was used; any other content type yields an empty Content.
+ //
+ // NOTE: Currently this code path is not enabled.
+ // See FillRequestStream in WebRequestPSCmdlet.CoreClr.cs and
+ // GetResponseObject in WebResponseObjectFactory.CoreClr.cs for details.
+ private void InitializeContent()
+ {
+     string contentType = ContentHelper.GetContentType(BaseResponse);
+     if (ContentHelper.IsText(contentType))
+     {
+         Encoding encoding = null;
+         // fill the Content buffer
+         string characterSet = WebResponseHelper.GetCharacterSet(BaseResponse);
+         this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet, out encoding);
+         this.Encoding = encoding;
+     }
+     else
+     {
+         // Non-text content is not decoded; Encoding is left unset (null).
+         this.Content = string.Empty;
+     }
+ }
+
private void InitializeRawContent(HttpResponseMessage baseResponse)
{
StringBuilder raw = ContentHelper.GetRawContentHeader(baseResponse);
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs
index 7d0d6c2d3bf..8be7c98342a 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs
@@ -51,7 +51,7 @@ internal override void ProcessResponse(HttpResponseMessage response)
object obj = null;
Exception ex = null;
- string str = StreamHelper.DecodeStream(responseStream, encoding);
+ string str = StreamHelper.DecodeStream(responseStream, ref encoding);
bool convertSuccess = false;
// On CoreCLR, we need to explicitly load Json.NET
diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs
index 0c905c2f968..cc4f0cacfcd 100644
--- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs
+++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs
@@ -4,6 +4,7 @@
using System;
using System.Text;
+using System.Text.RegularExpressions;
using System.IO;
using System.IO.Compression;
using System.Management.Automation;
@@ -391,20 +392,8 @@ internal static void SaveStreamToFile(Stream stream, string filePath, PSCmdlet c
}
}
- internal static string DecodeStream(Stream stream, string characterSet)
+ private static string StreamToString(Stream stream, Encoding encoding)
{
- Encoding encoding = ContentHelper.GetEncodingOrDefault(characterSet);
- return DecodeStream(stream, encoding);
- }
-
- internal static string DecodeStream(Stream stream, Encoding encoding)
- {
- if (null == encoding)
- {
- // just use the default encoding if one wasn't provided
- encoding = ContentHelper.GetDefaultEncoding();
- }
-
StringBuilder result = new StringBuilder(capacity: ChunkSize);
Decoder decoder = encoding.GetDecoder();
@@ -413,9 +402,8 @@ internal static string DecodeStream(Stream stream, Encoding encoding)
{
useBufferSize = encoding.GetMaxCharCount(10);
}
- char[] chars = new char[useBufferSize];
-
+ char[] chars = new char[useBufferSize];
byte[] bytes = new byte[useBufferSize * 4];
int bytesRead = 0;
do
@@ -444,12 +432,74 @@ internal static string DecodeStream(Stream stream, Encoding encoding)
// Increment byteIndex to the next block of bytes in the input buffer, if any, to convert.
byteIndex += bytesUsed;
}
- }
- while (bytesRead != 0);
+ } while (bytesRead != 0);
return result.ToString();
}
+ /// <summary>
+ /// Decodes the stream using the named character set and reports the encoding that was used.
+ /// </summary>
+ /// <param name="stream">The stream to decode.</param>
+ /// <param name="characterSet">The character set name; it may be null or a name that Encoding.GetEncoding rejects.</param>
+ /// <param name="encoding">Receives the encoding that was used to decode the content.</param>
+ /// <returns>The decoded content.</returns>
+ internal static string DecodeStream(Stream stream, string characterSet, out Encoding encoding)
+ {
+     // When the name cannot be resolved, encoding is left null so that the
+     // ref overload selects the default encoding and attempts charset detection.
+     TryGetEncoding(characterSet, out encoding);
+     return DecodeStream(stream, ref encoding);
+ }
+
+ // Attempts to resolve a character set name to an Encoding.
+ // Returns true and the resolved encoding on success; returns false and a null
+ // encoding when Encoding.GetEncoding rejects the name with an ArgumentException.
+ static bool TryGetEncoding(string characterSet, out Encoding encoding)
+ {
+     try
+     {
+         encoding = Encoding.GetEncoding(characterSet);
+         return true;
+     }
+     catch (ArgumentException)
+     {
+         encoding = null;
+         return false;
+     }
+ }
+
+ // Finds a charset declaration inside a meta element (either a charset attribute or an
+ // http-equiv content value) and captures its name in the "charset" group.
+ // HTML element and attribute names are case-insensitive, hence IgnoreCase.
+ static readonly Regex s_metaexp = new Regex(
+     @"<meta\s[.\n]*[^><]*charset\s*=\s*[""'\n]?(?<charset>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]",
+     RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
+
+ /// <summary>
+ /// Decodes the stream with the supplied encoding. When no encoding is supplied, the
+ /// default encoding is used first and the decoded text is then searched for a charset
+ /// declared on a meta element; if that charset is recognized, the stream is decoded again with it.
+ /// </summary>
+ /// <param name="stream">The stream to decode.</param>
+ /// <param name="encoding">
+ /// The encoding to use, or null to use the default. On return, holds the encoding
+ /// that produced the returned content.
+ /// </param>
+ /// <returns>The decoded content.</returns>
+ internal static string DecodeStream(Stream stream, ref Encoding encoding)
+ {
+     bool isDefaultEncoding = false;
+     if (null == encoding)
+     {
+         // Use the default encoding if one wasn't provided
+         encoding = ContentHelper.GetDefaultEncoding();
+         isDefaultEncoding = true;
+     }
+
+     string content = StreamToString(stream, encoding);
+
+     // A caller-supplied encoding is never overridden. Re-decoding requires rewinding the
+     // stream, so for a stream that cannot seek keep the content already decoded rather
+     // than letting Seek throw NotSupportedException.
+     if (!isDefaultEncoding || !stream.CanSeek)
+     {
+         return content;
+     }
+
+     // check for a charset attribute on the meta element to override the default.
+     Match match = s_metaexp.Match(content);
+     if (match.Success)
+     {
+         Encoding localEncoding = null;
+         string characterSet = match.Groups["charset"].Value;
+
+         if (TryGetEncoding(characterSet, out localEncoding))
+         {
+             stream.Seek(0, SeekOrigin.Begin);
+             content = StreamToString(stream, localEncoding);
+             // report the encoding used.
+             encoding = localEncoding;
+         }
+     }
+
+     return content;
+ }
+
internal static Byte[] EncodeToBytes(String str, Encoding encoding)
{
if (null == encoding)
diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1
index b2fdd05ae9b..638f94f1dd0 100644
--- a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1
+++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1
@@ -248,9 +248,9 @@ function ExecuteRequestWithCustomUserAgent {
try {
$Params = @{
- Uri = $Uri
- TimeoutSec = 5
- UserAgent = $UserAgent
+ Uri = $Uri
+ TimeoutSec = 5
+ UserAgent = $UserAgent
SkipHeaderValidation = $SkipHeaderValidation.IsPresent
}
if ($Cmdlet -eq 'Invoke-WebRequest') {
@@ -271,6 +271,32 @@ function ExecuteRequestWithCustomUserAgent {
return $result
}
+# Calls Invoke-WebRequest with the given uri (5 second timeout) and captures the outcome.
+# The returned object carries Output (the response), Content (the response's Content)
+# and Error (the error record caught when the request fails).
+function ExecuteWebRequest
+{
+    param (
+        [Parameter(Mandatory)]
+        [string]
+        $Uri,
+
+        [switch] $UseBasicParsing
+    )
+    $result = [PSObject]@{Output = $null; Error = $null; Content = $null}
+    $requestParams = @{
+        Uri             = $Uri
+        TimeoutSec      = 5
+        UseBasicParsing = $UseBasicParsing.IsPresent
+    }
+
+    try
+    {
+        $response = Invoke-WebRequest @requestParams
+        $result.Output = $response
+        $result.Content = $response.Content
+    }
+    catch
+    {
+        $result.Error = $_
+    }
+
+    return $result
+}
+
+
<#
Defines the list of redirect codes to test as well as the
expected Method when the redirection is handled.
@@ -805,6 +831,245 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" {
#endregion SkipHeaderVerification Tests
+ #region charset encoding tests
+
+ Context "BasicHtmlWebResponseObject Encoding tests" {
+ # Each test requests the local test listener with -UseBasicParsing, passing $output as the
+ # response body (and, in some tests, a contenttype value), then checks the Encoding reported
+ # on the response object and that the object is a BasicHtmlWebResponseObject.
+ # NOTE(review): every $output below is empty or blank although the test names refer to a
+ # meta charset declaration - the HTML fixtures appear to be missing; confirm before relying
+ # on these tests.
+ It "Verifies Invoke-WebRequest detects charset meta value when the ContentType header does not define it." {
+ $output = ''
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects charset meta value when newlines are encountered in the element." {
+ $output = @'
+
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects charset meta value when the attribute value is unquoted." {
+ $output = ''
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects http-equiv charset meta value when the ContentType header does not define it." {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." {
+ $output = ''
+ # NOTE: meta charset should be ignored
+ $expectedEncoding = [System.Text.Encoding]::UTF8
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-8&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" {
+ $output = ''
+ # NOTE: meta charset should be ignored
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared" {
+ $output = ''
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject'
+ }
+ }
+
+ Context "HtmlWebResponseObject Encoding" {
+ # these tests are dependent on https://github.com/PowerShell/PowerShell/issues/2867
+ # Currently, all paths return BasicHtmlWebResponseObject
+ # Every test here is marked -Pending; each mirrors a basic-parsing encoding test but omits
+ # -UseBasicParsing and expects an HtmlWebResponseObject.
+ # NOTE(review): every $output below is empty or blank although the test names refer to a
+ # meta charset declaration - the HTML fixtures appear to be missing; confirm before enabling.
+ It "Verifies Invoke-WebRequest detects charset meta value when the ContentType header does not define it." -Pending {
+ $output = ''
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ # Update to test for HtmlWebResponseObject when mshtml dependency has been resolved.
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects charset meta value when newlines are encountered in the element." -Pending {
+ $output = @'
+
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." -Pending {
+ $output = ''
+ # NOTE: meta charset should be ignored
+ $expectedEncoding = [System.Text.Encoding]::UTF8
+ # Update to test for HtmlWebResponseObject when mshtml dependency has been resolved.
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-8&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ # Update to test for HtmlWebResponseObject when mshtml dependency has been resolved.
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects http-equiv charset meta value when the ContentType header does not define it." -Pending {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." -Pending {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" -Pending {
+ $output = ''
+ # NOTE: meta charset should be ignored
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ # Update to test for HtmlWebResponseObject when mshtml dependency has been resolved.
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared" -Pending {
+ $output = ''
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ # Update to test for HtmlWebResponseObject when mshtml dependency has been resolved.
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+
+ It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" -Pending {
+ $output = @'
+
+
+
+
+'@
+ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
+ $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output"
+
+ $response.Error | Should BeNullOrEmpty
+ $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
+ $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject'
+ }
+ }
+
+ #endregion charset encoding tests
+
BeforeEach {
if ($env:http_proxy) {
$savedHttpProxy = $env:http_proxy