From 2fa0f51f69d3e1e0cc8e16d3141210b89868c56c Mon Sep 17 00:00:00 2001 From: Dan Travison Date: Mon, 24 Jul 2017 16:38:00 -0700 Subject: [PATCH 1/6] Use HTML meta charset attribute value, if present, when the Context-Type header does not specify it. --- .../BasicHtmlWebResponseObject.Common.cs | 14 ++- .../CoreCLR/HtmlWebResponseObject.CoreClr.cs | 33 ++++++ .../InvokeRestMethodCommand.CoreClr.cs | 2 +- .../utility/WebCmdlet/StreamHelper.cs | 74 ++++++++++--- .../WebCmdlets.Tests.ps1 | 104 ++++++++++++++++++ 5 files changed, 207 insertions(+), 20 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 7f17c24f07d..ae91ff0012c 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -25,6 +25,14 @@ public partial class BasicHtmlWebResponseObject : WebResponseObject /// public new string Content { get; private set; } + /// + /// Gets the Encoding that was used to decode the Content + /// + /// + /// The Encoding used to decode the Content; otherwise, a null reference if the content is not text. + /// + public Encoding Encoding { get; private set; } + private WebCmdletElementCollection _inputFields; /// @@ -217,14 +225,16 @@ private void ParseAttributes(string outerHtml, PSObject elementObject) /// /// Reads the response content from the web response. /// - private void InitializeContent() + protected void InitializeContent() { string contentType = ContentHelper.GetContentType(BaseResponse); if (ContentHelper.IsText(contentType)) { + Encoding encoding = null; // fill the Content buffer string characterSet = WebResponseHelper.GetCharacterSet(BaseResponse); - this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet); + this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet, out encoding); + this.Encoding = encoding; } else { diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs index 09879c6e3e3..c87d24a0d45 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/HtmlWebResponseObject.CoreClr.cs @@ -49,8 +49,41 @@ internal HtmlWebResponseObject(HttpResponseMessage response, Stream contentStrea #endregion Constructors + #region Properties + + /// + /// Gets the Encoding that was used to decode the Content + /// + /// + /// The Encoding used to decode the Content; otherwise, a null reference if the content is not text. + /// + public Encoding Encoding { get; private set; } + + #endregion Properties + #region Methods + // NOTE: Currently this code path is not enabled. + // See FillRequestStream in WebRequestPSCmdlet.CoreClr.cs and + // GetResponseObject in WebResponseObjectFactory.CoreClr.cs for details. + private void InitializeContent() + { + string contentType = ContentHelper.GetContentType(BaseResponse); + string content = null; + if (ContentHelper.IsText(contentType)) + { + Encoding encoding = null; + // fill the Content buffer + string characterSet = WebResponseHelper.GetCharacterSet(BaseResponse); + this.Content = StreamHelper.DecodeStream(RawContentStream, characterSet, out encoding); + this.Encoding = encoding; + } + else + { + this.Content = string.Empty; + } + } + private void InitializeRawContent(HttpResponseMessage baseResponse) { StringBuilder raw = ContentHelper.GetRawContentHeader(baseResponse); diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs index 7d0d6c2d3bf..8be7c98342a 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/CoreCLR/InvokeRestMethodCommand.CoreClr.cs @@ -51,7 +51,7 @@ internal override void ProcessResponse(HttpResponseMessage response) object obj = null; Exception ex = null; - string str = StreamHelper.DecodeStream(responseStream, encoding); + string str = StreamHelper.DecodeStream(responseStream, ref encoding); bool convertSuccess = false; // On CoreCLR, we need to explicitly load Json.NET diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 0c905c2f968..b4bf1efaa22 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -391,20 +391,8 @@ internal static void SaveStreamToFile(Stream stream, string filePath, PSCmdlet c } } - internal static string DecodeStream(Stream stream, string characterSet) + private static string StreamToString(Stream stream, Encoding encoding) { - Encoding encoding = ContentHelper.GetEncodingOrDefault(characterSet); - return DecodeStream(stream, encoding); - } - - internal static string DecodeStream(Stream stream, Encoding encoding) - { - if (null == encoding) - { - // just use the default encoding if one wasn't provided - encoding = ContentHelper.GetDefaultEncoding(); - } - StringBuilder result = new StringBuilder(capacity: ChunkSize); Decoder decoder = encoding.GetDecoder(); @@ -413,9 +401,8 @@ internal static string DecodeStream(Stream stream, Encoding encoding) { useBufferSize = encoding.GetMaxCharCount(10); } - char[] chars = new char[useBufferSize]; - + char[] chars = new char[useBufferSize]; byte[] bytes = new byte[useBufferSize * 4]; int bytesRead = 0; do @@ -444,12 +431,65 @@ internal static string DecodeStream(Stream stream, Encoding encoding) // Increment byteIndex to the next block of bytes in the input buffer, if any, to convert. byteIndex += bytesUsed; } - } - while (bytesRead != 0); + } while (bytesRead != 0); return result.ToString(); } + internal static string DecodeStream(Stream stream, string characterSet, out Encoding encoding) + { + try + { + encoding = Encoding.GetEncoding(characterSet); + } + catch (ArgumentException) + { + encoding = null; + } + return DecodeStream(stream, ref encoding); + } + + internal static string DecodeStream(Stream stream, ref Encoding encoding) + { + bool isDefaultEncoding = false; + if (null == encoding) + { + // Use the default encoding if one wasn't provided + encoding = ContentHelper.GetDefaultEncoding(); + isDefaultEncoding = true; + } + + string content = StreamToString (stream, encoding); + + if (isDefaultEncoding) do + { + // check for a charset meta element to override the default. + string searchString = " Date: Mon, 7 Aug 2017 13:34:21 -0700 Subject: [PATCH 2/6] Switch to regex for searching for meta/charset attribute Add negative test to validate handling of invalid/unsupported encodings in StreamHelper.DecodeStream --- .../utility/WebCmdlet/StreamHelper.cs | 52 +++++++++++-------- .../WebCmdlets.Tests.ps1 | 21 ++++++++ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index b4bf1efaa22..7e86f411899 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -4,6 +4,7 @@ using System; using System.Text; +using System.Text.RegularExpressions; using System.IO; using System.IO.Compression; using System.Management.Automation; @@ -449,6 +450,23 @@ internal static string DecodeStream(Stream stream, string characterSet, out Enco return DecodeStream(stream, ref encoding); } + static bool TryGetEncoding(string characterSet, out Encoding encoding) + { + bool result = false; + try + { + encoding = Encoding.GetEncoding(characterSet); + result = true; + } + catch (ArgumentException) + { + encoding = null; + } + return result; + } + + static readonly Regex s_metaexp = new Regex(@" 1) { - Encoding localEncoding = Encoding.GetEncoding(characterSet); - stream.Seek(0, SeekOrigin.Begin); - content = StreamToString(stream, localEncoding); - // report the encoding used. - encoding = localEncoding; - } - catch (ArgumentException) - { - // don't propagate an invalid encoding string. + Encoding localEncoding = null; + string characterSet = match.Groups[1].Value; + + if (TryGetEncoding(characterSet, out localEncoding)) + { + stream.Seek(0, SeekOrigin.Begin); + content = StreamToString(stream, localEncoding); + // report the encoding used. + encoding = localEncoding; + } } } while (false); diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 index 74dc30608b6..5e934c91f36 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 @@ -865,6 +865,16 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' } + + It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared" { + $output = '' + $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } } Context "HtmlWebResponseObject Encoding" { @@ -905,6 +915,17 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { # Update to test for HtmlWebResponseObject when mshtl dependency has been resolved. $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } + + It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared" -Pending { + $output = '' + $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + # Update to test for HtmlWebResponseObject when mshtl dependency has been resolved. + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' + } } #endregion charset encoding tests From e5cfbb174670bca248bef32562ce8f9cb53a85fe Mon Sep 17 00:00:00 2001 From: dantraMSFT Date: Tue, 8 Aug 2017 12:20:13 -0700 Subject: [PATCH 3/6] Update tests to use encodings that are available on Linux --- .../WebCmdlets.Tests.ps1 | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 index 5e934c91f36..130a4a5fef5 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 @@ -835,8 +835,8 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { Context "BasicHtmlWebResponseObject Encoding tests" { It "Verifies Invoke-WebRequest detects charset meta value when the ContentType header does not define it." { - $output = '' - $expectedEncoding = [System.Text.Encoding]::GetEncoding('gb2312') + $output = '' + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing $response.Error | Should BeNullOrEmpty @@ -845,7 +845,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { } It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." { - $output = '' + $output = '' # NOTE: meta charset should be ignored $expectedEncoding = [System.Text.Encoding]::UTF8 $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-8&output=$output" -UseBasicParsing @@ -856,10 +856,10 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { } It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" { - $output = '' + $output = '' # NOTE: meta charset should be ignored - $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-2022-jp') - $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=iso-2022-jp&output=$output" -UseBasicParsing + $expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output" -UseBasicParsing $response.Error | Should BeNullOrEmpty $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName @@ -881,8 +881,8 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { # these tests are dependent on https://github.com/PowerShell/PowerShell/issues/2867 # Currently, all paths return BasicHtmlWebResponseObject It "Verifies Invoke-WebRequest detects charset meta value when the ContentType header does not define it." -Pending { - $output = '' - $expectedEncoding = [System.Text.Encoding]::GetEncoding('gb2312') + $output = '' + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" $response.Error | Should BeNullOrEmpty @@ -892,7 +892,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { } It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." -Pending { - $output = '' + $output = '' # NOTE: meta charset should be ignored $expectedEncoding = [System.Text.Encoding]::UTF8 # Update to test for HtmlWebResponseObject when mshtl dependency has been resolved. @@ -905,10 +905,10 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { } It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" -Pending { - $output = '' + $output = '' # NOTE: meta charset should be ignored - $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-2022-jp') - $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=iso-2022-jp&output=$output" + $expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output" $response.Error | Should BeNullOrEmpty $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName From d1a5b2b1db0fe11fca2a9fbd66fc74e09024086b Mon Sep 17 00:00:00 2001 From: Dan Travison Date: Tue, 8 Aug 2017 16:34:01 -0700 Subject: [PATCH 4/6] [Feature] Revise regex expression to be less restrictive of the charset attribute value and use a named group for the value. --- .../commands/utility/WebCmdlet/StreamHelper.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 7e86f411899..57c4d51481d 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -465,7 +465,7 @@ static bool TryGetEncoding(string characterSet, out Encoding encoding) return result; } - static readonly Regex s_metaexp = new Regex(@".[^""']+)"); internal static string DecodeStream(Stream stream, ref Encoding encoding) { @@ -482,10 +482,10 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) { // check for a charset attribute on the meta element to override the default. Match match = s_metaexp.Match(content); - if (match.Success && match.Groups.Count > 1) + if (match.Success) { Encoding localEncoding = null; - string characterSet = match.Groups[1].Value; + string characterSet = match.Groups["charset"].Value; if (TryGetEncoding(characterSet, out localEncoding)) { From 4f4066b9e6c805ac18e2f389044918221e2722b4 Mon Sep 17 00:00:00 2001 From: Dan Travison Date: Wed, 9 Aug 2017 12:05:19 -0700 Subject: [PATCH 5/6] [Feature] Updated regex to handle http-equiv charset declarations Update tests to validate http-equiv charset declarations --- .../utility/WebCmdlet/StreamHelper.cs | 2 +- .../WebCmdlets.Tests.ps1 | 139 ++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 57c4d51481d..cc4f0cacfcd 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -465,7 +465,7 @@ static bool TryGetEncoding(string characterSet, out Encoding encoding) return result; } - static readonly Regex s_metaexp = new Regex(@".[^""']+)"); + static readonly Regex s_metaexp = new Regex(@"<]*charset\s*=\s*[""'\n]?(?[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]"); internal static string DecodeStream(Stream stream, ref Encoding encoding) { diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 index 130a4a5fef5..089b3181450 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 @@ -844,6 +844,66 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' } + It "Verifies Invoke-WebRequest detects charset meta value when newlines are encountered in the element." { + $output = @' + + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } + + It "Verifies Invoke-WebRequest detects charset meta value when the attribute value is unquoted." { + $output = '' + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } + + It "Verifies Invoke-WebRequest detects http-equiv charset meta value when the ContentType header does not define it." { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } + + It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } + It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." { $output = '' # NOTE: meta charset should be ignored @@ -875,6 +935,21 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' } + + It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject' + } } Context "HtmlWebResponseObject Encoding" { @@ -891,6 +966,24 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } + It "Verifies Invoke-WebRequest detects charset meta value when newlines are encountered in the element." -Pending { + $output = @' + + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' + } + It "Verifies Invoke-WebRequest ignores meta charset value when Content-Type header defines it." -Pending { $output = '' # NOTE: meta charset should be ignored @@ -904,6 +997,38 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } + It "Verifies Invoke-WebRequest detects http-equiv charset meta value when the ContentType header does not define it." -Pending { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' + } + + It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." -Pending { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' + } + It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" -Pending { $output = '' # NOTE: meta charset should be ignored @@ -926,6 +1051,20 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { # Update to test for HtmlWebResponseObject when mshtl dependency has been resolved. $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } + It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" -Pending { + $output = @' + + + + +'@ + $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') + $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" + + $response.Error | Should BeNullOrEmpty + $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName + $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' + } } #endregion charset encoding tests From 09e7a46a0722e5345162f670bf11b0ebf0577924 Mon Sep 17 00:00:00 2001 From: Dan Travison Date: Wed, 9 Aug 2017 13:09:14 -0700 Subject: [PATCH 6/6] [Heading] Strip trailing whitespace. --- .../WebCmdlets.Tests.ps1 | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 index 089b3181450..638f94f1dd0 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/WebCmdlets.Tests.ps1 @@ -248,9 +248,9 @@ function ExecuteRequestWithCustomUserAgent { try { $Params = @{ - Uri = $Uri - TimeoutSec = 5 - UserAgent = $UserAgent + Uri = $Uri + TimeoutSec = 5 + UserAgent = $UserAgent SkipHeaderValidation = $SkipHeaderValidation.IsPresent } if ($Cmdlet -eq 'Invoke-WebRequest') { @@ -848,7 +848,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $output = @' - @@ -877,7 +877,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing @@ -890,11 +890,11 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." { $output = @' - - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" -UseBasicParsing @@ -941,7 +941,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing @@ -970,7 +970,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $output = @' - @@ -1002,7 +1002,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" @@ -1015,11 +1015,11 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { It "Verifies Invoke-WebRequest detects http-equiv charset meta value newlines are encountered in the element." -Pending { $output = @' - - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&output=$output" @@ -1028,7 +1028,7 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { $response.Output.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } - + It "Verifies Invoke-WebRequest honors non-utf8 charsets in the Content-Type header" -Pending { $output = '' # NOTE: meta charset should be ignored @@ -1051,12 +1051,13 @@ Describe "Invoke-WebRequest tests" -Tags "Feature" { # Update to test for HtmlWebResponseObject when mshtl dependency has been resolved. $response.Output | Should BeOfType 'Microsoft.PowerShell.Commands.HtmlWebResponseObject' } + It "Verifies Invoke-WebRequest defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" -Pending { $output = @' - + '@ $expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1') $response = ExecuteWebRequest -Uri "http://localhost:8080/PowerShell?test=response&contenttype=text/html&output=$output"