Add cross-platform implementation of cellHttpUtilParseUri

This commit is contained in:
Zion Nimchuk 2017-08-02 13:36:44 -07:00 committed by Ivan
parent 8ad44dd00f
commit cc880b53aa
4 changed files with 407 additions and 51 deletions

265
Utilities/LUrlParser.cpp Normal file
View File

@ -0,0 +1,265 @@
/*
* Lightweight URL & URI parser (RFC 1738, RFC 3986)
* https://github.com/corporateshark/LUrlParser
*
* The MIT License (MIT)
*
* Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "LUrlParser.h"
#include <algorithm>
#include <cstring>
#include <stdlib.h>
// Check whether SchemeName is an acceptable URL scheme.
//
// Accepted characters are ASCII letters, ASCII digits, '+', '-' and '.'
// (the character set both RFC 1738 and RFC 3986 allow in a scheme).
// An empty name is rejected because a scheme needs at least one character.
//
// The test is done with explicit ASCII ranges instead of the <cctype>
// classifiers: those are locale dependent and have undefined behaviour when
// handed a negative 'char' (any byte >= 0x80 on platforms where char is signed).
static bool IsSchemeValid( const std::string& SchemeName )
{
	if ( SchemeName.empty() ) return false;

	for ( const char c : SchemeName )
	{
		const bool bIsLetter = ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
		const bool bIsDigit  = ( c >= '0' && c <= '9' );
		const bool bIsPunct  = ( c == '+' || c == '-' || c == '.' );

		if ( !bIsLetter && !bIsDigit && !bIsPunct ) return false;
	}

	return true;
}
bool LUrlParser::clParseURL::GetPort( int* OutPort ) const
{
if ( !IsValid() ) { return false; }
int Port = atoi( m_Port.c_str() );
if ( Port <= 0 || Port > 65535 ) { return false; }
if ( OutPort ) { *OutPort = Port; }
return true;
}
// Split URL into its components, based on RFC 1738 and RFC 3986.
//
// Expected layout:
//   <scheme>://[<user>[:<password>]@]<host>[:<port>][/<path>][?<query>][#<fragment>]
//
// Returns an object whose m_ErrorCode is LUrlParserError_Ok and whose string
// members hold the components (the scheme is lower-cased, the path is stored
// without its leading '/', the query without '?', the fragment without '#',
// and an IPv6 literal host keeps its brackets). On failure, an object carrying
// only the error code is returned.
//
// The authority part (user info, host, port) is terminated by '/', '?', '#'
// or the end of the string (RFC 3986, section 3.2), so a query or fragment may
// follow the host or port directly, e.g. "http://host?x=1".
LUrlParser::clParseURL LUrlParser::clParseURL::ParseURL( const std::string& URL )
{
	LUrlParser::clParseURL Result;

	const char* CurrentString = URL.c_str();

	// characters that terminate the authority component
	const auto IsAuthorityEnd = []( char c ) -> bool
	{
		return c == '/' || c == '?' || c == '#';
	};

	/*
	 * <scheme>:<scheme-specific-part>
	 * For resiliency, programs interpreting URLs should treat upper case letters
	 * as equivalent to lower case in scheme names
	 */

	// try to read scheme
	{
		const char* LocalString = strchr( CurrentString, ':' );

		if ( !LocalString )
		{
			return clParseURL( LUrlParserError_NoUrlCharacter );
		}

		// save the scheme name
		Result.m_Scheme = std::string( CurrentString, LocalString - CurrentString );

		if ( !IsSchemeValid( Result.m_Scheme ) )
		{
			return clParseURL( LUrlParserError_InvalidSchemeName );
		}

		// scheme should be lowercase
		std::transform( Result.m_Scheme.begin(), Result.m_Scheme.end(), Result.m_Scheme.begin(), ::tolower );

		// skip ':'
		CurrentString = LocalString + 1;
	}

	/*
	 * //<user>:<password>@<host>:<port>/<url-path>
	 */

	// skip "//" (the post-increment never reads past the terminating zero:
	// a mismatch, including the terminator itself, returns immediately)
	if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
	if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );

	// check if the user name and password are specified:
	// this is the case when an '@' shows up before the authority ends
	bool bHasUserName = false;

	const char* LocalString = CurrentString;

	while ( *LocalString && !IsAuthorityEnd( *LocalString ) )
	{
		if ( *LocalString == '@' )
		{
			bHasUserName = true;
			break;
		}

		LocalString++;
	}

	// user name and password
	LocalString = CurrentString;

	if ( bHasUserName )
	{
		// read user name
		while ( *LocalString && *LocalString != ':' && *LocalString != '@' ) LocalString++;

		Result.m_UserName = std::string( CurrentString, LocalString - CurrentString );

		// proceed with the current pointer
		CurrentString = LocalString;

		if ( *CurrentString == ':' )
		{
			// skip ':'
			CurrentString++;

			// read password
			LocalString = CurrentString;

			while ( *LocalString && *LocalString != '@' ) LocalString++;

			Result.m_Password = std::string( CurrentString, LocalString - CurrentString );

			CurrentString = LocalString;
		}

		// skip '@'
		if ( *CurrentString != '@' )
		{
			return clParseURL( LUrlParserError_NoAtSign );
		}

		CurrentString++;
	}

	// an IPv6 literal is enclosed in brackets and may itself contain ':'
	const bool bHasBracket = ( *CurrentString == '[' );

	// go ahead, read the host name
	LocalString = CurrentString;

	while ( *LocalString )
	{
		if ( bHasBracket && *LocalString == ']' )
		{
			// end of IPv6 address, keep the closing bracket in the host
			LocalString++;
			break;
		}
		else if ( !bHasBracket && ( *LocalString == ':' || IsAuthorityEnd( *LocalString ) ) )
		{
			// a port number follows, or the authority is over
			break;
		}

		LocalString++;
	}

	Result.m_Host = std::string( CurrentString, LocalString - CurrentString );

	CurrentString = LocalString;

	// is port number specified?
	if ( *CurrentString == ':' )
	{
		CurrentString++;

		// read port number
		LocalString = CurrentString;

		while ( *LocalString && !IsAuthorityEnd( *LocalString ) ) LocalString++;

		Result.m_Port = std::string( CurrentString, LocalString - CurrentString );

		CurrentString = LocalString;
	}

	// end of string
	if ( !*CurrentString )
	{
		Result.m_ErrorCode = LUrlParserError_Ok;

		return Result;
	}

	if ( *CurrentString == '/' )
	{
		// skip '/'
		CurrentString++;

		// parse the path
		LocalString = CurrentString;

		while ( *LocalString && *LocalString != '#' && *LocalString != '?' ) LocalString++;

		Result.m_Path = std::string( CurrentString, LocalString - CurrentString );

		CurrentString = LocalString;
	}
	else if ( *CurrentString != '?' && *CurrentString != '#' )
	{
		// something other than a path, query or fragment follows the authority
		return clParseURL( LUrlParserError_NoSlash );
	}

	// check for query
	if ( *CurrentString == '?' )
	{
		// skip '?'
		CurrentString++;

		// read query
		LocalString = CurrentString;

		while ( *LocalString && *LocalString != '#' ) LocalString++;

		Result.m_Query = std::string( CurrentString, LocalString - CurrentString );

		CurrentString = LocalString;
	}

	// check for fragment
	if ( *CurrentString == '#' )
	{
		// skip '#'
		CurrentString++;

		// the fragment runs to the end of the string
		Result.m_Fragment = std::string( CurrentString );
	}

	Result.m_ErrorCode = LUrlParserError_Ok;

	return Result;
}

78
Utilities/LUrlParser.h Normal file
View File

@ -0,0 +1,78 @@
/*
* Lightweight URL & URI parser (RFC 1738, RFC 3986)
* https://github.com/corporateshark/LUrlParser
*
* The MIT License (MIT)
*
* Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include <string>
namespace LUrlParser
{
/// Result codes reported by clParseURL through its m_ErrorCode member.
enum LUrlParserError
{
	/// the URL was parsed successfully
	LUrlParserError_Ok                  = 0,
	/// state of a default-constructed clParseURL, nothing has been parsed
	LUrlParserError_Uninitialized       = 1,
	/// no ':' separating the scheme from the rest of the URL was found
	LUrlParserError_NoUrlCharacter      = 2,
	/// the text in front of the first ':' is not an acceptable scheme name
	LUrlParserError_InvalidSchemeName   = 3,
	/// the scheme separator is not followed by "//"
	LUrlParserError_NoDoubleSlash       = 4,
	/// user information was started but the '@' terminating it is missing
	LUrlParserError_NoAtSign            = 5,
	/// the input ended prematurely
	LUrlParserError_UnexpectedEndOfLine = 6,
	/// a '/' was expected after the host/port part but another character was found
	LUrlParserError_NoSlash             = 7
};
/// Holder for the components of a parsed URL.
///
/// Instances are produced by the static ParseURL() factory; a
/// default-constructed object reports LUrlParserError_Uninitialized.
class clParseURL
{
public:
	/// outcome of the parse; the remaining fields are meaningful only when this is LUrlParserError_Ok
	LUrlParserError m_ErrorCode = LUrlParserError_Uninitialized;

	// URL components, stored as text
	std::string m_Scheme;
	std::string m_Host;
	std::string m_Port;
	std::string m_Path;
	std::string m_Query;
	std::string m_Fragment;
	std::string m_UserName;
	std::string m_Password;

	/// create an empty, not yet parsed object
	clParseURL() {}

	/// 'true' when the URL was parsed without errors
	bool IsValid() const
	{
		return LUrlParserError_Ok == m_ErrorCode;
	}

	/// convert the port number to int; returns 'true' and fills *OutPort (if not null)
	/// when the URL is valid and the port is within the 1..65535 range
	bool GetPort( int* OutPort ) const;

	/// parse the URL and return its components together with an error code
	static clParseURL ParseURL( const std::string& URL );

private:
	/// create an object that carries only an error code (used for failed parses)
	explicit clParseURL( LUrlParserError ErrorCode )
		: m_ErrorCode( ErrorCode )
	{
	}
};
} // namespace LUrlParser

View File

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "Emu/Cell/PPUModule.h"
#include "Utilities/LUrlParser.h"
#include "cellHttpUtil.h"
@ -14,50 +15,15 @@ logs::channel cellHttpUtil("cellHttpUtil");
s32 cellHttpUtilParseUri(vm::ptr<CellHttpUri> uri, vm::cptr<char> str, vm::ptr<void> pool, u32 size, vm::ptr<u32> required)
{
#ifdef _WIN32
URL_COMPONENTS stUrlComp;
ZeroMemory(&stUrlComp, sizeof(URL_COMPONENTS));
stUrlComp.dwStructSize = sizeof(URL_COMPONENTS);
wchar_t lpszScheme[MAX_PATH] = { 0 };
wchar_t lpszHostName[MAX_PATH] = { 0 };
wchar_t lpszPath[MAX_PATH] = { 0 };
wchar_t lpszUserName[MAX_PATH] = { 0 };
wchar_t lpszPassword[MAX_PATH] = { 0 };
stUrlComp.lpszScheme = lpszScheme;
stUrlComp.dwSchemeLength = MAX_PATH;
stUrlComp.lpszHostName = lpszHostName;
stUrlComp.dwHostNameLength = MAX_PATH;
stUrlComp.lpszUrlPath = lpszPath;
stUrlComp.dwUrlPathLength = MAX_PATH;
stUrlComp.lpszUserName = lpszUserName;
stUrlComp.dwUserNameLength = MAX_PATH;
stUrlComp.lpszPassword = lpszPassword;
stUrlComp.dwPasswordLength = MAX_PATH;
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
LPCWSTR stupidTypeUrlString = converter.from_bytes(str.get_ptr()).c_str();
if (!::WinHttpCrackUrl(stupidTypeUrlString, (DWORD)(LONG_PTR)wcslen(stupidTypeUrlString), ICU_ESCAPE, &stUrlComp))
cellHttpUtil.trace("cellHttpUtilParseUri(uri=*0x%x, str=%s, pool=*0x%x, size=%d, required=*0x%x)", uri, str, pool, size, required);
LUrlParser::clParseURL URL = LUrlParser::clParseURL::ParseURL(str.get_ptr());
if ( URL.IsValid() )
{
cellHttpUtil.error("Error %u in WinHttpCrackUrl.\n", GetLastError());
}
else
{
std::string scheme = converter.to_bytes(lpszScheme);
std::string host = converter.to_bytes(lpszHostName);
std::string path = converter.to_bytes(lpszPath);
std::string username = converter.to_bytes(lpszUserName);
std::string password = converter.to_bytes(lpszPassword);
std::string scheme = URL.m_Scheme;
std::string host = URL.m_Host;
std::string path = URL.m_Path;
std::string username = URL.m_UserName;
std::string password = URL.m_Password;
u32 schemeOffset = 0;
u32 hostOffset = scheme.length() + 1;
@ -67,10 +33,13 @@ s32 cellHttpUtilParseUri(vm::ptr<CellHttpUri> uri, vm::cptr<char> str, vm::ptr<v
u32 totalSize = passwordOffset + password.length() + 1;
//called twice, first to setup pool, then to populate.
if (!uri) {
if (!uri)
{
*required = totalSize;
return CELL_OK;
} else {
}
else
{
std::strncpy((char*)vm::base(pool.addr() + schemeOffset), (char*)scheme.c_str(), scheme.length() + 1);
std::strncpy((char*)vm::base(pool.addr() + hostOffset), (char*)host.c_str(), host.length() + 1);
std::strncpy((char*)vm::base(pool.addr() + pathOffset), (char*)path.c_str(), path.length() + 1);
@ -82,14 +51,55 @@ s32 cellHttpUtilParseUri(vm::ptr<CellHttpUri> uri, vm::cptr<char> str, vm::ptr<v
uri->path.set(pool.addr() + pathOffset);
uri->username.set(pool.addr() + usernameOffset);
uri->password.set(pool.addr() + passwordOffset);
uri->port = stUrlComp.nPort;
}
}
#else
cellHttpUtil.todo("cellHttpUtilParseUri(uri=*0x%x, str=%s, pool=*0x%x, size=%d, required=*0x%x)", uri, str, pool, size, required);
#endif
if (URL.m_Port != "")
{
int port = stoi(URL.m_Port);
uri->port = port;
}
else
{
uri->port = (u32)80;
}
return CELL_OK;
}
}
else
{
std::string parseError;
switch(URL.m_ErrorCode)
{
case LUrlParser::LUrlParserError_Ok:
parseError = "No error, URL was parsed fine";
break;
case LUrlParser::LUrlParserError_Uninitialized:
parseError = "Error, LUrlParser is uninitialized";
break;
case LUrlParser::LUrlParserError_NoUrlCharacter:
parseError = "Error, the URL has invalid characters";
break;
case LUrlParser::LUrlParserError_InvalidSchemeName:
parseError = "Error, the URL has an invalid scheme";
break;
case LUrlParser::LUrlParserError_NoDoubleSlash:
parseError = "Error, the URL did not contain a double slash";
break;
case LUrlParser::LUrlParserError_NoAtSign:
parseError = "Error, the URL did not contain an @ sign";
break;
case LUrlParser::LUrlParserError_UnexpectedEndOfLine:
parseError = "Error, unexpectedly got the end of the line";
break;
case LUrlParser::LUrlParserError_NoSlash:
parseError = "Error, URI didn't contain a slash";
break;
default:
parseError = "Error, unkown error #" + std::to_string(static_cast<int>(URL.m_ErrorCode));
break;
}
cellHttpUtil.error("%s, while parsing URI, %s.", parseError, str.get_ptr());
return -1;
}
}
s32 cellHttpUtilParseUriPath(vm::ptr<CellHttpUriPath> path, vm::cptr<char> str, vm::ptr<void> pool, u32 size, vm::ptr<u32> required)

View File

@ -91,6 +91,9 @@
<ClCompile Include="..\Utilities\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\LUrlParser.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Utilities\Config.cpp" />
<ClCompile Include="..\Utilities\mutex.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>