This might be of interest for those who want RFC 2396 compliant parsing code:
/** The Universal Resource Location implementation.
This is based on http://www.ietf.org/rfc/rfc2396.txt
*/
class URL
{
// Type used in this class
public:
/** String class to use */
typedef ::String String;
// Members
protected:
/** The scheme, also called protocol */
String scheme;
/** The authority, usually called the server */
String authority;
/** The path to the resource */
String path;
/** The query */
String query;
/** The fragment to reach */
String fragment;
// Helpers
private:
/** Split a text based URI */
bool splitURI(const String & inputURL);
/** Normalize a given path */
void normalizePath(String & pathToNormalize) const;
// Interface
public:
/** Check if this URL is valid */
inline bool isValid() { return authority.isNotEmpty(); }
/** Construct a text from this URL
@param defaultScheme The default scheme if missing */
inline String asURI(const String & defaultScheme = "") const
{
String schemeTmp = scheme.isNotEmpty() ? scheme : defaultScheme;
schemeTmp += "://" + authority + ((path.isNotEmpty() && path[0] != '/') ? "/" : "") + path;
if (query.isNotEmpty()) schemeTmp += "?" + query;
if (fragment.isNotEmpty()) schemeTmp += "#" + fragment;
return schemeTmp;
}
/** Append path from the given path */
inline URL appendRelativePath(String newPath) const
{
URL ret(scheme, authority, "");
// Check if new path contain a fragment or a query
if (newPath.contains(JUCE_T("#")))
{
ret.fragment = newPath.fromLastOccurrenceOf(JUCE_T("#"), false, false);
newPath = newPath.upToLastOccurrenceOf(JUCE_T("#"), false, false);
}
if (newPath.contains(JUCE_T("?")))
{
ret.query = newPath.fromLastOccurrenceOf(JUCE_T("?"), false, false);
newPath = newPath.upToLastOccurrenceOf(JUCE_T("?"), false, false);
}
// Let's first normalize newPath
// First remove any /./ in path
newPath.replace(JUCE_T("/./"), JUCE_T("/"), false);
// If the newPath start by /, just replace it
if (newPath[0] == JUCE_T('/')) { ret.path = newPath; normalizePath(ret.path); return ret; }
// Check if this path points to a file
int lastSlashInNewPath = newPath.lastIndexOfChar(JUCE_T('/')) + 1;
bool isFile = newPath.indexOfChar(lastSlashInNewPath, JUCE_T('.')) != -1;
// Need to first split the current path to trim any remaining filename
String curPath = path;
int lastSlash = curPath.lastIndexOfChar(JUCE_T('/'));
if (lastSlash != -1) curPath = curPath.substring(0, lastSlash);
// Need to count how many '../' this path contains
// For each one, we have to go up one level in the current path
int upPos = newPath.indexOf(JUCE_T("../"));
while (upPos != -1)
{
// Remove one level of the current path
lastSlash = curPath.lastIndexOfChar(JUCE_T('/'));
if (lastSlash != -1) curPath = curPath.substring(0, lastSlash);
newPath = newPath.substring(upPos + 3, newPath.length());
upPos = newPath.indexOf(JUCE_T("../"));
}
// Then concatenate
ret.path = curPath + (curPath.length() ? JUCE_T("/") : JUCE_T("")) + newPath;
ret.path += (!isFile && ret.path[ret.path.length() - 1] != JUCE_T('/')) ? JUCE_T("/") : JUCE_T("");
if (ret.path[0] != JUCE_T('/')) ret.path = "/" + ret.path;
// Normalize any /./ in path
normalizePath(ret.path);
return ret;
}
/** Escape an URL to only allowed chars */
static String escapedURI(const String & inputURL);
/** Strip port information from authority and return it if known */
inline uint16 stripPortFromAuthority(uint16 defaultPortValue)
{
int portPos = authority.lastIndexOfChar(JUCE_T(':'));
if (portPos != -1)
{
String portValue = authority.substring(portPos+1, authority.length());
authority = authority.substring(0, portPos);
portPos = portValue.getIntValue() ? portValue.getIntValue() : defaultPortValue;
if (portPos < 0) portPos = 0;
if (portPos > 65535) portPos = 65535;
return (uint16)portPos;
}
return defaultPortValue;
}
// Accessor
public:
/** Get the current authority */
inline const String & getAuthority() const { return authority; }
/** Get the current scheme */
inline const String & getScheme() const { return scheme; }
/** Get the current path */
inline const String & getPath() const { return path; }
/** Get the current query */
inline const String & getQuery() const { return query; }
/** Get the current fragment */
inline const String & getFragment() const { return fragment; }
public:
/** Default, and invalid constructor */
URL() {}
/** Construct an URL from a UTF8 text */
URL(const String & inputURL, const String & defaultScheme = "")
{
splitURI(inputURL);
if (!scheme.isNotEmpty()) scheme = defaultScheme;
}
/** Construct an URL from its part */
URL(const String & _scheme, const String & _authority, const String & _path, const String & _query = "", const String & _fragment = "")
: scheme(_scheme), authority(_authority), path(_path), query(_query), fragment(_fragment) { }
};
and the code:
#include "../../include/Network/URLHandler.hpp"
namespace Network
{
/** Check if the character Y is listed in the character table X.
    The table length is deduced with sizeof, so X must be a real array (not a pointer). */
#define isIn(X, Y) _isIn(X, sizeof(X) / sizeof(X[0]), Y)
/** Linear search of a character in a table
    @param array The table to search in
    @param len   The number of entries in the table
    @param ch    The character to look for
    @return true if one entry of the table equals ch */
inline bool _isIn(const char * array, const unsigned int len, tchar ch)
{
    const char * const tableEnd = array + len;
    for (const char * entry = array; entry != tableEnd; ++entry)
    {
        if (*entry == ch) return true;
    }
    return false;
}
/** Check if the given character is an hexadecimal digit (0-9, a-f or A-F) */
inline bool isHex(const char a)
{
    if (a >= '0' && a <= '9') return true;
    if (a >= 'a' && a <= 'f') return true;
    if (a >= 'A' && a <= 'F') return true;
    return false;
}
// Character tables used through the isIn() macro.
// They are plain char arrays, not string literals, so that sizeof() counts exactly the listed
// characters (a string literal would add a terminating zero to the searched set).

/** The characters that never need escaping: letters, digits and the marks - _ . ! ~ * ' ( )
    (this looks like the "unreserved" set of RFC 2396, section 2.3) */
static const char unreserved[] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'-', '_', '.', '!', '~', '*', '\'', '(', ')' };
/** The reserved characters without '/' and '?'.
    Together with the unreserved set and '%', splitURI accepts them inside an authority. */
static const char less_reserved[] = { ';', ':', '@', '&', '=', '+', '$', ',' };
/** The reserved characters: the less_reserved set plus '/' and '?'
    (this looks like the "reserved" set of RFC 2396, section 2.2).
    Together with the unreserved set and '%', splitURI accepts them inside an opaque part or a relative path. */
static const char reserved[] = { ';', '/', '?', ':', '@', '&', '=', '+', '$', ',' };
/** Escape an URL to only allowed chars.
    The input is trimmed first. Each character that is neither in the unreserved nor in the
    reserved tables is then replaced by '%' followed by its value as two lowercase hexadecimal digits.
    @param inputURL The text to escape
    @return The escaped text */
URL::String URL::escapedURI(const URL::String & inputURL)
{
    String trimmed = inputURL.trim();
    String escaped;
    const int count = trimmed.length();
    for (int pos = 0; pos < count; ++pos)
    {
        const bool allowed = isIn(unreserved, trimmed[pos]) || isIn(less_reserved, trimmed[pos]) || isIn(reserved, trimmed[pos]);
        if (!allowed)
        {
            // Only the low 8 bits of the character are written
            escaped += String::formatted(JUCE_T("%%%02x"), (unsigned char)trimmed[pos]);
            continue;
        }
        escaped += trimmed[pos];
    }
    return escaped;
}
/** Split a text based URI in its scheme, authority, path, query and fragment parts.
    Based on http://www.ietf.org/rfc/rfc2396.txt

    The text is processed in two passes:
      - The first pass validates every character and finds out if the URI is absolute
        (it has a "scheme:" prefix) or relative.
      - The second pass locates the boundaries of each part.

    The recognized forms are:
      - scheme://authority/path?query#fragment   (absolute, hierarchical)
      - scheme:/path?query#fragment              (absolute, no authority)
      - scheme:opaque_part#fragment              (absolute, opaque, no query)
      - //authority/path?query#fragment          (relative, network path)
      - path?query#fragment                      (relative, absolute or relative path)

    @param inputURL The text to split
    @return false if the text contains a character that is not allowed in a URI (space, control or
            non ASCII character, one of <>"{}|\^[]`, a '%' that is not followed by two hexadecimal
            digits) or if it starts with ':'. The members are left untouched in that case. */
bool URL::splitURI(const URL::String & inputURL)
{
    const char * input = (const char*)inputURL.toUTF8();
    // The length is counted in characters. It only differs from the number of UTF-8 bytes when the text
    // contains a non ASCII character, and the first of those is rejected by the validation pass below.
    const int32 length = (int32)inputURL.length();
    static const char excluded[] = { '{', '}', '|', '\\', '^', '[', ']', '`' };
    // The characters that end the search for a scheme
    static const char breakScheme[] = { ':', '/', '?', '#' };

    int i = 0;
    int scheme_e = 0, auth_s = 0, auth_e = 0, path_s = 0, path_e = 0, query_s = 0, query_e = 0, frag_s = 0, frag_e = 0;
    bool absoluteURI = false, hier_part = false, opaque_part = false, relativeURI = false;

    // First loop on data, validate char and determine if URI is absolute or relative
    while (i < length)
    {
        const char c = input[i];
        // Disallowed characters (space, control characters including DEL, and non ASCII)
        if (c < 0x21 || (unsigned char)c >= 0x7F) return false;
        if (c == '<' || c == '>' || c == '"') return false;
        if (c == '%')
        {
            // An escape sequence needs its two hexadecimal digits inside the text
            if (i + 2 >= length || !isHex(input[i+1]) || !isHex(input[i+2])) return false;
            i += 3; continue;
        }
        if (isIn(excluded, c)) return false;

        // Only the first of ':', '/', '?' and '#' decides: a ':' found after one of the others
        // (a port, or a colon in a path or a query) does not end a scheme.
        if (!absoluteURI && !relativeURI && isIn(breakScheme, c))
        {
            if (c == ':')
            {
                // A scheme can not be empty
                if (i == 0) return false;
                scheme_e = i; absoluteURI = true;
                if (i + 1 < length)
                {
                    if (input[i+1] == '/') hier_part = true;
                    else opaque_part = true;
                }
            }
            else relativeURI = true;
        }
        ++i;
    }

    // Second loop, find the limits of each part
    i = 0;
    if (absoluteURI)
    {
        i = scheme_e + 1;
        if (hier_part)
        {
            // input[i] is the '/' that follows the scheme, a second one starts the authority
            if (i + 1 < length && input[i+1] == '/')
            {
                i += 2;
                auth_s = i;
                // Read the authority now
                while (i < length && (isIn(unreserved, input[i]) || input[i] == '%' || isIn(less_reserved, input[i]))) ++i;
                auth_e = i;
            }
            // Read the path now if any existing
            if (i < length && input[i] == '/')
            {
                path_s = i;
                while (i < length && input[i] != '?' && input[i] != '#') ++i;
                path_e = i;
            }
        }
        else if (opaque_part)
        {
            // Read the path now, it extends up to the fragment ('?' and '/' are part of it)
            path_s = i;
            while (i < length && (isIn(unreserved, input[i]) || input[i] == '%' || isIn(reserved, input[i]))) ++i;
            path_e = i;
        }
    }
    else
    {
        // Anything without a scheme is a relative URI, even when it is only made of a single path segment
        if (i + 1 < length && input[i] == '/' && input[i+1] == '/')
        {
            // Network path, skip both slashes to reach the authority
            i += 2;
            auth_s = i;
            // Read the authority now
            while (i < length && (isIn(unreserved, input[i]) || input[i] == '%' || isIn(less_reserved, input[i]))) ++i;
            auth_e = i;
            // Read the path now if any existing
            if (i < length && input[i] == '/')
            {
                path_s = i;
                while (i < length && input[i] != '?' && input[i] != '#') ++i;
                path_e = i;
            }
        }
        else if (i < length)
        {
            // Read the path now
            path_s = i;
            while (i < length && (isIn(unreserved, input[i]) || input[i] == '%' || isIn(reserved, input[i])) && input[i] != '?') ++i;
            path_e = i;
        }
    }

    // If there is a query read it (it can directly follow the authority, and an opaque part has none)
    if (!opaque_part && i < length && input[i] == '?')
    {
        ++i;
        query_s = i;
        while (i < length && input[i] != '#') ++i;
        query_e = i;
    }

    // Parse the fragment, it extends up to the end of the text
    if (i < length && input[i] == '#')
    {
        frag_s = i+1;
        frag_e = length;
    }

    // The end positions are also used as "this part was found" flags
    scheme = scheme_e ? String(input, scheme_e) : "";
    authority = auth_e ? String(&input[auth_s], auth_e - auth_s) : "";
    path = path_e ? String(&input[path_s], path_e - path_s) : "";
    query = query_e ? String(&input[query_s], query_e - query_s) : "";
    fragment = frag_e ? String(&input[frag_s], frag_e - frag_s) : "";
    return true;
}
#undef isIn
/** Normalize a given path, by resolving its "." and ".." segments.
    The path is consumed from its start, and moved segment by segment to an output stack:
      - A leading "../" or "./" is dropped
      - A leading "/./" or a final "/." is replaced by "/"
      - A leading "/../" or a final "/.." is replaced by "/", and the last segment of the output is removed
      - A path that is only "." or ".." is dropped
      - Else, the first segment (with its leading '/' if any) is moved to the output
    Each step shortens the remaining path, so the loop always ends.
    @param pathToNormalize  On input, the path to normalize, on output the normalized path */
void URL::normalizePath(String & pathToNormalize) const
{
    String outputStack;
    while (pathToNormalize.length())
    {
        const int remaining = pathToNormalize.length();
        if (pathToNormalize.substring(0, 3) == JUCE_T("../")) pathToNormalize = pathToNormalize.substring(3, remaining);
        else if (pathToNormalize.substring(0, 2) == JUCE_T("./")) pathToNormalize = pathToNormalize.substring(2, remaining);
        else if (pathToNormalize.substring(0, 3) == JUCE_T("/./")) pathToNormalize = JUCE_T("/") + pathToNormalize.substring(3, remaining);
        else if (pathToNormalize == JUCE_T("/.")) pathToNormalize = JUCE_T("/");
        // Test the complete segment, so a name that only starts with two dots (like "/..cache") is kept
        else if (pathToNormalize.substring(0, 4) == JUCE_T("/../") || pathToNormalize == JUCE_T("/.."))
        {
            pathToNormalize = JUCE_T("/") + pathToNormalize.substring(4, remaining);
            // Remove the last segment of the output with its leading '/', or the whole output if it has no '/'
            int lastSegmentPos = outputStack.lastIndexOfChar(JUCE_T('/'));
            if (lastSegmentPos != -1) outputStack = outputStack.substring(0, lastSegmentPos);
            else outputStack = JUCE_T("");
        }
        else if (pathToNormalize == JUCE_T(".") || pathToNormalize == JUCE_T("..")) pathToNormalize = JUCE_T("");
        else
        {
            // Search the end of the first segment. The search starts after the first character,
            // so that a leading '/' belongs to the segment being moved.
            int nextSlash = pathToNormalize.indexOfChar(1, JUCE_T('/'));
            if (nextSlash == -1) { outputStack += pathToNormalize; pathToNormalize = JUCE_T(""); }
            else
            {
                outputStack += pathToNormalize.substring(0, nextSlash);
                pathToNormalize = pathToNormalize.substring(nextSlash, remaining);
            }
        }
    }
    pathToNormalize = outputStack;
}
}