Handle period at end of URL
Fix parsing URL when encountering a period at the end of the url by setting it as disallowed from being present at the end of a URL. Some characters are disallowed to be present at the end of URLs. Presently, the period character is the only disallowed character. A character is the last character in the URL if it is followed by is_whitespace() or if it's the last character in the string. Closes: https://github.com/damus-io/damus/issues/1638 LNURL1DP68GURN8GHJ7EM9W3SKCCNE9E3K7MF0D3H82UNVWQHKWUN9V4HXGCTHDC6RZVGR8SW3G Signed-off-by: kernelkind <kernelkind@gmail.com> Reviewed-by: William Casarin <jb55@jb55.com> Signed-off-by: William Casarin <jb55@jb55.com>
This commit is contained in:
committed by
William Casarin
parent
f6044a9eea
commit
e547e26d99
@@ -489,6 +489,32 @@ static inline int is_whitespace(int c) {
|
||||
return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
|
||||
}
|
||||
|
||||
|
||||
static inline int next_char_is_whitespace(unsigned char *curChar, unsigned char *endChar) {
|
||||
unsigned char * next = curChar + 1;
|
||||
if(next > endChar) return 0;
|
||||
else if(next == endChar) return 1;
|
||||
return is_whitespace(*next);
|
||||
}
|
||||
|
||||
static int char_disallowed_at_end_url(char c){
|
||||
return c == '.';
|
||||
}
|
||||
|
||||
static inline int is_final_url_char(unsigned char *curChar, unsigned char *endChar){
|
||||
if(is_whitespace(*curChar)){
|
||||
return 1;
|
||||
}
|
||||
else if(next_char_is_whitespace(curChar, endChar)) {
|
||||
// next char is whitespace so this char could be the final char in the url
|
||||
return char_disallowed_at_end_url(*curChar);
|
||||
}
|
||||
else{
|
||||
// next char isn't whitespace so it can't be a final char
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int is_underscore(int c) {
|
||||
return c == '_';
|
||||
}
|
||||
@@ -670,6 +696,23 @@ static inline int consume_until_whitespace(struct cursor *cur, int or_end) {
|
||||
return or_end;
|
||||
}
|
||||
|
||||
static inline int consume_until_end_url(struct cursor *cur, int or_end) {
|
||||
char c;
|
||||
int consumedAtLeastOne = 0;
|
||||
|
||||
while (cur->p < cur->end) {
|
||||
c = *cur->p;
|
||||
|
||||
if (is_final_url_char(cur->p, cur->end))
|
||||
return consumedAtLeastOne;
|
||||
|
||||
cur->p++;
|
||||
consumedAtLeastOne = 1;
|
||||
}
|
||||
|
||||
return or_end;
|
||||
}
|
||||
|
||||
static inline int consume_until_non_alphanumeric(struct cursor *cur, int or_end) {
|
||||
char c;
|
||||
int consumedAtLeastOne = 0;
|
||||
|
||||
@@ -117,7 +117,7 @@ static int consume_url_fragment(struct cursor *cur)
|
||||
|
||||
cur->p++;
|
||||
|
||||
return consume_until_whitespace(cur, 1);
|
||||
return consume_until_end_url(cur, 1);
|
||||
}
|
||||
|
||||
static int consume_url_path(struct cursor *cur)
|
||||
@@ -134,7 +134,7 @@ static int consume_url_path(struct cursor *cur)
|
||||
while (cur->p < cur->end) {
|
||||
c = *cur->p;
|
||||
|
||||
if (c == '?' || c == '#' || is_whitespace(c)) {
|
||||
if (c == '?' || c == '#' || is_final_url_char(cur->p, cur->end)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -152,7 +152,7 @@ static int consume_url_host(struct cursor *cur)
|
||||
while (cur->p < cur->end) {
|
||||
c = *cur->p;
|
||||
// TODO: handle IDNs
|
||||
if (is_alphanumeric(c) || c == '.' || c == '-')
|
||||
if ((is_alphanumeric(c) || c == '.' || c == '-') && !is_final_url_char(cur->p, cur->end))
|
||||
{
|
||||
count++;
|
||||
cur->p++;
|
||||
|
||||
Reference in New Issue
Block a user