Skip to content

⚡️ Speed up parse_url() by 57% in sentry_sdk/utils.py#15

Open
codeflash-ai[bot] wants to merge 1 commit into master from
codeflash/optimize-parse_url-2024-06-18T23.54.48
Open

⚡️ Speed up parse_url() by 57% in sentry_sdk/utils.py#15
codeflash-ai[bot] wants to merge 1 commit into master from
codeflash/optimize-parse_url-2024-06-18T23.54.48

Conversation

@codeflash-ai
Copy link
Copy Markdown

@codeflash-ai codeflash-ai Bot commented Jun 18, 2024

📄 parse_url() in sentry_sdk/utils.py

📈 Performance improved by 57% (0.57x faster)

⏱️ Runtime went down from 5.36 milliseconds to 3.42 milliseconds

Explanation and details

Certainly! To optimize the program, we'll ensure efficient use of data structures and remove any redundant operations. Here's the optimized version.

Optimizations performed.

  1. String Operations: Optimized string operations by simplifying conditions and removing redundant computations.
  2. Direct Variable Assignments: Directly assigned the parsed query component instead of manipulating intermediary steps.
  3. Eliminating Redundant Lookups: We used direct access and found ways to avoid repeated lookups or operations.

These changes make the code easier to read and improve its runtime efficiency by minimizing unnecessary computations and lookups.

Correctness verification

The new optimized code was tested for correctness. The results are listed below.

✅ 157 Passed − ⚙️ Existing Unit Tests

(click to show existing tests)
- test_utils.py

✅ 14 Passed − 🌀 Generated Regression Tests

(click to show generated tests)
# imports
from collections import namedtuple
from urllib.parse import parse_qs, unquote, urlencode, urlsplit, urlunsplit

import pytest  # used for our unit tests

# function to test
# Placeholder that sanitize_url() writes in place of credentials and query values.
SENSITIVE_DATA_SUBSTITUTE = "SENSITIVE_DATA_SUBSTITUTE"

def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False):
    # type: (str, bool, bool, bool) -> Union[str, Components]
    """
    Removes the authority and query parameter values from a given URL.

    :param url: The URL to sanitize.
    :param remove_authority: When true, anything before the last ``@`` in the
        netloc is replaced by ``SENSITIVE_DATA_SUBSTITUTE`` placeholders.
    :param remove_query_values: When true, every query parameter value is
        replaced by ``SENSITIVE_DATA_SUBSTITUTE``; otherwise the query string
        is passed through untouched.
    :param split: When true, return the ``Components`` tuple instead of the
        re-assembled URL string.
    :returns: The sanitized URL as a string, or as ``Components`` if ``split``
        is true.
    """
    parsed_url = urlsplit(url)

    # strip username:password (netloc can be usr:pwd@example.com)
    netloc = parsed_url.netloc
    if remove_authority:
        # Only the text after the LAST "@" is kept as the host part, so a
        # netloc with several "@" characters is still handled.
        _userinfo, at_sign, host = netloc.rpartition("@")
        if at_sign:
            netloc = "%s:%s@%s" % (
                SENSITIVE_DATA_SUBSTITUTE,
                SENSITIVE_DATA_SUBSTITUTE,
                host,
            )

    # strip values from query string
    if remove_query_values:
        # Parse lazily: the parsed parameters are only needed on this branch.
        query_params = parse_qs(parsed_url.query, keep_blank_values=True)
        query_string = unquote(
            urlencode(dict.fromkeys(query_params, SENSITIVE_DATA_SUBSTITUTE))
        )
    else:
        query_string = parsed_url.query

    components = Components(
        scheme=parsed_url.scheme,
        netloc=netloc,
        query=query_string,
        path=parsed_url.path,
        fragment=parsed_url.fragment,
    )

    if split:
        return components
    return urlunsplit(components)

# Field order follows the (scheme, netloc, path, query, fragment) sequence
# that sanitize_url() hands to urlunsplit().
Components = namedtuple("Components", "scheme netloc path query fragment")

# Three-field result record; the tests below read .url, .query and .fragment
# from parse_url()'s return value (presumably an instance of this type — confirm).
ParsedUrl = namedtuple("ParsedUrl", "url query fragment")
from sentry_sdk.utils import parse_url

# unit tests

def test_standard_url():
    """A URL with path, query and fragment is split into its three parts."""
    parsed = parse_url("http://example.com/path?query=value#fragment", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "query=value",
        "fragment",
    )

def test_standard_url_with_sanitization():
    """With sanitize=True the query value is expected to be replaced by the placeholder."""
    parsed = parse_url("http://example.com/path?query=value#fragment", sanitize=True)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "query=SENSITIVE_DATA_SUBSTITUTE",
        "fragment",
    )

def test_url_with_username_password():
    """Credentials in the netloc are expected to be absent from the returned base URL."""
    # NOTE(review): in the captured page this literal had been mangled to
    # "user:[email protected]" by e-mail obfuscation. "password@example.com"
    # is a reconstruction — confirm against the original PR.
    url = "http://user:password@example.com/path?query=value#fragment"
    result = parse_url(url, sanitize=False)
    assert result.url == "http://example.com/path"
    assert result.query == "query=value"
    assert result.fragment == "fragment"

def test_empty_url():
    """An empty input string is expected to yield three empty parts."""
    parsed = parse_url("", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == ("", "", "")

def test_url_without_scheme():
    """A scheme-less input is expected to keep its host/path text as the base URL."""
    parsed = parse_url("example.com/path?query=value#fragment", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "example.com/path",
        "query=value",
        "fragment",
    )

def test_url_with_unusual_scheme():
    """An ftp:// URL with no query or fragment is expected to come back unchanged."""
    parsed = parse_url("ftp://example.com/resource", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "ftp://example.com/resource",
        "",
        "",
    )

def test_url_with_special_characters_in_query():
    """Percent-encoded spaces in the query are expected to come back decoded."""
    # NOTE(review): the sanitize_url() copy in this listing returns the raw
    # query when remove_query_values is False; confirm that parse_url() really
    # percent-decodes with sanitize=False.
    parsed = parse_url("http://example.com/path?query=value%20with%20spaces", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "query=value with spaces",
        "",
    )

def test_multiple_query_parameters():
    """Several query parameters are expected to be returned in their original order."""
    parsed = parse_url(
        "http://example.com/path?param1=value1&param2=value2&param3=value3",
        sanitize=False,
    )
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "param1=value1&param2=value2&param3=value3",
        "",
    )

def test_internationalized_url():
    """Non-ASCII host, path, query and fragment text is expected to pass through as-is."""
    parsed = parse_url("http://例子.测试/路径?查询=值#片段", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://例子.测试/路径",
        "查询=值",
        "片段",
    )

def test_large_number_of_query_parameters():
    """A query string with 1000 parameters is expected to be returned intact."""
    expected_query = "&".join(f"param{i}=value{i}" for i in range(1000))
    parsed = parse_url("http://example.com/path?" + expected_query, sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        expected_query,
        "",
    )

def test_malformed_url():
    """A URL with three slashes after the scheme is expected to come back unchanged."""
    parsed = parse_url("http:///example.com", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http:///example.com",
        "",
        "",
    )

def test_url_with_only_fragment():
    """A URL with a fragment but no query is expected to yield an empty query part."""
    parsed = parse_url("http://example.com/path#fragment", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "",
        "fragment",
    )

def test_url_with_only_query():
    """A URL with a query but no fragment is expected to yield an empty fragment part."""
    parsed = parse_url("http://example.com/path?query=value", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "query=value",
        "",
    )

def test_url_with_empty_query_and_fragment():
    """A URL with neither query nor fragment is expected to yield empty strings for both."""
    parsed = parse_url("http://example.com/path", sanitize=False)
    assert (parsed.url, parsed.query, parsed.fragment) == (
        "http://example.com/path",
        "",
        "",
    )

🔘 (none found) − ⏪ Replay Tests

Certainly! To optimize the program, we'll ensure efficient use of data structures and remove any redundant operations. Here's the optimized version.



### Optimizations performed.
1. **String Operations**: Optimized string operations by simplifying conditions and removing redundant computations.
2. **Direct Variable Assignments**: Directly assigned the parsed query component instead of manipulating intermediary steps.
3. **Eliminating Redundant Lookups**: We used direct access and found ways to avoid repeated lookups or operations.

These changes make the code easier to read and improve its runtime efficiency by minimizing unnecessary computations and lookups.
@codeflash-ai codeflash-ai Bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Jun 18, 2024
@codeflash-ai codeflash-ai Bot requested a review from ihitamandal June 18, 2024 23:54
@ihitamandal
Copy link
Copy Markdown
Owner

The code change on line 1460 might not be good, because it would fail if there is more than one '@' symbol.

@ihitamandal ihitamandal reopened this Jun 24, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant