#!/usr/bin/python2.7
r"""
Help automate the process of generating test URLs for rulesets.

Many rulesets have complicated regexes like this:

  ^http://s3(?:-website)?(-ap-(?:nor|sou)theast-1|-(?:eu|us)-west-\d|-external-\d|-sa-east-1)?\.amazonaws\.com/

Under the new requirements for ruleset coverage testing, we need a test URL
that covers each of these branches. Fortunately, the exrex library can automate
expanding the branches of the regex. This script uses that library to generate a
set of plausible test URLs.

NOTE: Usually these test URLs will need manual verification. Some may not
actually exist. Also, if a URL contains a wildcard, e.g. '.', exrex will attempt
to substitute all possible values, creating an explosion of test URLs. We
attempt to detect this by finding regexes that generate more than a thousand
test URLs, and not printing any output for those. You will have to manually find
test cases for URLs with broad wildcards.

Usage:

  ./utils/test-generator.py src/chrome/content/rules/AmazonAWS.xml
  # ... Paste output into your ruleset ...
  # Then test the ruleset:
  python2.7 https-everywhere-checker/src/https_everywhere_checker/check_rules.py \
    https-everywhere-checker/manual.checker.config src/chrome/content/rules/AmazonAWS.xml
"""

import itertools
import sys

import exrex
import lxml
from lxml import etree

# A regex that expands to more than this many URLs is assumed to contain a
# broad wildcard; nothing is printed for it.
MAX_TEST_URLS = 1000

# Compiled once and evaluated against the root element of every ruleset file.
XPATH_FROM = etree.XPath("/ruleset/rule/@from")
XPATH_EXCLUSION = etree.XPath("/ruleset/exclusion/@pattern")


def generate(regex):
    """Print one ``<test url="..." />`` line per string matched by *regex*.

    The regex is expanded with ``exrex.generate``. If the expansion yields
    more than ``MAX_TEST_URLS`` strings, nothing at all is printed for this
    regex, so that wildcard-heavy patterns do not flood the output.

    Args:
        regex: Regular expression source text, e.g. the ``from`` attribute
            of a ``<rule>`` or the ``pattern`` attribute of an
            ``<exclusion>``.
    """
    # Pull at most one URL past the cap: enough to detect overflow without
    # exhausting a potentially enormous expansion.
    urls = list(itertools.islice(exrex.generate(regex), MAX_TEST_URLS + 1))
    if len(urls) > MAX_TEST_URLS:
        return
    for url in urls:
        # Single parenthesised argument: valid under both Python 2 and 3.
        print('<test url="%s" />' % url)


def main(xml_fnames):
    """Generate test URLs for every rule and exclusion in each ruleset file.

    Args:
        xml_fnames: Iterable of paths to ruleset XML files.
    """
    for xml_fname in xml_fnames:
        # Passing the path lets lxml open and close the file itself.
        ruleset = etree.parse(xml_fname).getroot()
        for from_attrib in XPATH_FROM(ruleset):
            generate(from_attrib)
        for pattern_attrib in XPATH_EXCLUSION(ruleset):
            generate(pattern_attrib)


if __name__ == "__main__":
    main(sys.argv[1:])