diff --git a/.gitignore b/.gitignore index 72dde8933ff..cebd86388ef 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ myhosts blacklist whitelist hosts-* +domains.txt /web.config /__pycache__ /node_modules/ diff --git a/readme_template.md b/readme_template.md index 223bae8652d..088402f66d7 100644 --- a/readme_template.md +++ b/readme_template.md @@ -196,7 +196,8 @@ hosts files to include in the amalgamation. Example: `--extensions porn` or active when `--replace` is also active. `--ip nnn.nnn.nnn.nnn`, or `-i nnn.nnn.nnn.nnn`: the IP address to use as the -target. Default is `0.0.0.0`. +target. Default is `0.0.0.0`. Alternatively, pass `none` to omit the target IP altogether. +Each rule is then written as a bare domain, and the output goes to `domains.txt` instead of `hosts`. `--keepdomaincomments`, or `-k`: `true` (default) or `false`, keep the comments that appear on the same line as domains. The default is `true`. diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 4f580f2e246..63ece2968fe 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -968,6 +968,43 @@ def test_with_comments_raw(self): sys.stdout = StringIO() + def test_no_comment_alt_raw(self): + for rule in ( + "twitter.com", + "google.com", + "foo.bar.edu", + "www.example-foo.bar.edu", + "www.example-3045.foobar.com", + "www.example.xn--fr3nch", + ): + expected = (rule, rule + "\n") + + actual = normalize_rule(rule, target_ip=None, keep_domain_comments=False) + self.assertEqual(actual, expected) + + # Nothing gets printed if there's a match. + output = sys.stdout.getvalue() + self.assertEqual(output, "") + + sys.stdout = StringIO() + + def test_with_comments_alt_raw(self): + for comment in ("foo", "bar", "baz"): + rule = "1.google.co.uk " + comment + expected = ( + "1.google.co.uk", + ("1.google.co.uk # " + comment + "\n"), + ) + + actual = normalize_rule(rule, target_ip=None, keep_domain_comments=True) + self.assertEqual(actual, expected) + + # Nothing gets printed if there's a match. 
+ output = sys.stdout.getvalue() + self.assertEqual(output, "") + + sys.stdout = StringIO() + class TestStripRule(Base): def test_strip_exactly_two(self): diff --git a/updateHostsFile.py b/updateHostsFile.py index c31ed79bd3a..9921ff5c010 100755 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -51,6 +51,8 @@ # Project Settings BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__)) +HOSTS_FILENAME = "hosts" +DOMAINS_FILENAME = "domains.txt" def get_defaults(): @@ -234,6 +236,7 @@ def main(): options = vars(parser.parse_args()) options["outputpath"] = path_join_robust(BASEDIR_PATH, options["outputsubfolder"]) + options["outputfilename"] = HOSTS_FILENAME options["freshen"] = not options["noupdate"] settings = get_defaults() @@ -259,6 +262,14 @@ def main(): source_data_filename = settings["sourcedatafilename"] no_unified_hosts = settings["nounifiedhosts"] + settings["targetip"] = ( + None if str(settings["targetip"]).lower() == "none" else settings["targetip"] + ) + if settings["targetip"] is None: + settings["skipstatichosts"] = True + settings["keepdomaincomments"] = False + options["outputfilename"] = DOMAINS_FILENAME + update_sources = prompt_for_update(freshen=settings["freshen"], update_auto=auto) if update_sources: update_all_sources(source_data_filename, settings["hostfilename"]) @@ -287,19 +298,23 @@ def main(): merge_file = create_initial_file( nounifiedhosts=no_unified_hosts, ) - remove_old_hosts_file(settings["outputpath"], "hosts", settings["backup"]) + remove_old_hosts_file( + settings["outputpath"], options["outputfilename"], settings["backup"] + ) + + final_file = open( + path_join_robust(settings["outputpath"], options["outputfilename"]), "w+b" + ) + temp_file = tempfile.NamedTemporaryFile() + remove_dups_and_excl(merge_file, exclusion_regexes, temp_file) + if settings["compress"]: - final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b") - compressed_file = tempfile.NamedTemporaryFile() - remove_dups_and_excl(merge_file, 
exclusion_regexes, compressed_file) - compress_file(compressed_file, settings["targetip"], final_file) + compress_file(temp_file, settings["targetip"], final_file) elif settings["minimise"]: - final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b") - minimised_file = tempfile.NamedTemporaryFile() - remove_dups_and_excl(merge_file, exclusion_regexes, minimised_file) - minimise_file(minimised_file, settings["targetip"], final_file) + minimise_file(temp_file, settings["targetip"], final_file) else: - final_file = remove_dups_and_excl(merge_file, exclusion_regexes) + shutil.copy(temp_file.name, final_file.name) + temp_file.close() number_of_rules = settings["numberofrules"] output_subfolder = settings["outputsubfolder"] @@ -853,12 +868,16 @@ def compress_file(input_file, target_ip, output_file): ---------- input_file : file The file object that contains the hostnames that we are reducing. - target_ip : str + target_ip : str | None The target IP address. output_file : file The file object that will contain the reduced hostnames. """ + if target_ip is None: + print("Compress file is not supported with targetip to None") + return + input_file.seek(0) # reset file pointer write_data(output_file, "\n") @@ -893,7 +912,7 @@ def minimise_file(input_file, target_ip, output_file): ---------- input_file : file The file object that contains the hostnames that we are reducing. - target_ip : str + target_ip : str | None The target IP address. output_file : file The file object that will contain the reduced hostnames. 
@@ -906,8 +925,10 @@ def minimise_file(input_file, target_ip, output_file): for line in input_file.readlines(): line = line.decode("UTF-8") - if line.startswith(target_ip): - lines.append(line[: line.find("#")].strip() + "\n") + if target_ip is None or line.startswith(target_ip): + minimised_line = line[: line.find("#")].strip() + "\n" + if minimised_line != "\n": + lines.append(minimised_line) for line in lines: write_data(output_file, line) @@ -915,7 +936,7 @@ def minimise_file(input_file, target_ip, output_file): input_file.close() -def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): +def remove_dups_and_excl(merge_file, exclusion_regexes, output_file): """ Remove duplicates and remove hosts that we are excluding. @@ -929,8 +950,7 @@ def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): exclusion_regexes : list The list of regex patterns used to exclude domains. output_file : file - The file object in which the result is written. If None, the file - 'settings["outputpath"]' will be created. + The file object in which the result is written. 
""" number_of_rules = settings["numberofrules"] @@ -943,14 +963,6 @@ def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): if line and not line.startswith("#"): settings["exclusions"].append(line) - if not os.path.exists(settings["outputpath"]): - os.makedirs(settings["outputpath"]) - - if output_file is None: - final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b") - else: - final_file = output_file - merge_file.seek(0) # reset file pointer hostnames = {"localhost", "localhost.localdomain", "local", "broadcasthost"} exclusions = settings["exclusions"] @@ -969,7 +981,7 @@ def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): # Testing the first character doesn't require startswith if line[0] == "#" or re.match(r"^\s*$", line[0]): - write_data(final_file, line) + write_data(output_file, line) continue if "::1" in line: continue @@ -995,15 +1007,14 @@ def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): break if normalized_rule and (hostname not in hostnames) and write_line: - write_data(final_file, normalized_rule) + write_data(output_file, normalized_rule) hostnames.add(hostname) number_of_rules += 1 settings["numberofrules"] = number_of_rules merge_file.close() - if output_file is None: - return final_file + return output_file def normalize_rule(rule, target_ip, keep_domain_comments): @@ -1014,7 +1025,7 @@ def normalize_rule(rule, target_ip, keep_domain_comments): ---------- rule : str The rule whose spelling and spacing we are standardizing. - target_ip : str + target_ip : str | None The target IP address for the rule. keep_domain_comments : bool Whether or not to keep comments regarding these domains in @@ -1048,7 +1059,10 @@ def normalize_response( and spacing reformatted. 
""" - rule = "%s %s" % (target_ip, extracted_hostname) + if target_ip is None: + rule = extracted_hostname + else: + rule = "%s %s" % (target_ip, extracted_hostname) if keep_domain_comments and extracted_suffix: if not extracted_suffix.strip().startswith("#"):