feat(plugin): add clearURLs (#47)

2024-09-20 15:20:35 +00:00 · 2022-10-05 10:37:49 -04:00 · 2022-10-05 10:37:49 -04:00 · 45644dec43
commit 45644dec43
parent 3e0355cb53
2 changed files with 268 additions and 0 deletions
--- a/src/plugins/clearURLs/defaultRules.ts
+++ b/src/plugins/clearURLs/defaultRules.ts
@ -0,0 +1,134 @@
+/*
+ * Global Parameter
+ * `utm_source`
+ *
+ * Parameter restricted to domain
+ * `feature@youtube.com`
+ *
+ * Domains wildcards
+ * `tag@amazon.*`
+ *
+ * Parameter wildcards
+ * `utm_*`
+ *
+ */
+
+/**
+ * Built-in list of query-parameter rules.
+ *
+ * Each entry is either a bare parameter name (applies on every host) or
+ * `parameter@host` (applies only when the URL's host matches). A `*` in
+ * either half acts as a wildcard.
+ */
+export const defaultRules = [
+    "action_object_map",
+    "action_type_map",
+    "action_ref_map",
+    "spm@*.aliexpress.com",
+    "scm@*.aliexpress.com",
+    "aff_platform",
+    "aff_trace_key",
+    "algo_expid@*.aliexpress.*",
+    "algo_pvid@*.aliexpress.*",
+    "btsid",
+    "ws_ab_test",
+    "pd_rd_*@amazon.*",
+    "_encoding@amazon.*",
+    "psc@amazon.*",
+    "tag@amazon.*",
+    "ref_@amazon.*",
+    "pf_rd_*@amazon.*",
+    "pf@amazon.*",
+    "crid@amazon.*",
+    "keywords@amazon.*",
+    "sprefix@amazon.*",
+    "sr@amazon.*",
+    "ie@amazon.*",
+    "node@amazon.*",
+    "qid@amazon.*",
+    "callback@bilibili.com",
+    "cvid@bing.com",
+    "form@bing.com",
+    "sk@bing.com",
+    "sp@bing.com",
+    "sc@bing.com",
+    "qs@bing.com",
+    "pq@bing.com",
+    "sc_cid",
+    "mkt_tok",
+    "trk",
+    "trkCampaign",
+    "ga_*",
+    "gclid",
+    "gclsrc",
+    "hmb_campaign",
+    "hmb_medium",
+    "hmb_source",
+    "spReportId",
+    "spJobID",
+    "spUserID",
+    "spMailingID",
+    "itm_*",
+    "s_cid",
+    "elqTrackId",
+    "elqTrack",
+    "assetType",
+    "assetId",
+    "recipientId",
+    "campaignId",
+    "siteId",
+    "mc_cid",
+    "mc_eid",
+    "pk_*",
+    "sc_campaign",
+    "sc_channel",
+    "sc_content",
+    "sc_medium",
+    "sc_outcome",
+    "sc_geo",
+    "sc_country",
+    "nr_email_referer",
+    "vero_conv",
+    "vero_id",
+    "yclid",
+    "_openstat",
+    "mbid",
+    "cmpid",
+    "cid",
+    "c_id",
+    "campaign_id",
+    "Campaign",
+    "hash@ebay.*",
+    "fb_action_ids",
+    "fb_action_types",
+    "fb_ref",
+    "fb_source",
+    "fbclid",
+    "refsrc@facebook.com",
+    "hrc@facebook.com",
+    "gs_l",
+    "gs_lcp@google.*",
+    "ved@google.*",
+    "ei@google.*",
+    "sei@google.*",
+    "gws_rd@google.*",
+    "gs_gbg@google.*",
+    "gs_mss@google.*",
+    "gs_rn@google.*",
+    "_hsenc",
+    "_hsmi",
+    "__hssc",
+    "__hstc",
+    "hsCtaTracking",
+    "source@sourceforge.net",
+    "position@sourceforge.net",
+    "t@*.twitter.com",
+    "s@*.twitter.com",
+    "ref_*@*.twitter.com",
+    "tt_medium",
+    "tt_content",
+    "lr@yandex.*",
+    "redircnt@yandex.*",
+    "feature@youtube.com",
+    "kw@youtube.com",
+    "wt_zmc",
+    "utm_source",
+    "utm_content",
+    "utm_medium",
+    "utm_campaign",
+    "utm_term",
+    "si@open.spotify.com",
+];
--- a/src/plugins/clearURLs/index.ts
+++ b/src/plugins/clearURLs/index.ts
@ -0,0 +1,134 @@
+import { defaultRules } from "./defaultRules";
+import {
+    addPreSendListener,
+    addPreEditListener,
+    MessageObject,
+    removePreSendListener,
+    removePreEditListener,
+} from "../../api/MessageEvents";
+import definePlugin from "../../utils/types";
+
+// From lodash
+// Matches every character that has special meaning inside a regular
+// expression. Global, so `String.prototype.replace` rewrites all of them.
+const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
+// Same pattern without the global flag, so `.test()` carries no `lastIndex`
+// state between calls.
+const reHasRegExpChar = RegExp(reRegExpChar.source);
+
+export default definePlugin({
+    name: "clearURLs",
+    description: "Removes tracking garbage from URLs",
+    authors: [
+        {
+            name: "adryd",
+            id: 0n,
+        },
+    ],
+    dependencies: ["MessageEventsAPI"],
+
+    /**
+     * Backslash-escapes every regular-expression metacharacter in `str`.
+     *
+     * @param str Text to escape.
+     * @returns The escaped text; `str` itself when it contains no
+     * metacharacters; `""` when `str` is falsy.
+     */
+    escapeRegExp(str: string) {
+        if (!str) {
+            return "";
+        }
+        if (!reHasRegExpChar.test(str)) {
+            // Nothing to escape, hand the input back untouched
+            return str;
+        }
+        return str.replace(reRegExpChar, "\\$&");
+    },
+
+    /**
+     * Compiles the rule strings into the lookup structures read by `replacer`.
+     *
+     * Populates three fields on the plugin object:
+     * - `universalRules`: Set of parameter RegExps that apply on every host.
+     * - `hostRules`: Map from a host RegExp's string form to that RegExp.
+     * - `rulesByHost`: Map from the same string key to the Set of parameter
+     *   RegExps that apply on matching hosts.
+     */
+    createRules() {
+        // Can be extended upon once user configs are available
+        // Eg. (useDefaultRules: boolean, customRules: Array[string])
+        const rules = defaultRules;
+
+        this.universalRules = new Set();
+        this.rulesByHost = new Map();
+        this.hostRules = new Map();
+
+        for (const rule of rules) {
+            // "param" or "param@host"
+            const splitRule = rule.split("@");
+            // After escaping, a wildcard appears as `\*`; turn every
+            // occurrence (not just the first) into a lazy "one or more".
+            const paramRule = new RegExp(
+                "^" +
+                this.escapeRegExp(splitRule[0]).replace(/\\\*/g, ".+?") +
+                "$"
+            );
+
+            if (!splitRule[1]) {
+                this.universalRules.add(paramRule);
+                continue;
+            }
+            const hostRule = new RegExp(
+                "^(www\\.)?" +
+                this.escapeRegExp(splitRule[1])
+                    // A leading `*.` also matches the bare domain
+                    .replace(/^\\\*\\\./, "(.+?\\.)?")
+                    // Every remaining wildcard matches one or more characters
+                    .replace(/\\\*/g, ".+?") +
+                "$"
+            );
+            // The RegExp's source text doubles as the key that ties
+            // `hostRules` and `rulesByHost` together.
+            const hostRuleIndex = hostRule.toString();
+
+            this.hostRules.set(hostRuleIndex, hostRule);
+            if (this.rulesByHost.get(hostRuleIndex) == null) {
+                this.rulesByHost.set(hostRuleIndex, new Set());
+            }
+            this.rulesByHost.get(hostRuleIndex).add(paramRule);
+        }
+    },
+
+    /**
+     * Deletes `param` from `parent` when it satisfies `rule`.
+     *
+     * @param rule Exact parameter name, or a RegExp tested against the name.
+     * @param param Name of the query parameter under consideration.
+     * @param parent The search-params collection to delete from.
+     */
+    removeParam(rule: string | RegExp, param: string, parent: URLSearchParams) {
+        if (param !== rule) {
+            // Not an exact hit, so only a matching RegExp rule can remove it
+            if (!(rule instanceof RegExp)) return;
+            if (!rule.test(param)) return;
+        }
+        parent.delete(param);
+    },
+
+    /**
+     * Strips query parameters matched by the compiled rules from one URL.
+     *
+     * @param match Candidate URL text taken from a message.
+     * @returns The URL with matching parameters removed. `match` is returned
+     * unchanged when it cannot be parsed, has no query parameters, or no
+     * parameter was removed.
+     */
+    replacer(match: string) {
+        // Parse URL without throwing errors
+        let url: URL;
+        try {
+            url = new URL(match);
+        } catch (error) {
+            // Don't modify anything if we can't parse the URL
+            return match;
+        }
+
+        const params = url.searchParams;
+        const originalNames = Array.from(params.keys());
+        if (originalNames.length === 0) {
+            // No search params, so there is nothing to clean
+            return match;
+        }
+
+        // Iterate over a snapshot of the names. URLSearchParams iteration is
+        // live, so deleting an entry while iterating the collection itself
+        // skips the entry that follows it (e.g. `ga_a=1&ga_b=2` would keep
+        // `ga_b`).
+        const paramNames = Array.from(new Set(originalNames));
+
+        // Check all universal rules
+        this.universalRules.forEach((rule) => {
+            for (const param of paramNames) {
+                this.removeParam(rule, param, params);
+            }
+        });
+
+        // Check rules for each hosts that match
+        this.hostRules.forEach((regex, hostRuleName) => {
+            if (!regex.test(url.hostname)) return;
+            this.rulesByHost.get(hostRuleName).forEach((rule) => {
+                for (const param of paramNames) {
+                    this.removeParam(rule, param, params);
+                }
+            });
+        });
+
+        // If nothing was removed, keep the text exactly as it was written
+        // instead of returning the parser's normalised form.
+        if (Array.from(params.keys()).length === originalNames.length) {
+            return match;
+        }
+
+        return url.toString();
+    },
+
+    /**
+     * Rewrites `msg.content` in place, passing every URL found in it through
+     * `replacer`.
+     *
+     * @param msg Message whose `content` is being cleaned.
+     */
+    onSend(msg: MessageObject) {
+        // Skip messages that contain no http(s) scheme at all
+        if (!msg.content.match(/http(s)?:\/\//)) return;
+
+        const urlPattern = /(https?:\/\/[^\s<]+[^<.,:;"'>)|\]\s])/g;
+        const cleaned = msg.content.replace(
+            urlPattern,
+            (found) => this.replacer(found)
+        );
+        msg.content = cleaned;
+    },
+
+    /**
+     * Builds the rule tables, then registers the pre-send and pre-edit
+     * listeners, keeping the values returned by registration on
+     * `this.preSend` / `this.preEdit` for `stop` to use.
+     */
+    start() {
+        this.createRules();
+
+        // Both hooks run the same cleaning step on the message
+        const clean = (msg: MessageObject) => this.onSend(msg);
+
+        this.preSend = addPreSendListener((_, msg) => clean(msg));
+        this.preEdit = addPreEditListener((_cid, _mid, msg) => clean(msg));
+    },
+
+    /**
+     * Unregisters the listeners stored on `this.preSend` and `this.preEdit`.
+     */
+    stop() {
+        const { preSend, preEdit } = this;
+        removePreSendListener(preSend);
+        removePreEditListener(preEdit);
+    },
+});