diff --git a/clients/ts-sdk/openapi.json b/clients/ts-sdk/openapi.json
index 453d39f48..b0ce87fdc 100644
--- a/clients/ts-sdk/openapi.json
+++ b/clients/ts-sdk/openapi.json
@@ -12,7 +12,7 @@
       "name": "BSL",
       "url": "https://github.com/devflowinc/trieve/blob/main/LICENSE.txt"
     },
-    "version": "0.11.13"
+    "version": "0.12.0"
   },
   "servers": [
     {
@@ -86,8 +86,8 @@
         "tags": [
           "Analytics"
         ],
-        "summary": "Send Event Data",
-        "description": "This route allows you to send event data to the system.",
+        "summary": "Send User Event Data",
+        "description": "This route allows you to send user event data to the system.",
         "operationId": "send_event_data",
         "parameters": [
           {
@@ -141,8 +141,8 @@
         "tags": [
           "Analytics"
         ],
-        "summary": "Get All Events",
-        "description": "This route allows you to view all events.",
+        "summary": "Get All User Events",
+        "description": "This route allows you to view all user events.",
         "operationId": "get_all_events",
         "requestBody": {
           "description": "JSON request payload to filter the events",
@@ -253,8 +253,8 @@
         "tags": [
           "Analytics"
         ],
-        "summary": "Get Event By ID",
-        "description": "This route allows you to view an event by its ID.",
+        "summary": "Get User Event By ID",
+        "description": "This route allows you to view a user event by its ID. You can pass in any type of event and get the details for that event.",
         "operationId": "get_event_by_id",
         "parameters": [
           {
@@ -6640,6 +6640,7 @@
         "type": "string",
         "enum": [
           "search",
+          "rag",
           "recommendation"
         ]
       },
@@ -7662,6 +7663,14 @@
             "description": "Option for allowing the crawl to follow links to external websites.",
             "nullable": true
           },
+          "body_remove_strings": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Text strings to remove from body when creating chunks for each page",
+            "nullable": true
+          },
           "boost_titles": {
             "type": "boolean",
             "description": "Boost titles such that keyword matches in titles are prioritized in search results. Strongly recommended to leave this on. Defaults to true.",
@@ -7683,6 +7692,14 @@
             "description": "Specify the HTML tags, classes and ids to exclude from the response.",
             "nullable": true
           },
+          "heading_remove_strings": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Text strings to remove from headings when creating chunks for each page",
+            "nullable": true
+          },
           "ignore_sitemap": {
             "type": "boolean",
             "description": "Ignore the website sitemap when crawling, defaults to true.",
@@ -7739,6 +7756,10 @@
           }
         },
         "example": {
+          "body_remove_strings": [
+            "Edit on github"
+          ],
+          "boost_titles": true,
           "exclude_paths": [
             "https://example.com/exclude*"
           ],
@@ -7746,6 +7767,10 @@
             "#ad",
             "#footer"
           ],
+          "heading_remove_strings": [
+            "Advertisement",
+            "Sponsored"
+          ],
           "include_paths": [
             "https://example.com/include*"
           ],
@@ -8842,6 +8867,10 @@
             "type": "string",
             "nullable": true
           },
+          "request_type": {
+            "type": "string",
+            "nullable": true
+          },
           "updated_at": {
             "type": "string"
           },
@@ -8941,9 +8970,12 @@
                 "description": "Any other metadata associated with the event",
                 "nullable": true
               },
-              "request_id": {
-                "type": "string",
-                "description": "The request id of the event to associate it with a request",
+              "request": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/RequestInfo"
+                  }
+                ],
                 "nullable": true
               },
               "user_id": {
@@ -8987,9 +9019,12 @@
                 "description": "Any other metadata associated with the event",
                 "nullable": true
               },
-              "request_id": {
-                "type": "string",
-                "description": "The request id of the event to associate it with a request",
+              "request": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/RequestInfo"
+                  }
+                ],
                 "nullable": true
               },
               "user_id": {
@@ -9025,9 +9060,12 @@
                 "description": "Whether the event is a conversion event",
                 "nullable": true
               },
-              "request_id": {
-                "type": "string",
-                "description": "The request id of the event to associate it with a request",
+              "request": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/RequestInfo"
+                  }
+                ],
                 "nullable": true
               },
               "user_id": {
@@ -9072,9 +9110,12 @@
                 },
                 "description": "The items that were purchased"
               },
-              "request_id": {
-                "type": "string",
-                "description": "The request id of the event to associate it with a request",
+              "request": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/RequestInfo"
+                  }
+                ],
                 "nullable": true
               },
               "user_id": {
@@ -9120,9 +9161,12 @@
                   "type": "string"
                 }
               },
-              "request_id": {
-                "type": "string",
-                "description": "The request id of the event to associate it with a request",
+              "request": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/RequestInfo"
+                  }
+                ],
                 "nullable": true
               },
               "user_id": {
@@ -11864,6 +11908,22 @@
           }
         }
       },
+      "RequestInfo": {
+        "type": "object",
+        "required": [
+          "request_type",
+          "request_id"
+        ],
+        "properties": {
+          "request_id": {
+            "type": "string",
+            "format": "uuid"
+          },
+          "request_type": {
+            "$ref": "#/components/schemas/CTRType"
+          }
+        }
+      },
       "ReturnQueuedChunk": {
         "oneOf": [
           {
diff --git a/clients/ts-sdk/package.json b/clients/ts-sdk/package.json
index e388e2f93..e8dcffa6e 100644
--- a/clients/ts-sdk/package.json
+++ b/clients/ts-sdk/package.json
@@ -6,7 +6,7 @@
   "files": [
     "dist"
   ],
-  "version": "0.0.12",
+  "version": "0.0.13",
   "license": "MIT",
   "scripts": {
     "lint": "eslint 'src/**/*.ts'",
diff --git a/clients/ts-sdk/src/types.gen.ts b/clients/ts-sdk/src/types.gen.ts
index 626a88994..00a31ab47 100644
--- a/clients/ts-sdk/src/types.gen.ts
+++ b/clients/ts-sdk/src/types.gen.ts
@@ -160,7 +160,7 @@ export type CTRSearchQueryWithoutClicksResponse = {
     queries: Array<SearchQueriesWithoutClicksCTRResponse>;
 };
 
-export type CTRType = 'search' | 'recommendation';
+export type CTRType = 'search' | 'rag' | 'recommendation';
 
 export type ChatMessageProxy = {
     content: string;
@@ -446,6 +446,10 @@ export type CrawlOptions = {
      * Option for allowing the crawl to follow links to external websites.
      */
     allow_external_links?: (boolean) | null;
+    /**
+     * Text strings to remove from body when creating chunks for each page
+     */
+    body_remove_strings?: Array<(string)> | null;
     /**
      * Boost titles such that keyword matches in titles are prioritized in search results. Strongly recommended to leave this on. Defaults to true.
      */
@@ -458,6 +462,10 @@ export type CrawlOptions = {
      * Specify the HTML tags, classes and ids to exclude from the response.
      */
     exclude_tags?: Array<(string)> | null;
+    /**
+     * Text strings to remove from headings when creating chunks for each page
+     */
+    heading_remove_strings?: Array<(string)> | null;
     /**
      * Ignore the website sitemap when crawling, defaults to true.
      */
@@ -864,6 +872,7 @@ export type EventData = {
     items: Array<(string)>;
     metadata?: unknown;
     request_id?: (string) | null;
+    request_type?: (string) | null;
     updated_at: string;
     user_id?: (string) | null;
 };
@@ -890,10 +899,7 @@ export type EventTypes = {
      * Any other metadata associated with the event
      */
     metadata?: unknown;
-    /**
-     * The request id of the event to associate it with a request
-     */
-    request_id?: (string) | null;
+    request?: ((RequestInfo) | null);
     /**
      * The user id of the user who viewed the items
      */
@@ -916,10 +922,7 @@ export type EventTypes = {
      * Any other metadata associated with the event
      */
     metadata?: unknown;
-    /**
-     * The request id of the event to associate it with a request
-     */
-    request_id?: (string) | null;
+    request?: ((RequestInfo) | null);
     /**
      * The user id of the user who added the items to the cart
      */
@@ -935,10 +938,7 @@ export type EventTypes = {
      * Whether the event is a conversion event
      */
     is_conversion?: (boolean) | null;
-    /**
-     * The request id of the event to associate it with a request
-     */
-    request_id?: (string) | null;
+    request?: ((RequestInfo) | null);
     /**
      * The user id of the user who clicked the items
      */
@@ -961,10 +961,7 @@ export type EventTypes = {
      * The items that were purchased
      */
     items: Array<(string)>;
-    /**
-     * The request id of the event to associate it with a request
-     */
-    request_id?: (string) | null;
+    request?: ((RequestInfo) | null);
     /**
      * The user id of the user who purchased the items
      */
@@ -989,10 +986,7 @@ export type EventTypes = {
     items: {
         [key: string]: (string);
     };
-    /**
-     * The request id of the event to associate it with a request
-     */
-    request_id?: (string) | null;
+    request?: ((RequestInfo) | null);
     /**
      * The user id of the user who clicked the items
      */
@@ -1843,6 +1837,11 @@ export type RemoveChunkFromGroupReqPayload = {
     chunk_id: string;
 };
 
+export type RequestInfo = {
+    request_id: string;
+    request_type: CTRType;
+};
+
 export type ReturnQueuedChunk = SingleQueuedChunkResponse | BatchQueuedChunkResponse;
 
 export type RoleProxy = 'system' | 'user' | 'assistant';
diff --git a/frontends/dashboard/src/pages/dataset/CrawlingSettings.tsx b/frontends/dashboard/src/pages/dataset/CrawlingSettings.tsx
index 40386faea..f5ce0d25c 100644
--- a/frontends/dashboard/src/pages/dataset/CrawlingSettings.tsx
+++ b/frontends/dashboard/src/pages/dataset/CrawlingSettings.tsx
@@ -492,12 +492,12 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {
           <Error error={errors.exclude_paths} />
         </div>
         <div class="">
-          <div>Include Tags</div>
+          <div>Include Query Selectors</div>
           <MultiStringInput
             disabled={isShopify()}
             placeholder="h1..."
             addClass="bg-magenta-100/40 text-sm px-2 rounded border border-magenta-300/40"
-            addLabel="Add Tag"
+            addLabel="Add Selector"
             onChange={(value) => {
               setOptions("include_tags", value);
             }}
@@ -506,12 +506,12 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {
           <Error error={errors.include_tags} />
         </div>
         <div class="">
-          <div>Exclude Tags</div>
+          <div>Exclude Query Selectors</div>
           <MultiStringInput
             disabled={isShopify()}
             placeholder="button..."
             addClass="bg-magenta-100/40 px-2 text-sm rounded border border-magenta-300/40"
-            addLabel="Add Tag"
+            addLabel="Add Selector"
             onChange={(value) => {
               setOptions("exclude_tags", value);
             }}
@@ -519,6 +519,32 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {
           />
           <Error error={errors.exclude_tags} />
         </div>
+        <div class="">
+          <div>Heading Remove Strings</div>
+          <MultiStringInput
+            placeholder="#"
+            addClass="bg-magenta-100/40 px-2 text-sm rounded border border-magenta-300/40"
+            addLabel="Add Text"
+            onChange={(value) => {
+              setOptions("heading_remove_strings", value);
+            }}
+            value={options.heading_remove_strings || []}
+          />
+          <Error error={errors.heading_remove_strings} />
+        </div>
+        <div class="">
+          <div>Body Remove Strings</div>
+          <MultiStringInput
+            placeholder="#"
+            addClass="bg-magenta-100/40 px-2 text-sm rounded border border-magenta-300/40"
+            addLabel="Add Text"
+            onChange={(value) => {
+              setOptions("body_remove_strings", value);
+            }}
+            value={options.body_remove_strings || []}
+          />
+          <Error error={errors.body_remove_strings} />
+        </div>
       </div>
       <Spacer h={18} />
       <div class="flex justify-end">
diff --git a/server/src/bin/crawl-worker.rs b/server/src/bin/crawl-worker.rs
index 490ce8b75..91f86b9ec 100644
--- a/server/src/bin/crawl-worker.rs
+++ b/server/src/bin/crawl-worker.rs
@@ -81,6 +81,22 @@ fn create_chunk_req_payload(
 ) -> Result<ChunkReqPayload, ServiceError> {
     let image_urls: Vec<String> = product.images.iter().map(|img| img.src.clone()).collect();
 
+    let mut product_title = product.title.clone();
+    let mut variant_title = variant.title.clone();
+    let mut product_body_html = product.body_html.clone();
+
+    if let Some(heading_remove_strings) = &scrape_request.crawl_options.heading_remove_strings {
+        heading_remove_strings.iter().for_each(|remove_string| {
+            product_title = product_title.replace(remove_string, "");
+            variant_title = variant_title.replace(remove_string, "");
+        });
+    }
+    if let Some(body_remove_strings) = &scrape_request.crawl_options.body_remove_strings {
+        body_remove_strings.iter().for_each(|remove_string| {
+            product_body_html = product_body_html.replace(remove_string, "");
+        });
+    }
+
     let link = format!(
         "{}/products/{}?variant={}",
         base_url, product.handle, variant.id
@@ -475,7 +491,7 @@ async fn get_chunks_with_firecrawl(
             }
         }
 
-        let chunked_html = chunk_html(&page_html.clone());
+        let chunked_html = chunk_html(&page_html.clone(), &scrape_request.crawl_options);
 
         for chunk in chunked_html {
             let heading = chunk.0.clone();
diff --git a/server/src/data/models.rs b/server/src/data/models.rs
index 69a6b4473..99645a943 100644
--- a/server/src/data/models.rs
+++ b/server/src/data/models.rs
@@ -6580,6 +6580,9 @@ impl From<CrawlRequest> for CrawlRequestPG {
     "max_depth": 10,
     "include_tags": ["h1", "p", "a", ".main-content"],
     "exclude_tags": ["#ad", "#footer"],
+    "heading_remove_strings": ["Advertisement", "Sponsored"],
+    "body_remove_strings": ["Edit on github"],
+    "boost_titles": true,
 }))]
 pub struct CrawlOptions {
     /// The URL to crawl
@@ -6604,6 +6607,10 @@ pub struct CrawlOptions {
     pub allow_external_links: Option<bool>,
     /// Ignore the website sitemap when crawling, defaults to true.
     pub ignore_sitemap: Option<bool>,
+    /// Text strings to remove from headings when creating chunks for each page
+    pub heading_remove_strings: Option<Vec<String>>,
+    /// Text strings to remove from body when creating chunks for each page
+    pub body_remove_strings: Option<Vec<String>>,
     /// Options for including an openapi spec in the crawl
     pub scrape_options: Option<ScrapeOptions>,
 }
@@ -6643,6 +6650,14 @@ impl CrawlOptions {
             boost_titles: self.boost_titles.or(other.boost_titles),
             scrape_options: self.scrape_options.clone(),
             allow_external_links: self.allow_external_links.or(other.allow_external_links),
+            heading_remove_strings: self
+                .heading_remove_strings
+                .clone()
+                .or(other.heading_remove_strings.clone()),
+            body_remove_strings: self
+                .body_remove_strings
+                .clone()
+                .or(other.body_remove_strings.clone()),
         }
     }
 }
diff --git a/server/src/operators/crawl_operator.rs b/server/src/operators/crawl_operator.rs
index 3268efa84..0557f48e6 100644
--- a/server/src/operators/crawl_operator.rs
+++ b/server/src/operators/crawl_operator.rs
@@ -587,7 +587,7 @@ pub fn get_tags(url: String) -> Vec<String> {
     Vec::new()
 }
 
-pub fn chunk_html(html: &str) -> Vec<(String, String)> {
+pub fn chunk_html(html: &str, crawl_options: &CrawlOptions) -> Vec<(String, String)> {
     let re = Regex::new(r"(?i)<h[1-6].*?>").unwrap();
     let mut chunks = Vec::new();
     let mut current_chunk = String::new();
@@ -651,6 +651,27 @@ pub fn chunk_html(html: &str) -> Vec<(String, String)> {
         chunks.push((headings_text, last_short_chunk));
     }
 
+    chunks = chunks
+        .into_iter()
+        .map(|(headings_text, content)| {
+            let mut headings_text = headings_text.clone();
+            let mut content = content.clone();
+
+            if let Some(heading_remove_strings) = &crawl_options.heading_remove_strings {
+                heading_remove_strings.iter().for_each(|remove_string| {
+                    headings_text = headings_text.replace(remove_string, "");
+                });
+            }
+            if let Some(body_remove_strings) = &crawl_options.body_remove_strings {
+                body_remove_strings.iter().for_each(|remove_string| {
+                    content = content.replace(remove_string, "");
+                });
+            }
+
+            (headings_text, content)
+        })
+        .collect();
+
     chunks.retain(|(headings_text, content)| {
         !headings_text.trim().is_empty() && !content.trim().is_empty()
     });