diff --git a/docs/api/models/azure_llama2_model.md b/docs/api/models/azure_llama2_model.md
index 090708d..613efcb 100644
--- a/docs/api/models/azure_llama2_model.md
+++ b/docs/api/models/azure_llama2_model.md
@@ -2,6 +2,7 @@
 ### Methods
 ```python
 __init__(
+    config: AzureSelfDeployedConfiguration,
     temperature: float = 0.0,
     top_p: float = 1.0,
     max_output_tokens: int = 512,
@@ -11,6 +12,7 @@ __init__(
 )
 ```
 #### Parameters
+- `config` (`AzureSelfDeployedConfiguration`): An instance of the `AzureSelfDeployedConfiguration` class.
 - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
 random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
 - `top_p` (`float`): Default: `1.0`.
@@ -26,12 +28,14 @@
 ```python
 generate(
     prompt: str,
+    system_prompt: Optional[str] = None,
     input_data: typing.Optional[typing.List[InputData]] = None,
     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
 ) -> typing.List[ResponseData]:
 ```
 #### Parameters
 - `prompt` (`str`): Prompt to use to query the model.
+- `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
 - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
 generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
 in the `input_mappings` of `InputData`.
@@ -45,16 +49,16 @@ is not provided, the length of this list is equal 1, and the first element is th
 ---
 ```python
-AzureLlama2Model.setup_environment(
-    azure_api_key: str,
-    azure_endpoint_url: str,
-    azure_deployment_name: str
+AzureSelfDeployedConfiguration(
+    api_key: str,
+    endpoint_url: str,
+    deployment: str
 )
 ```
 #### Parameters
-- `azure_api_key` (`str`): Authentication key for the endpoint.
-- `azure_endpoint_url` (`str`): URL of pre-existing endpoint.
-- `azure_deployment_name` (`str`): The name under which the model was deployed.
+- `api_key` (`str`): Authentication key for the endpoint.
+- `endpoint_url` (`str`): URL of pre-existing endpoint.
+- `deployment` (`str`): The name under which the model was deployed.
 
 ---
 
diff --git a/docs/api/models/azure_mistral_model.md b/docs/api/models/azure_mistral_model.md
index e250314..af459b7 100644
--- a/docs/api/models/azure_mistral_model.md
+++ b/docs/api/models/azure_mistral_model.md
@@ -2,6 +2,7 @@
 ### Methods
 ```python
 __init__(
+    config: AzureSelfDeployedConfiguration,
     temperature: float = 0.0,
     top_p: float = 1.0,
     max_output_tokens: int = 1024,
@@ -11,6 +12,7 @@ __init__(
 )
 ```
 #### Parameters
+- `config` (`AzureSelfDeployedConfiguration`): An instance of the `AzureSelfDeployedConfiguration` class.
 - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
 random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
 - `top_p` (`float`): Default: `1.0`.
@@ -38,6 +40,8 @@ generate(
 - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
 format defined by the passed class. Generated response is automatically parsed to this class.
 
+Note that Mistral-based models currently don't support system prompts.
+
 #### Returns
 `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
 is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt.
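For illustration, a minimal sketch of the batch flow documented in the `generate()` section above, with symbolic variables resolved through `InputData` (the import paths for `AzureMistralModel`, `AzureSelfDeployedConfiguration` and `InputData`, and all placeholder values, are assumptions based on the example-usage snippets elsewhere in these docs):

```python
from llm_wrapper.models import AzureMistralModel  # assumed export, mirroring AzureOpenAIModel
from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration
from llm_wrapper.domain.input_data import InputData  # assumed module path

configuration = AzureSelfDeployedConfiguration(
    api_key="<endpoint-key>",
    endpoint_url="<endpoint-url>",
    deployment="<deployment-name>",
)
mistral_model = AzureMistralModel(config=configuration)

# One InputData entry per example; every symbolic variable in the prompt
# ({number} here) needs a mapping in input_mappings.
prompt = "What is {number} squared? Answer with a single number."
input_data = [
    InputData(input_mappings={"number": "2"}, id="0"),
    InputData(input_mappings={"number": "3"}, id="1"),
]

responses = mistral_model.generate(prompt=prompt, input_data=input_data)
for response_data in responses:
    print(response_data.response)  # one ResponseData per example
```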
@@ -45,16 +49,16 @@ is not provided, the length of this list is equal 1, and the first element is th
 ---
 ```python
-AzureMistralModel.setup_environment(
-    azure_api_key: str,
-    azure_endpoint_url: str,
-    azure_deployment_name: str
+AzureSelfDeployedConfiguration(
+    api_key: str,
+    endpoint_url: str,
+    deployment: str
 )
 ```
 #### Parameters
-- `azure_api_key` (`str`): Authentication key for the endpoint.
-- `azure_endpoint_url` (`str`): URL of pre-existing endpoint.
-- `azure_deployment_name` (`str`): The name under which the model was deployed.
+- `api_key` (`str`): Authentication key for the endpoint.
+- `endpoint_url` (`str`): URL of pre-existing endpoint.
+- `deployment` (`str`): The name under which the model was deployed.
 
 ---
 
@@ -69,6 +73,6 @@ configuration = AzureSelfDeployedConfiguration(
     deployment=""
 )
 
-mistral_model = AzureMistralAIModel(config=configuration)
+mistral_model = AzureMistralModel(config=configuration)
 mistral_response = mistral_model.generate("2+2 is?")
 ```
\ No newline at end of file
diff --git a/docs/api/models/azure_openai_model.md b/docs/api/models/azure_openai_model.md
index 0a8bbf2..ed812e8 100644
--- a/docs/api/models/azure_openai_model.md
+++ b/docs/api/models/azure_openai_model.md
@@ -2,6 +2,7 @@
 ### Methods
 ```python
 __init__(
+    config: AzureOpenAIConfiguration,
     temperature: float = 0.0,
     max_output_tokens: int = 512,
     request_timeout_s: int = 60,
@@ -11,6 +12,7 @@ __init__(
 )
 ```
 #### Parameters
+- `config` (`AzureOpenAIConfiguration`): An instance of the `AzureOpenAIConfiguration` class.
 - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
 random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
 - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens
@@ -26,12 +28,14 @@
 ```python
 generate(
     prompt: str,
+    system_prompt: Optional[str] = None,
     input_data: typing.Optional[typing.List[InputData]] = None,
     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
 ) -> typing.List[ResponseData]:
 ```
 #### Parameters
 - `prompt` (`str`): Prompt to use to query the model.
+- `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
 - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
 generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
 in the `input_mappings` of `InputData`.
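For illustration, a minimal sketch of passing the `system_prompt` documented above to `generate()`, reusing the `AzureOpenAIConfiguration` shown in the example-usage section of this file (all placeholder values are assumptions):

```python
from llm_wrapper.models import AzureOpenAIModel
from llm_wrapper.domain.configuration import AzureOpenAIConfiguration

configuration = AzureOpenAIConfiguration(
    api_key="<azure-openai-api-key>",
    base_url="<azure-openai-base-url>",
    api_version="2023-03-15-preview",
    deployment="<deployment-name>",
    model_name="gpt-3.5-turbo",
)
model = AzureOpenAIModel(config=configuration)

# The system prompt conditions every response generated for the prompt below.
responses = model.generate(
    prompt="2+2 is?",
    system_prompt="You are a terse assistant. Answer with a single number.",
)
print(responses[0].response)
```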
@@ -45,32 +49,30 @@ is not provided, the length of this list is equal 1, and the first element is th
 ---
 ```python
-AzureOpenAIModel.setup_environment(
-    openai_api_key: str,
-    openai_api_base: str,
-    openai_api_version: str,
-    openai_api_deployment_name: str,
-    openai_api_type: str = "azure",
-    model_name: str = "gpt-3.5-turbo",
+AzureOpenAIConfiguration(
+    api_key: str,
+    base_url: str,
+    api_version: str,
+    deployment: str,
+    model_name: str
 )
 ```
 Sets up the environment for the `AzureOpenAIModel` model.
 
 #### Parameters
-- `openai_api_key` (`str`): The API key for your Azure OpenAI resource. You can find this in the Azure portal under
+- `api_key` (`str`): The API key for your Azure OpenAI resource. You can find this in the Azure portal under
 your Azure OpenAI resource.
-- `openai_api_base` (`str`): The base URL for your Azure OpenAI resource. You can find this in the Azure portal under
+- `base_url` (`str`): The base URL for your Azure OpenAI resource. You can find this in the Azure portal under
 your Azure OpenAI resource.
-- `openai_api_version` (`str`): The API version.
-- `openai_api_deployment_name` (`str`): The name under which the model was deployed.
-- `openai_api_type` (`str`): Default: `"azure"`.
-- `model_name` (`str`): Model name to use. Default: `"gpt-3.5-turbo"`.
+- `api_version` (`str`): The API version (for example: `2023-03-15-preview`).
+- `deployment` (`str`): The name under which the model was deployed.
+- `model_name` (`str`): Model name to use (for example: `gpt-3.5-turbo` or `gpt-4`).
 
 ---
 
 ### Example usage
 ```python
 from llm_wrapper.models import AzureOpenAIModel
-from llm_wrapped.domain.configuration import AzureOpenAIConfiguration
+from llm_wrapper.domain.configuration import AzureOpenAIConfiguration
 
 configuration = AzureOpenAIConfiguration(
     api_key="",
diff --git a/docs/api/models/vertexai_gemini_model.md b/docs/api/models/vertexai_gemini_model.md
index 401fa0c..db6ba9b 100644
--- a/docs/api/models/vertexai_gemini_model.md
+++ b/docs/api/models/vertexai_gemini_model.md
@@ -2,6 +2,7 @@
 ### Methods
 ```python
 __init__(
+    config: VertexAIConfiguration,
     temperature: float = 0.0,
     top_k: int = 40,
     top_p: float = 0.95,
@@ -13,6 +14,7 @@ __init__(
 )
 ```
 #### Parameters
+- `config` (`VertexAIConfiguration`): An instance of the `VertexAIConfiguration` class.
 - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
 random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
 - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected
@@ -31,12 +33,14 @@
 ```python
 generate(
     prompt: str,
+    system_prompt: Optional[str] = None,
     input_data: typing.Optional[typing.List[InputData]] = None,
     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
 ) -> typing.List[ResponseData]:
 ```
 #### Parameters
 - `prompt` (`str`): Prompt to use to query the model.
+- `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
 - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
 generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
 in the `input_mappings` of `InputData`.
@@ -50,16 +54,14 @@ is not provided, the length of this list is equal 1, and the first element is th
 ---
 ```python
-VertexAIGeminiModel.setup_environment(
-    gcp_project_id: str,
-    gcp_llm_region: str,
-    gcp_model_name: str = "gemini-pro"
+VertexAIConfiguration(
+    cloud_project: str,
+    cloud_location: str
 )
 ```
 #### Parameters
-- `gcp_project_id` (`str`): The GCP project to use when making Vertex API calls.
-- `gcp_llm_region` (`str`): The region to use when making API calls.
-- `gcp_model_name` (`str`): Default: `"gemini-pro"`.
+- `cloud_project` (`str`): The GCP project to use when making Vertex API calls.
+- `cloud_location` (`str`): The region to use when making API calls.
 
 ---
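For illustration, a minimal sketch of building the `VertexAIConfiguration` described above and passing it to the Gemini model (the `VertexAIGeminiModel` import path and the placeholder values are assumptions, mirroring the other example-usage snippets in these docs):

```python
from llm_wrapper.models import VertexAIGeminiModel  # assumed export
from llm_wrapper.domain.configuration import VertexAIConfiguration

configuration = VertexAIConfiguration(
    cloud_project="<gcp-project-id>",
    cloud_location="<gcp-region>",  # region used when making the Vertex API calls
)

gemini_model = VertexAIGeminiModel(config=configuration)
gemini_response = gemini_model.generate("2+2 is?")
print(gemini_response[0].response)
```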
diff --git a/docs/api/models/vertexai_palm_model.md b/docs/api/models/vertexai_palm_model.md
index f366edd..acfd91b 100644
--- a/docs/api/models/vertexai_palm_model.md
+++ b/docs/api/models/vertexai_palm_model.md
@@ -2,6 +2,7 @@
 ### Methods
 ```python
 __init__(
+    config: VertexAIConfiguration,
     temperature: float = 0.0,
     top_k: int = 40,
     top_p: float = 0.95,
@@ -13,6 +14,7 @@ __init__(
 )
 ```
 #### Parameters
+- `config` (`VertexAIConfiguration`): An instance of the `VertexAIConfiguration` class.
 - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
 random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
 - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected
@@ -31,12 +33,14 @@
 ```python
 generate(
     prompt: str,
+    system_prompt: Optional[str] = None,
     input_data: typing.Optional[typing.List[InputData]] = None,
     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
 ) -> typing.List[ResponseData]:
 ```
 #### Parameters
 - `prompt` (`str`): Prompt to use to query the model.
+- `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
 - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
 generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
 in the `input_mappings` of `InputData`.
@@ -50,16 +54,14 @@ is not provided, the length of this list is equal 1, and the first element is th
 ---
 ```python
-VertexAIPalmModel.setup_environment(
-    gcp_project_id: str,
-    gcp_llm_region: str,
-    gcp_model_name: str = "text-bison@001"
+VertexAIConfiguration(
+    cloud_project: str,
+    cloud_location: str
 )
 ```
 #### Parameters
-- `gcp_project_id` (`str`): The GCP project to use when making Vertex API calls.
-- `gcp_llm_region` (`str`): The region to use when making API calls.
-- `gcp_model_name` (`str`): Default: `"text-bison@001"`.
+- `cloud_project` (`str`): The GCP project to use when making Vertex API calls.
+- `cloud_location` (`str`): The region to use when making API calls.
 
 ---
 
diff --git a/docs/usage/deploy_open_source_models.md b/docs/usage/deploy_open_source_models.md
index 6ed1bda..9cf2daa 100644
--- a/docs/usage/deploy_open_source_models.md
+++ b/docs/usage/deploy_open_source_models.md
@@ -3,6 +3,7 @@
 To use Open-source models like Llama or Mistral with llm-wrapper, first you have to deploy it on your own on Azure as
 a ML Online Endpoint. Here's how to do it:
+
 1. Go to [ml.azure.com](https://ml.azure.com/) and use a subscription with a workspace that has access to the
 `Model catalog`.
 2. On the left click `Model catalog`, then under `Introducing Llama 2` click `View models`.
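For illustration, once the endpoint from the steps above exists, its key, URL, and deployment name are the values that `AzureSelfDeployedConfiguration` (documented earlier in this patch) expects. A minimal sketch, where the `AzureLlama2Model` import path and the placeholder values are assumptions:

```python
from llm_wrapper.models import AzureLlama2Model  # assumed export, mirroring the other model classes
from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration

# Values taken from the ML Online Endpoint created in the steps above.
configuration = AzureSelfDeployedConfiguration(
    api_key="<endpoint-authentication-key>",
    endpoint_url="<endpoint-url>",
    deployment="<deployment-name>",
)

llama_model = AzureLlama2Model(config=configuration)
llama_response = llama_model.generate("2+2 is?")
print(llama_response[0].response)
```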
diff --git a/docs/usage/forcing_response_format.md b/docs/usage/forcing_response_format.md
index d6dfc5c..d9bfbdf 100644
--- a/docs/usage/forcing_response_format.md
+++ b/docs/usage/forcing_response_format.md
@@ -7,43 +7,46 @@ it. You just need to provide a data model that describes the desired output form
 As an output you get string already parsed to a provided data model class.
 
 Here's how to use this functionality step by step:
+
 1. Define the desired output data model class. It needs to inherit from pydantic `BaseModel`. Each field should have
 type defined and a description provided in `Field()` which should describe what given field means.
 By providing accurate description, you make it easier for the model to generate proper response.
-    ```python
-    import typing
+
+```python
+import typing
 
-    from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field
 
-    class ReviewOutputDataModel(BaseModel):
-        summary: str = Field(description="Summary of a product description")
-        should_buy: bool = Field(description="Recommendation whether I should buy the product or not")
-        brand_name: str = Field(description="Brand of the coffee")
-        aroma:str = Field(description="Description of the coffee aroma")
-        cons: typing.List[str] = Field(description="List of cons of the coffee")
-    ```
+class ReviewOutputDataModel(BaseModel):
+    summary: str = Field(description="Summary of a product description")
+    should_buy: bool = Field(description="Recommendation whether I should buy the product or not")
+    brand_name: str = Field(description="Brand of the coffee")
+    aroma: str = Field(description="Description of the coffee aroma")
+    cons: typing.List[str] = Field(description="List of cons of the coffee")
+```
 
 2. Provide the data model class together with prompt and input data to the `.generate()` method. `llm-wrapper` will
 automatically force the model to output the data in the provided format and will parse the string returned from the
 model to the provided data model class.
-    ```python
-    review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
+```python
+review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
 
-    prompt = "Summarize review of the coffee. Review: {review}"
-    input_data = [
-        InputData(input_mappings={"review": review}, id="0")
-    ]
+prompt = "Summarize review of the coffee. Review: {review}"
+input_data = [
+    InputData(input_mappings={"review": review}, id="0")
+]
 
-    responses = model.generate(
-        prompt=prompt,
-        input_data=input_data,
-        output_data_model_class=ReviewOutputDataModel
-    )
-    response = responses[0].response
+responses = model.generate(
+    prompt=prompt,
+    input_data=input_data,
+    output_data_model_class=ReviewOutputDataModel
+)
+response = responses[0].response
 ```
 
 Now we can check the response:
+
 ```python
 >>> type(response)
 ReviewOutputDataModel
@@ -74,6 +77,7 @@ to use them in the JSON example.
 ## How forcing response format works under the hood?
 To force the model to provide output in a desired format, under the hood `llm-wrapper` automatically adds a description
 of the desired output format. For example, for the `ReviewOutputDataModel` the description looks like this:
+
 ````text
 The output should be formatted as a JSON instance that conforms to the JSON schema below.