Skip to content

Commit

Permalink
Introduce a customizable and flexible system role
Browse files Browse the repository at this point in the history
  • Loading branch information
igorlima committed Feb 17, 2025
1 parent 295e2a6 commit 4c42b81
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 7 deletions.
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,14 @@ kwargs = {}

## system prompt to use for the vision model
custom_system_prompt = None

# to override
# custom_system_prompt = "For the below pdf page, do something..something..." ## example

## system role to use for the vision model
custom_role = None
# to override
# custom_role = "user" ## example

###################### Example for OpenAI ######################
model = "gpt-4o-mini" ## openai model
os.environ["OPENAI_API_KEY"] = "" ## your-api-key
Expand Down Expand Up @@ -236,7 +240,8 @@ async def main():

output_dir = "./output_test" ## directory to save the consolidated markdown file
result = await zerox(file_path=file_path, model=model, output_dir=output_dir,
custom_system_prompt=custom_system_prompt,select_pages=select_pages, **kwargs)
custom_system_prompt=custom_system_prompt, select_pages=select_pages,
custom_role=custom_role, **kwargs)
return result


Expand All @@ -259,6 +264,7 @@ async def zerox(
output_dir: Optional[str] = None,
temp_dir: Optional[str] = None,
custom_system_prompt: Optional[str] = None,
custom_role: Optional[str] = None,
select_pages: Optional[Union[int, Iterable[int]]] = None,
**kwargs
) -> ZeroxOutput:
Expand All @@ -283,7 +289,9 @@ Parameters
- **temp_dir** (str, optional):
The directory to store temporary files; defaults to a named folder in the system's temp directory. If it already exists, its contents will be deleted before zerox uses it.
- **custom_system_prompt** (str, optional):
The system prompt to use for the model, this overrides the default system prompt of zerox.Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning. Defaults to None.
The system prompt to use for the model, this overrides the default system prompt of zerox. Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning. Defaults to None.
- **custom_role** (str, optional):
The role attached to the prompt messages zerox sends to the model, overriding the default `system` role. Typically, this isn't necessary unless a given LLM needs a particular role (for example, `user`). If you set it, a friendly warning will be displayed. Defaults to None.
- **select_pages** (Optional[Union[int, Iterable[int]]], optional):
Pages to process; can be a single page number or an iterable of page numbers. Defaults to None.
- **kwargs** (dict, optional):
Expand Down
4 changes: 4 additions & 0 deletions py_zerox/pyzerox/constants/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class Messages:
Custom system prompt was provided which overrides the default system prompt. We assume that you know what you are doing.
"""

CUSTOM_SYSTEM_ROLE_WARNING = """
Custom system role was provided which overrides the default system role. We assume that you know what you are doing.\
"""

MAINTAIN_FORMAT_SELECTED_PAGES_WARNING = """
The maintain_format flag is set to True in conjunction with select_pages input given. This may result in unexpected behavior.
"""
Expand Down
2 changes: 1 addition & 1 deletion py_zerox/pyzerox/constants/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ class Prompts:
Convert the following PDF page to markdown.
Return only the markdown with no explanation text.
Do not exclude any content from the page.
"""
"""
4 changes: 4 additions & 0 deletions py_zerox/pyzerox/constants/roles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class Roles:
    """Namespace holding the message-role constants used by zerox."""

    # Role attached to zerox's prompt messages when no custom role is set.
    DEFAULT_SYSTEM_ROLE = "system"
1 change: 1 addition & 0 deletions py_zerox/pyzerox/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class ZeroxArgs:
output_dir: Optional[str] = None
temp_dir: Optional[str] = None
custom_system_prompt: Optional[str] = None
custom_role: Optional[str] = None
select_pages: Optional[Union[int, Iterable[int]]] = None
kwargs: Dict[str, Any] = field(default_factory=dict)

Expand Down
9 changes: 8 additions & 1 deletion py_zerox/pyzerox/core/zerox.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ async def zerox(
output_dir: Optional[str] = None,
temp_dir: Optional[str] = None,
custom_system_prompt: Optional[str] = None,
custom_role: Optional[str] = None,
select_pages: Optional[Union[int, Iterable[int]]] = None,
**kwargs
) -> ZeroxOutput:
Expand All @@ -57,6 +58,8 @@ async def zerox(
:type temp_dir: str, optional
:param custom_system_prompt: The system prompt to use for the model, this overrides the default system prompt of zerox. Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning, defaults to None
:type custom_system_prompt: str, optional
:param custom_role: The role assigned to the model can be customized, overriding the default system role. Typically, this isn't necessary unless you need to specify a particular role for a given LLM. If you choose to set it, a friendly warning will be displayed. By default, this option is set to None.
:type custom_role: str, optional
:param select_pages: Pages to process, can be a single page number or an iterable of page numbers, defaults to None
:type select_pages: int or Iterable[int], optional
Expand All @@ -82,6 +85,10 @@ async def zerox(
if custom_system_prompt:
vision_model.system_prompt = custom_system_prompt

# override the system role if a custom role is provided
if custom_role:
vision_model.system_role = custom_role

# Check if both maintain_format and select_pages are provided
if maintain_format and select_pages is not None:
warnings.warn(Messages.MAINTAIN_FORMAT_SELECTED_PAGES_WARNING)
Expand Down Expand Up @@ -199,4 +206,4 @@ async def zerox(
input_tokens=input_token_count,
output_tokens=output_token_count,
pages=formatted_pages,
)
)
21 changes: 19 additions & 2 deletions py_zerox/pyzerox/models/modellitellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
from ..errors import ModelAccessError, NotAVisionModel, MissingEnvironmentVariables
from ..constants.messages import Messages
from ..constants.prompts import Prompts
from ..constants.roles import Roles
from ..processor.image import encode_image_to_base64

DEFAULT_SYSTEM_PROMPT = Prompts.DEFAULT_SYSTEM_PROMPT
DEFAULT_SYSTEM_ROLE = Roles.DEFAULT_SYSTEM_ROLE


class litellmmodel(BaseModel):
## setting the default system prompt
_system_prompt = DEFAULT_SYSTEM_PROMPT
_system_role = DEFAULT_SYSTEM_ROLE

def __init__(
self,
Expand All @@ -41,6 +44,11 @@ def __init__(
def system_prompt(self) -> str:
'''Returns the system prompt for the model.'''
return self._system_prompt

@property
def system_role(self) -> str:
    """Return the role currently stored in ``_system_role`` for this model."""
    return self._system_role

@system_prompt.setter
def system_prompt(self, prompt: str) -> None:
Expand All @@ -49,6 +57,15 @@ def system_prompt(self, prompt: str) -> None:
'''
self._system_prompt = prompt

@system_role.setter
def system_role(self, role: str) -> None:
    """
    Set/override the role used for the prompt messages sent to the model.

    A warning is emitted on every assignment so the user is told that the
    default role has been replaced.

    :param role: The role string to store in ``_system_role``.
    :type role: str
    """
    # The message constant already ends with a period, so join with a space
    # instead of ". " (which produced "doing.. Default role ..."); strip()
    # drops the newline/whitespace left over from the triple-quoted literal.
    # stacklevel=2 attributes the warning to the code that assigned the
    # role rather than to this setter.
    warnings.warn(
        f"{Messages.CUSTOM_SYSTEM_ROLE_WARNING.strip()} "
        f"Default role for zerox is: {DEFAULT_SYSTEM_ROLE}",
        stacklevel=2,
    )
    self._system_role = role

## custom method on top of BaseModel
def validate_environment(self) -> None:
"""Validates the environment variables required for the model."""
Expand Down Expand Up @@ -123,7 +140,7 @@ async def _prepare_messages(
# Default system message
messages: List[Dict[str, Any]] = [
{
"role": "system",
"role": self._system_role,
"content": self._system_prompt,
},
]
Expand All @@ -133,7 +150,7 @@ async def _prepare_messages(
if maintain_format and prior_page:
messages.append(
{
"role": "system",
"role": self._system_role,
"content": f'Markdown must maintain consistent formatting with the following page: \n\n """{prior_page}"""',
},
)
Expand Down

0 comments on commit 4c42b81

Please sign in to comment.