Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for include_directive in C #57

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions api/analyzers/c/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,50 @@ def process_struct_specifier(self, parent: File, node: Node, path: Path,
# Connect parent to entity
graph.connect_entities('DEFINES', parent.id, entity.id)

def process_include_directive(self, parent: File, node: Node, path: Path, graph: Graph) -> None:
"""
Record an include directive as an INCLUDES edge between two files.

The include target is read from the node text, stripped of its
surrounding quotes or angle brackets, and normalized with
os.path.normpath. A File entity is then created for the target, added
to the graph, and connected to the parent file.

Args:
parent (File): The File entity of the source file containing the directive.
node (Node): The captured include target; asserted to be of type
'system_lib_string' or 'string_literal'.
path (Path): The path of the file being analyzed; used in log messages.
graph (Graph): The graph that receives the included File entity and the edge.

Returns:
None. Returns early, after logging, when the include path is empty
or its bytes cannot be decoded as UTF-8.
"""

# NOTE(review): assert statements are removed under `python -O`, so this
# node-type check does not run in optimized mode.
assert(node.type == 'system_lib_string' or node.type == 'string_literal')


try:
# Decode the raw node bytes and drop the delimiter characters (", <, >)
# from both ends of the include target.
included_file_path = node.text.decode('utf-8').strip('"<>')
if not included_file_path:
logger.warning("Empty include path found in %s", path)
return

# Normalize the path (collapses redundant separators and up-level references).
# NOTE(review): only normalization happens here; no validation is performed.
normalized_path = os.path.normpath(included_file_path)
except UnicodeDecodeError as e:
logger.error("Failed to decode include path in %s: %s", path, e)
return

Comment on lines +335 to +346
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Enhance path validation for security.

Path normalization is already implemented, but normalization alone does not prevent path traversal. The check should also reject local includes that resolve outside the project, and it should validate system includes separately from local includes.

 try:
     included_file_path = node.text.decode('utf-8').strip('"<>')
     if not included_file_path:
         logger.warning("Empty include path found in %s", path)
         return
     
-    # Normalize and validate path
-    normalized_path = os.path.normpath(included_file_path)
+    # Handle system includes and local includes differently
+    is_system_include = node.type == 'system_lib_string'
+    normalized_path = os.path.normpath(included_file_path)
+    
+    if is_system_include:
+        # System includes should be searched in system include paths
+        if '/' in normalized_path:
+            logger.warning("System include with path separator in %s: %s", 
+                         path, included_file_path)
+            return
+    else:
+        # Local includes should not traverse outside the project
+        if normalized_path.startswith('..') or normalized_path.startswith('/'):
+            logger.warning("Path traversal attempt in %s: %s", 
+                         path, included_file_path)
+            return
 except UnicodeDecodeError as e:
     logger.error("Failed to decode include path in %s: %s", path, e)
     return
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
included_file_path = node.text.decode('utf-8').strip('"<>')
if not included_file_path:
logger.warning("Empty include path found in %s", path)
return
# Normalize and validate path
normalized_path = os.path.normpath(included_file_path)
except UnicodeDecodeError as e:
logger.error("Failed to decode include path in %s: %s", path, e)
return
try:
included_file_path = node.text.decode('utf-8').strip('"<>')
if not included_file_path:
logger.warning("Empty include path found in %s", path)
return
# Handle system includes and local includes differently
is_system_include = node.type == 'system_lib_string'
normalized_path = os.path.normpath(included_file_path)
if is_system_include:
# System includes should be searched in system include paths
if '/' in normalized_path:
logger.warning("System include with path separator in %s: %s",
path, included_file_path)
return
else:
# Local includes should not traverse outside the project
if normalized_path.startswith('..') or normalized_path.startswith('/'):
logger.warning("Path traversal attempt in %s: %s",
path, included_file_path)
return
except UnicodeDecodeError as e:
logger.error("Failed to decode include path in %s: %s", path, e)
return

# Split the normalized path into (root, extension).
# NOTE(review): os.path.splitext always returns a 2-tuple, so the length
# check below can never be true; an include without an extension yields an
# empty extension string and falls through to the File creation.
splitted = os.path.splitext(normalized_path)
if len(splitted) < 2:
logger.warning("Include path has no extension: %s", included_file_path)
return

# Create file entity for the included file
# NOTE(review): this rebinds the `path` parameter (the including file's
# path) to the directory part of the included file's path.
path = os.path.dirname(normalized_path)
name = os.path.basename(normalized_path)
ext = splitted[1]
included_file = File(path, name, ext)
graph.add_file(included_file)

# Connect the parent file to the included file
graph.connect_entities('INCLUDES', parent.id, included_file.id)

def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None:
"""
Perform the first pass processing of a C source file or header file.
Expand Down Expand Up @@ -388,6 +432,15 @@ def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None:
for node in structs:
self.process_struct_specifier(file, node, path, graph)

# Process include directives
query = C_LANGUAGE.query("(preproc_include [(string_literal) (system_lib_string)] @include)")
captures = query.captures(tree.root_node)

if 'include' in captures:
includes = captures['include']
for node in includes:
self.process_include_directive(file, node, path, graph)

def second_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None:
"""
Perform the second pass processing of a C source file or header file to establish function call relationships.
Expand Down
8 changes: 8 additions & 0 deletions tests/test_c_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,11 @@ def test_analyzer(self):
self.assertIn('add', callers)
self.assertIn('main', callers)

# Test for include_directive edge creation
included_file = g.get_file('', 'myheader.h', '.h')
self.assertIsNotNone(included_file)

includes = g.get_neighbors([f.id], rel='INCLUDES')
self.assertEqual(len(includes), 3)
included_files = [node['properties']['name'] for node in includes['nodes']]
gkorland marked this conversation as resolved.
Show resolved Hide resolved
self.assertIn('myheader.h', included_files)
Loading