about summary refs log tree commit diff stats
path: root/tree-sitter-yts/src/tree_sitter/parser.h
diff options
context:
space:
mode:
authorBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-12-14 12:29:47 +0100
committerBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-12-14 12:29:47 +0100
commita9a4ba60a37eb351d3ed872f1cfcf53e673be9ef (patch)
tree59ab6fe348e7e42c10c33799b5ff51433dc16842 /tree-sitter-yts/src/tree_sitter/parser.h
parentfix(yt_dlp/wrappers/info_json): Add missing fields to `Subtitle` (diff)
downloadyt-a9a4ba60a37eb351d3ed872f1cfcf53e673be9ef.zip
feat(tree-sitter-yts): Update to new tree-sitter version & improve parsing
Diffstat (limited to 'tree-sitter-yts/src/tree_sitter/parser.h')
-rw-r--r--tree-sitter-yts/src/tree_sitter/parser.h433
1 files changed, 229 insertions, 204 deletions
diff --git a/tree-sitter-yts/src/tree_sitter/parser.h b/tree-sitter-yts/src/tree_sitter/parser.h
index 433fdf0..fab4621 100644
--- a/tree-sitter-yts/src/tree_sitter/parser.h
+++ b/tree-sitter-yts/src/tree_sitter/parser.h
@@ -14,240 +14,265 @@
 #define TREE_SITTER_PARSER_H_
 
 #ifdef __cplusplus
-extern "C"
-{
+extern "C" {
 #endif
 
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 
-#define ts_builtin_sym_error ((TSSymbol) - 1)
+#define ts_builtin_sym_error ((TSSymbol)-1)
 #define ts_builtin_sym_end 0
 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
 
-  typedef uint16_t TSStateId;
-
 #ifndef TREE_SITTER_API_H_
-  typedef uint16_t TSSymbol;
-  typedef uint16_t TSFieldId;
-  typedef struct TSLanguage TSLanguage;
+typedef uint16_t TSStateId;
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
 #endif
 
-  typedef struct
-  {
-    TSFieldId field_id;
-    uint8_t child_index;
-    bool inherited;
-  } TSFieldMapEntry;
-
-  typedef struct
-  {
-    uint16_t index;
-    uint16_t length;
-  } TSFieldMapSlice;
-
-  typedef struct
-  {
-    bool visible;
-    bool named;
-    bool supertype;
-  } TSSymbolMetadata;
-
-  typedef struct TSLexer TSLexer;
-
-  struct TSLexer
-  {
-    int32_t lookahead;
-    TSSymbol result_symbol;
-    void (*advance) (TSLexer *, bool);
-    void (*mark_end) (TSLexer *);
-    uint32_t (*get_column) (TSLexer *);
-    bool (*is_at_included_range_start) (const TSLexer *);
-    bool (*eof) (const TSLexer *);
-  };
-
-  typedef enum
-  {
-    TSParseActionTypeShift,
-    TSParseActionTypeReduce,
-    TSParseActionTypeAccept,
-    TSParseActionTypeRecover,
-  } TSParseActionType;
-
-  typedef union
-  {
-    struct
-    {
-      uint8_t type;
-      TSStateId state;
-      bool extra;
-      bool repetition;
-    } shift;
-    struct
-    {
-      uint8_t type;
-      uint8_t child_count;
-      TSSymbol symbol;
-      int16_t dynamic_precedence;
-      uint16_t production_id;
-    } reduce;
+typedef struct {
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+typedef struct {
+  uint16_t index;
+  uint16_t length;
+} TSFieldMapSlice;
+
+typedef struct {
+  bool visible;
+  bool named;
+  bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {
+  int32_t lookahead;
+  TSSymbol result_symbol;
+  void (*advance)(TSLexer *, bool);
+  void (*mark_end)(TSLexer *);
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+  void (*log)(const TSLexer *, const char *, ...);
+};
+
+typedef enum {
+  TSParseActionTypeShift,
+  TSParseActionTypeReduce,
+  TSParseActionTypeAccept,
+  TSParseActionTypeRecover,
+} TSParseActionType;
+
+typedef union {
+  struct {
+    uint8_t type;
+    TSStateId state;
+    bool extra;
+    bool repetition;
+  } shift;
+  struct {
     uint8_t type;
-  } TSParseAction;
-
-  typedef struct
-  {
-    uint16_t lex_state;
-    uint16_t external_lex_state;
-  } TSLexMode;
-
-  typedef union
-  {
-    TSParseAction action;
-    struct
-    {
-      uint8_t count;
-      bool reusable;
-    } entry;
-  } TSParseActionEntry;
-
-  struct TSLanguage
-  {
-    uint32_t version;
-    uint32_t symbol_count;
-    uint32_t alias_count;
-    uint32_t token_count;
-    uint32_t external_token_count;
-    uint32_t state_count;
-    uint32_t large_state_count;
-    uint32_t production_id_count;
-    uint32_t field_count;
-    uint16_t max_alias_sequence_length;
-    const uint16_t *parse_table;
-    const uint16_t *small_parse_table;
-    const uint32_t *small_parse_table_map;
-    const TSParseActionEntry *parse_actions;
-    const char *const *symbol_names;
-    const char *const *field_names;
-    const TSFieldMapSlice *field_map_slices;
-    const TSFieldMapEntry *field_map_entries;
-    const TSSymbolMetadata *symbol_metadata;
-    const TSSymbol *public_symbol_map;
-    const uint16_t *alias_map;
-    const TSSymbol *alias_sequences;
-    const TSLexMode *lex_modes;
-    bool (*lex_fn) (TSLexer *, TSStateId);
-    bool (*keyword_lex_fn) (TSLexer *, TSStateId);
-    TSSymbol keyword_capture_token;
-    struct
-    {
-      const bool *states;
-      const TSSymbol *symbol_map;
-      void *(*create) (void);
-      void (*destroy) (void *);
-      bool (*scan) (void *, TSLexer *, const bool *symbol_whitelist);
-      unsigned (*serialize) (void *, char *);
-      void (*deserialize) (void *, const char *, unsigned);
-    } external_scanner;
-    const TSStateId *primary_state_ids;
-  };
-
-  /*
-   *  Lexer Macros
-   */
-
-#define START_LEXER()                                                         \
-  bool result = false;                                                        \
-  bool skip = false;                                                          \
-  bool eof = false;                                                           \
-  int32_t lookahead;                                                          \
-  goto start;                                                                 \
-  next_state:                                                                 \
-  lexer->advance (lexer, skip);                                               \
-  start:                                                                      \
-  skip = false;                                                               \
+    uint8_t child_count;
+    TSSymbol symbol;
+    int16_t dynamic_precedence;
+    uint16_t production_id;
+  } reduce;
+  uint8_t type;
+} TSParseAction;
+
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+typedef union {
+  TSParseAction action;
+  struct {
+    uint8_t count;
+    bool reusable;
+  } entry;
+} TSParseActionEntry;
+
+typedef struct {
+  int32_t start;
+  int32_t end;
+} TSCharacterRange;
+
+struct TSLanguage {
+  uint32_t version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  uint32_t state_count;
+  uint32_t large_state_count;
+  uint32_t production_id_count;
+  uint32_t field_count;
+  uint16_t max_alias_sequence_length;
+  const uint16_t *parse_table;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSParseActionEntry *parse_actions;
+  const char * const *symbol_names;
+  const char * const *field_names;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const TSSymbolMetadata *symbol_metadata;
+  const TSSymbol *public_symbol_map;
+  const uint16_t *alias_map;
+  const TSSymbol *alias_sequences;
+  const TSLexMode *lex_modes;
+  bool (*lex_fn)(TSLexer *, TSStateId);
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+  TSSymbol keyword_capture_token;
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  const TSStateId *primary_state_ids;
+};
+
+static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
+  uint32_t index = 0;
+  uint32_t size = len - index;
+  while (size > 1) {
+    uint32_t half_size = size / 2;
+    uint32_t mid_index = index + half_size;
+    TSCharacterRange *range = &ranges[mid_index];
+    if (lookahead >= range->start && lookahead <= range->end) {
+      return true;
+    } else if (lookahead > range->end) {
+      index = mid_index;
+    }
+    size -= half_size;
+  }
+  TSCharacterRange *range = &ranges[index];
+  return (lookahead >= range->start && lookahead <= range->end);
+}
+
+/*
+ *  Lexer Macros
+ */
+
+#ifdef _MSC_VER
+#define UNUSED __pragma(warning(suppress : 4101))
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#define START_LEXER()           \
+  bool result = false;          \
+  bool skip = false;            \
+  UNUSED                        \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state:                   \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false;                 \
   lookahead = lexer->lookahead;
 
-#define ADVANCE(state_value)                                                  \
-  {                                                                           \
-    state = state_value;                                                      \
-    goto next_state;                                                          \
+#define ADVANCE(state_value) \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
   }
 
-#define SKIP(state_value)                                                     \
-  {                                                                           \
-    skip = true;                                                              \
-    state = state_value;                                                      \
-    goto next_state;                                                          \
+#define ADVANCE_MAP(...)                                              \
+  {                                                                   \
+    static const uint16_t map[] = { __VA_ARGS__ };                    \
+    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
+      if (map[i] == lookahead) {                                      \
+        state = map[i + 1];                                           \
+        goto next_state;                                              \
+      }                                                               \
+    }                                                                 \
   }
 
-#define ACCEPT_TOKEN(symbol_value)                                            \
-  result = true;                                                              \
-  lexer->result_symbol = symbol_value;                                        \
-  lexer->mark_end (lexer);
+#define SKIP(state_value) \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+#define ACCEPT_TOKEN(symbol_value)     \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
 
 #define END_STATE() return result;
 
-  /*
-   *  Parse Table Macros
-   */
+/*
+ *  Parse Table Macros
+ */
 
-#define SMALL_STATE(id) id - LARGE_STATE_COUNT
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
 
 #define STATE(id) id
 
 #define ACTIONS(id) id
 
-#define SHIFT(state_value)                                                    \
-  {                                                                           \
-    {                                                                         \
-      .shift = {.type = TSParseActionTypeShift, .state = state_value }        \
-    }                                                                         \
-  }
-
-#define SHIFT_REPEAT(state_value)                                             \
-  {                                                                           \
-    {                                                                         \
-      .shift                                                                  \
-          = {.type = TSParseActionTypeShift,                                  \
-             .state = state_value,                                            \
-             .repetition = true }                                             \
-    }                                                                         \
-  }
-
-#define SHIFT_EXTRA()                                                         \
-  {                                                                           \
-    {                                                                         \
-      .shift = {.type = TSParseActionTypeShift, .extra = true }               \
-    }                                                                         \
-  }
-
-#define REDUCE(symbol_val, child_count_val, ...)                              \
-  {                                                                           \
-    {                                                                         \
-      .reduce = { .type = TSParseActionTypeReduce,                            \
-                  .symbol = symbol_val,                                       \
-                  .child_count = child_count_val,                             \
-                  __VA_ARGS__ },                                              \
-    }                                                                         \
-  }
-
-#define RECOVER()                                                             \
-  {                                                                           \
-    {                                                                         \
-      .type = TSParseActionTypeRecover                                        \
-    }                                                                         \
-  }
-
-#define ACCEPT_INPUT()                                                        \
-  {                                                                           \
-    {                                                                         \
-      .type = TSParseActionTypeAccept                                         \
-    }                                                                         \
-  }
+#define SHIFT(state_value)            \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value)          \
+    }                                 \
+  }}
+
+#define SHIFT_REPEAT(state_value)     \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value),         \
+      .repetition = true              \
+    }                                 \
+  }}
+
+#define SHIFT_EXTRA()                 \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .extra = true                   \
+    }                                 \
+  }}
+
+#define REDUCE(symbol_name, children, precedence, prod_id) \
+  {{                                                       \
+    .reduce = {                                            \
+      .type = TSParseActionTypeReduce,                     \
+      .symbol = symbol_name,                               \
+      .child_count = children,                             \
+      .dynamic_precedence = precedence,                    \
+      .production_id = prod_id                             \
+    },                                                     \
+  }}
+
+#define RECOVER()                    \
+  {{                                 \
+    .type = TSParseActionTypeRecover \
+  }}
+
+#define ACCEPT_INPUT()              \
+  {{                                \
+    .type = TSParseActionTypeAccept \
+  }}
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif // TREE_SITTER_PARSER_H_
+#endif  // TREE_SITTER_PARSER_H_