/*
 * commit 807527310d6d985dc147a759d0da0a3a147058a7
 * Author: Benedek László
 * Date:   Fri Apr 12 14:50:11 2024 +0200
 *
 *     init
 *
 * Sources added by this commit, one section per file.
 */

/* ========================================================================
 * classfile.h  (new file, index 0000000..abdb419)
 * ======================================================================== */

// types for Java's Classfile structure
// https://docs.oracle.com/javase/specs/jvms/se22/html/jvms-4.html

#ifndef CLASSFILE_H
#define CLASSFILE_H

#include <stdint.h>

// Every structure declared here is byte-packed. The loader fills the
// structures member by member, so packing only fixes the in-memory layout.
#pragma pack(1)

typedef uint8_t u1;
typedef uint16_t u2;
typedef uint32_t u4;

// Access and property flags. Several values are shared because the same bit
// is given different names for classes, fields and methods.
typedef enum {
  ACC_PUBLIC = 0x0001,
  ACC_PRIVATE = 0x0002,
  ACC_PROTECTED = 0x0004,
  ACC_STATIC = 0x0008,
  ACC_FINAL = 0x0010,
  ACC_SUPER = 0x0020,
  ACC_SYNCHRONIZED = 0x0020,
  ACC_VOLATLE = 0x0040, // misspelled name kept so existing users still compile
  ACC_VOLATILE = 0x0040,
  ACC_BRIDGE = 0x0040,
  ACC_TRANSIENT = 0x0080,
  ACC_VARARGS = 0x0080,
  ACC_NATIVE = 0x0100,
  ACC_INTERFACE = 0x0200,
  ACC_ABSTRACT = 0x0400,
  ACC_STRICT = 0x0800,
  ACC_SYNTHETIC = 0x1000,
  ACC_ANNOTATION = 0x2000,
  ACC_ENUM = 0x4000,
  ACC_MODULE = 0x8000,
} ACCESS_FLAGS;

// Tag byte that starts every constant pool entry.
typedef enum {
  CONSTANT_Class = 7,
  CONSTANT_Fieldref = 9,
  CONSTANT_Methodref = 10,
  CONSTANT_InterfaceMethodref = 11,
  CONSTANT_String = 8,
  CONSTANT_Integer = 3,
  CONSTANT_Float = 4,
  CONSTANT_Long = 5,
  CONSTANT_Double = 6,
  CONSTANT_NameAndType = 12,
  CONSTANT_Utf8 = 1,
  CONSTANT_MethodHandle = 15,
  CONSTANT_MethodType = 16,
  CONSTANT_Dynamic = 17,
  CONSTANT_InvokeDynamic = 18,
  CONSTANT_Module = 19,
  CONSTANT_Package = 20,
} CONSTANT_TAGS;

// One constant pool slot. `info` points to the CONSTANT_*_info structure that
// matches `tag`; an unusable slot (the one following a Long or Double entry)
// has tag 0 and info NULL.
typedef struct {
  u1 tag;
  void *info;
} cp_info;

typedef struct {
  u1 tag;
  u2 name_index;
} CONSTANT_Class_info;

typedef struct {
  u1 tag;
  u2 class_index;
  u2 name_and_type_index;
} CONSTANT_Fieldref_info;
typedef CONSTANT_Fieldref_info CONSTANT_Methodref_info;
typedef CONSTANT_Fieldref_info CONSTANT_InterfaceMethodref_info;

typedef struct {
  u1 tag;
  u2 string_index;
} CONSTANT_String_info;

typedef struct {
  u1 tag;
  u4 bytes;
} CONSTANT_Integer_info;
typedef CONSTANT_Integer_info CONSTANT_Float_info;

typedef struct {
  u1 tag;
  u4 high_bytes;
  u4 low_bytes;
} CONSTANT_Long_info;
typedef CONSTANT_Long_info CONSTANT_Double_info;

typedef struct {
  u1 tag;
  u2 name_index;
  u2 descriptor_index;
} CONSTANT_NameAndType_info;

// `bytes` holds exactly `length` bytes and is NOT NUL-terminated.
typedef struct {
  u1 tag;
  u2 length;
  u1 *bytes;
} CONSTANT_Utf8_info;

typedef struct {
  u1 tag;
  u1 reference_kind;
  u2 reference_index;
} CONSTANT_MethodHandle_info;

typedef struct {
  u1 tag;
  u2 descriptor_index;
} CONSTANT_MethodType_info;

typedef struct {
  u1 tag;
  u2 bootstrap_method_attr_index;
  u2 name_and_type_index;
} CONSTANT_Dynamic_info;
typedef CONSTANT_Dynamic_info CONSTANT_InvokeDynamic_info;

typedef struct {
  u1 tag;
  u2 name_index;
} CONSTANT_Module_info;

typedef struct {
  u1 tag;
  u2 name_index;
} CONSTANT_Package_info;

// `info` holds `attribute_length` raw, uninterpreted bytes.
typedef struct {
  u2 attribute_name_index;
  u4 attribute_length;
  u1 *info;
} attribute_info;

typedef struct {
  u2 access_flags;
  u2 name_index;
  u2 descriptor_index;
  u2 attributes_count;
  attribute_info *attributes;
} field_info;

typedef struct {
  u2 access_flags;
  u2 name_index;
  u2 descriptor_index;
  u2 attributes_count;
  attribute_info *attributes;
} method_info;

// In-memory image of a class file. `constant_pool` has
// `constant_pool_count - 1` slots; slot i holds the entry whose class-file
// index is i + 1. All multi-byte values are stored in host byte order.
typedef struct {
  u4 magic;
  u2 minor_version;
  u2 major_version;
  u2 constant_pool_count;
  cp_info *constant_pool;
  u2 access_flags;
  u2 this_class;
  u2 super_class;
  u2 interfaces_count;
  u2 *interfaces;
  u2 fields_count;
  field_info *fields;
  u2 methods_count;
  method_info *methods;
  u2 attributes_count;
  attribute_info *attributes;
} ClassFile;

/**
 * Parses the class file at `path`.
 *
 * @return a newly allocated ClassFile that the caller releases with
 *         ClassFile_free(), or NULL when the file cannot be opened, is
 *         truncated, does not start with the 0xCAFEBABE magic, contains an
 *         unknown constant pool tag, or memory runs out.
 */
ClassFile *ClassFile_load(const char *path);

/** Prints the scalar header fields of `cf` to stdout; does nothing for NULL. */
void ClassFile_info(const ClassFile *cf);

/** Releases a ClassFile returned by ClassFile_load(); NULL is accepted. */
void ClassFile_free(ClassFile *cf);

#pragma pack()

#endif /* CLASSFILE_H */

/* ========================================================================
 * classfile.c  (new file, index 0000000..470616a)
 * ======================================================================== */

/* The declarations above are the contents of classfile.h; the header is only
 * pulled in when this section is built as a file of its own. */
#ifndef CLASSFILE_H
#include "classfile.h"
#endif
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* When defined, read_u2()/read_u4() decode the big-endian values found in
 * class files into host byte order; when undefined they copy bytes verbatim. */
#define SWAP_ENDIANNESS

static const u4 CLASSFILE_MAGIC = 0xCAFEBABEu;

/**
 * Reads `n` consecutive 16-bit values from `file` into the array at `ptr`.
 *
 * Reading stops at the first value that cannot be read completely; elements
 * from that point on are left untouched, and the caller can tell what
 * happened with feof()/ferror(). `ptr` may be unaligned.
 */
void read_u2(void *ptr, size_t n, FILE *file) {
  unsigned char *dst = ptr;
  for (size_t i = 0; i < n; i++) {
    unsigned char raw[sizeof(u2)];
    if (fread(raw, 1, sizeof raw, file) != sizeof raw)
      return;
#ifdef SWAP_ENDIANNESS
    u2 value = (u2)(((unsigned)raw[0] << 8) | raw[1]);
    memcpy(dst + i * sizeof value, &value, sizeof value);
#else
    memcpy(dst + i * sizeof raw, raw, sizeof raw);
#endif
  }
}

/**
 * Reads `n` consecutive 32-bit values from `file` into the array at `ptr`.
 *
 * Same contract as read_u2(), with four bytes consumed per element.
 */
void read_u4(void *ptr, size_t n, FILE *file) {
  unsigned char *dst = ptr;
  for (size_t i = 0; i < n; i++) {
    unsigned char raw[sizeof(u4)];
    if (fread(raw, 1, sizeof raw, file) != sizeof raw)
      return;
#ifdef SWAP_ENDIANNESS
    u4 value = ((u4)raw[0] << 24) | ((u4)raw[1] << 16) | ((u4)raw[2] << 8) |
               (u4)raw[3];
    memcpy(dst + i * sizeof value, &value, sizeof value);
#else
    memcpy(dst + i * sizeof raw, raw, sizeof raw);
#endif
  }
}

/* Input stream with a sticky error flag: once a read or allocation fails,
 * every later read yields zero/NULL, so callers can check `failed` once per
 * group of reads instead of after every single one. */
typedef struct {
  FILE *file;
  bool failed;
} Reader;

/* Fills dst[0..n) from the stream, or with zeros once the reader has failed. */
static void take(Reader *r, unsigned char *dst, size_t n) {
  if (!r->failed && fread(dst, 1, n, r->file) == n)
    return;
  r->failed = true;
  memset(dst, 0, n);
}

static u1 next_u1(Reader *r) {
  unsigned char b[1];
  take(r, b, sizeof b);
  return b[0];
}

/* Decodes a big-endian 16-bit value independently of host byte order. */
static u2 next_u2(Reader *r) {
  unsigned char b[2];
  take(r, b, sizeof b);
  return (u2)(((unsigned)b[0] << 8) | b[1]);
}

/* Decodes a big-endian 32-bit value independently of host byte order. */
static u4 next_u4(Reader *r) {
  unsigned char b[4];
  take(r, b, sizeof b);
  return ((u4)b[0] << 24) | ((u4)b[1] << 16) | ((u4)b[2] << 8) | (u4)b[3];
}

/* Returns a heap copy of the next n bytes, or NULL (and marks the reader
 * failed) on a short read or allocation failure. n == 0 yields a valid,
 * freeable pointer. */
static u1 *next_bytes(Reader *r, size_t n) {
  if (r->failed)
    return NULL;
  u1 *buf = malloc(n != 0 ? n : 1);
  if (buf == NULL || (n != 0 && fread(buf, 1, n, r->file) != n)) {
    free(buf);
    r->failed = true;
    return NULL;
  }
  return buf;
}

/* Zeroed array allocation that never asks calloc for zero elements, so a
 * NULL result always means failure. */
static void *alloc_items(size_t count, size_t item_size) {
  return calloc(count != 0 ? count : 1, item_size);
}

/* Reads the body of one constant pool entry whose tag byte was already
 * consumed. Returns the allocated CONSTANT_*_info, or NULL for an unknown
 * tag or allocation failure; read errors are reported through r->failed. */
static void *load_cp_entry(Reader *r, u1 tag) {
  switch (tag) {
  case CONSTANT_Class: {
    CONSTANT_Class_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->name_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Fieldref:
  case CONSTANT_Methodref:
  case CONSTANT_InterfaceMethodref: {
    CONSTANT_Fieldref_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->class_index = next_u2(r);
      e->name_and_type_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_String: {
    CONSTANT_String_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->string_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Integer:
  case CONSTANT_Float: {
    CONSTANT_Integer_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->bytes = next_u4(r);
    }
    return e;
  }
  case CONSTANT_Long:
  case CONSTANT_Double: {
    CONSTANT_Long_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->high_bytes = next_u4(r);
      e->low_bytes = next_u4(r);
    }
    return e;
  }
  case CONSTANT_NameAndType: {
    CONSTANT_NameAndType_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->name_index = next_u2(r);
      e->descriptor_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Utf8: {
    CONSTANT_Utf8_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->length = next_u2(r);
      e->bytes = next_bytes(r, e->length);
    }
    return e;
  }
  case CONSTANT_MethodHandle: {
    CONSTANT_MethodHandle_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->reference_kind = next_u1(r);
      e->reference_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_MethodType: {
    CONSTANT_MethodType_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->descriptor_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Dynamic:
  case CONSTANT_InvokeDynamic: {
    CONSTANT_Dynamic_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->bootstrap_method_attr_index = next_u2(r);
      e->name_and_type_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Module: {
    CONSTANT_Module_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->name_index = next_u2(r);
    }
    return e;
  }
  case CONSTANT_Package: {
    CONSTANT_Package_info *e = calloc(1, sizeof *e);
    if (e != NULL) {
      e->tag = tag;
      e->name_index = next_u2(r);
    }
    return e;
  }
  default:
    return NULL; /* unknown tag: the entry length cannot be determined */
  }
}

/* Reads the constant_pool_count - 1 entries that follow the class header. */
static bool load_constant_pool(ClassFile *cf, Reader *r) {
  size_t entries = (size_t)cf->constant_pool_count - 1;

  cf->constant_pool = alloc_items(entries, sizeof *cf->constant_pool);
  if (cf->constant_pool == NULL)
    return false;

  for (size_t i = 0; i < entries; i++) {
    u1 tag = next_u1(r);
    if (r->failed)
      return false;

    /* Store the entry before checking it so ClassFile_free() releases it
     * even when parsing stops here. */
    cp_info *slot = &cf->constant_pool[i];
    slot->tag = tag;
    slot->info = load_cp_entry(r, tag);
    if (slot->info == NULL || r->failed)
      return false;

    /* 8-byte constants occupy two pool indices; the second stays zeroed. */
    if (tag == CONSTANT_Long || tag == CONSTANT_Double) {
      if (i + 1 >= entries)
        return false;
      i++;
    }
  }
  return true;
}

/* Reads `count` attribute_info records. The returned array may be partially
 * filled when the reader fails; unfilled records are zeroed. */
static attribute_info *load_attributes(Reader *r, u2 count) {
  attribute_info *attrs = alloc_items(count, sizeof *attrs);
  if (attrs == NULL) {
    r->failed = true;
    return NULL;
  }
  for (size_t i = 0; i < count && !r->failed; i++) {
    attrs[i].attribute_name_index = next_u2(r);
    attrs[i].attribute_length = next_u4(r);
    attrs[i].info = next_bytes(r, attrs[i].attribute_length);
  }
  return attrs;
}

/* Fills `cf` (which must be zero-initialised) from the stream. On failure
 * `cf` is left in a state ClassFile_free() can release. */
static bool load_class(ClassFile *cf, Reader *r) {
  cf->magic = next_u4(r);
  cf->minor_version = next_u2(r);
  cf->major_version = next_u2(r);
  cf->constant_pool_count = next_u2(r);
  /* A valid class file has constant_pool_count >= 1. */
  if (r->failed || cf->magic != CLASSFILE_MAGIC || cf->constant_pool_count == 0)
    return false;

  if (!load_constant_pool(cf, r))
    return false;

  cf->access_flags = next_u2(r);
  cf->this_class = next_u2(r);
  cf->super_class = next_u2(r);

  cf->interfaces_count = next_u2(r);
  cf->interfaces = alloc_items(cf->interfaces_count, sizeof *cf->interfaces);
  if (cf->interfaces == NULL)
    return false;
  for (size_t i = 0; i < cf->interfaces_count; i++)
    cf->interfaces[i] = next_u2(r);

  cf->fields_count = next_u2(r);
  cf->fields = alloc_items(cf->fields_count, sizeof *cf->fields);
  if (cf->fields == NULL)
    return false;
  for (size_t i = 0; i < cf->fields_count && !r->failed; i++) {
    field_info *field = &cf->fields[i];
    field->access_flags = next_u2(r);
    field->name_index = next_u2(r);
    field->descriptor_index = next_u2(r);
    field->attributes_count = next_u2(r);
    field->attributes = load_attributes(r, field->attributes_count);
  }

  cf->methods_count = next_u2(r);
  cf->methods = alloc_items(cf->methods_count, sizeof *cf->methods);
  if (cf->methods == NULL)
    return false;
  for (size_t i = 0; i < cf->methods_count && !r->failed; i++) {
    method_info *method = &cf->methods[i];
    method->access_flags = next_u2(r);
    method->name_index = next_u2(r);
    method->descriptor_index = next_u2(r);
    method->attributes_count = next_u2(r);
    method->attributes = load_attributes(r, method->attributes_count);
  }

  cf->attributes_count = next_u2(r);
  cf->attributes = load_attributes(r, cf->attributes_count);

  return !r->failed;
}

ClassFile *ClassFile_load(const char *path) {
  if (path == NULL)
    return NULL;

  FILE *file = fopen(path, "rb");
  if (file == NULL)
    return NULL;

  /* Zero-initialised so a partially parsed file can be released safely. */
  ClassFile *cf = calloc(1, sizeof *cf);
  Reader reader = {.file = file, .failed = false};
  bool ok = cf != NULL && load_class(cf, &reader);

  fclose(file);

  if (!ok) {
    ClassFile_free(cf);
    return NULL;
  }
  return cf;
}

/* Releases an attribute array together with each record's payload. */
static void free_attributes(attribute_info *attrs, u2 count) {
  if (attrs == NULL)
    return;
  for (size_t i = 0; i < count; i++)
    free(attrs[i].info);
  free(attrs);
}

void ClassFile_free(ClassFile *cf) {
  if (cf == NULL)
    return;

  if (cf->constant_pool != NULL) {
    size_t entries =
        cf->constant_pool_count != 0 ? (size_t)cf->constant_pool_count - 1 : 0;
    for (size_t i = 0; i < entries; i++) {
      cp_info *slot = &cf->constant_pool[i];
      /* Utf8 is the only entry kind that owns a second allocation. */
      if (slot->tag == CONSTANT_Utf8 && slot->info != NULL)
        free(((CONSTANT_Utf8_info *)slot->info)->bytes);
      free(slot->info);
    }
    free(cf->constant_pool);
  }

  free(cf->interfaces);

  if (cf->fields != NULL) {
    for (size_t i = 0; i < cf->fields_count; i++)
      free_attributes(cf->fields[i].attributes, cf->fields[i].attributes_count);
    free(cf->fields);
  }

  if (cf->methods != NULL) {
    for (size_t i = 0; i < cf->methods_count; i++)
      free_attributes(cf->methods[i].attributes,
                      cf->methods[i].attributes_count);
    free(cf->methods);
  }

  free_attributes(cf->attributes, cf->attributes_count);
  free(cf);
}

void ClassFile_info(const ClassFile *cf) {
  if (cf == NULL)
    return;

  printf("magic=%" PRIX32 "\n"
         "minor_version=%hu\n"
         "major_version=%hu\n"
         "constant_pool_count=%hu\n"
         "access_flags=%hu\n"
         "this_class=%hu\n"
         "super_class=%hu\n"
         "interfaces_count=%hu\n"
         "fields_count=%hu\n"
         "methods_count=%hu\n"
         "attributes_count=%hu\n",
         cf->magic, cf->minor_version, cf->major_version,
         cf->constant_pool_count, cf->access_flags, cf->this_class,
         cf->super_class, cf->interfaces_count, cf->fields_count,
         cf->methods_count, cf->attributes_count);
}

/* ========================================================================
 * main.c  (new file, index 0000000..dbdbbc0)
 * ======================================================================== */
@@ -0,0 +1,6 @@ +#include "classfile.h" + +int main(int argc, char **argv) { + ClassFile *cf = ClassFile_load("test/Main.class"); + ClassFile_info(cf); +} \ No newline at end of file