
#include "plugin_base.h"
// Function table supplied by the host application. Starts out zeroed and is filled in by init_1(), which copies the host's Plugin_api into it.
Plugin_api api = {0};
// File extensions this plugin registers for. init_1() passes this table (and its element count) to the host through Plugin_info.
// "png" and "apng" get the APNG handling in load_image(); everything listed here is ultimately decoded by stb_image.
Plugin_file_format supported_formats [] = {
	{.ext=S("bmp")},
	{.ext=S("png")},
	{.ext=S("apng")},
	{.ext=S("jpg")},
	{.ext=S("jpeg")},
	{.ext=S("psd")},
	{.ext=S("tga")},
	{.ext=S("ppm")},
	{.ext=S("pgm")},
};
// Plugin entry point: keeps a copy of the host's function table and describes this plugin to the host.
// - plugin_api: function table provided by the host, copied into the global `api`.
// - out__info: receives the plugin's priority and the list of supported file formats. Fields not set here are zero.
void init_1 (Plugin_api* plugin_api, Plugin_info* out__info) {
	api = *plugin_api;
	
	Plugin_info info = {0};
	info.priority = -1;
	info.supported_formats_count = countof(supported_formats);
	info.supported_formats = supported_formats;
	*out__info = info;
}

#include <string.h>
#include <stdio.h>
// Copies `bytecount` bytes from `source` to `target`. Thin wrapper around memcpy() with the size in the middle, so the regions must not overlap.
static inline void mem_copy (void* target, u64 bytecount, void* source) {
	const void* from = source;
	(void)memcpy(target, from, bytecount);
}

/*

This plugin detects whether a PNG is animated and, if it is, converts it into a non-animated PNG, which mostly just involves changing the image size and removing all the other frames. That converted image is then fed to stb_image.

PNG is made up of chunks, which consist of a u32 content length, 4-byte signature, chunk content, and then a u32 "CRC" value. CRC is generated from the chunk signature and content, so it needs to be re-generated if a chunk is modified.
The file starts with an 8-byte PNG signature (0x0A1A0A0D474E5089 or "\211PNG\r\n\032\n"), which is immediately followed by 'IHDR'.
- 'IHDR' (image header) contains the image dimensions and a bunch of other info about the format of the image pixels.
- 'acTL' (animation info) is before all frames including 'IDAT', and there's only one of them. The existence of this can be used to detect if the PNG is animated, if you find 'IDAT' before this then it's not animated.
- 'fcTL' (frame info) is the beginning of a frame, it has some information about the animation frame like duration and position/size. If this is before 'IDAT' then 'IDAT' is the first frame, if it's after 'IDAT' (i.e. before an 'fdAT') then 'IDAT' should be treated as a preview/fallback image and not part of the animation. The dimensions in 'IHDR' need to be changed to the dimensions in the chosen 'fcTL'.
- 'IDAT' (image data) contains compressed data for the image's pixels, there may be multiple of these back-to-back.
- 'fdAT' (frame data) is identical to 'IDAT' (image data), except there's a useless u32 at the start. By removing the u32, you can just rename this into 'IDAT' to make this the default image.
- 'IEND' is the last chunk in the file, decoders are expected to treat it as end of the file, even if it isn't.

PNG integers are in big endian format, which is very very very very very very very very annoying.

Useless chunks that should be safe to discard for the purposes of APNG decoding:
- tEXt = Image description / metadata.
- zTXt = Compressed text.
- iTXt = International text.
- tIME = Modification time.
- sPLT = Suggested palette.
- pHYs = Physical pixel size / aspect ratio hint.
- bKGD = Suggested background color for transparent images. This isn't actually part of the image, it just suggests what background to put the image on.
- hIST = Palette frequency histogram.

https://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html
https://wiki.mozilla.org/APNG_Specification

*/

// Byte-order reversal for 16/32/64-bit values, built on the GCC/Clang __builtin_bswap* intrinsics.
// Used to convert PNG's big-endian integers. The swap is unconditional, so this is only correct on a little-endian host.
#define endian_swap_16(val) __builtin_bswap16(val)
#define endian_swap_32(val) __builtin_bswap32(val)
#define endian_swap_64(val) __builtin_bswap64(val)

// These are PNG chunk signatures as 32-bit values. E.g. "acTL" -> u32_acTL
// Each constant holds the four ASCII characters with the first character in the lowest byte (0x61 'a', 0x63 'c', 0x54 'T', 0x4C 'L' -> 0x4C546361).
// That matches what a little-endian host gets when it reads the 4 signature bytes from the file as a u32, so these are compared against PNG_chunkhead.signature without any byte swapping.
#define u32_acTL 0x4C546361
#define u32_bKGD 0x44474B62
#define u32_fcTL 0x4C546366
#define u32_fdAT 0x54416466
#define u32_hIST 0x54534968
#define u32_IDAT 0x54414449
#define u32_IEND 0x444E4549
#define u32_IHDR 0x52444849
#define u32_iTXt 0x74585469
#define u32_pHYs 0x73594870
#define u32_sPLT 0x544C5073
#define u32_tEXt 0x74584574
#define u32_tIME 0x454D4974
#define u32_zTXt 0x7458547A

// On-disk layouts of the PNG/APNG structures this plugin touches.
// Packing is set to 1 so the structs have no padding and can be overlaid directly on the file bytes.
// Multi-byte fields hold the raw file bytes (big endian); the code runs them through endian_swap_*() before using them as numbers.
#pragma pack(push,1)
// Precedes every chunk: content length followed by the 4-character chunk type.
typedef struct {
	u32 length; // Length of the chunk content only, excluding this header and the trailing CRC.
	u32 signature; // Compared against the u32_XXXX constants without swapping.
} PNG_chunkhead;
// Content of the 'IHDR' chunk. Only w and h are interpreted by this file, the remaining fields are copied through unchanged.
typedef struct {
	u32 w;
	u32 h;
	u8 bit_depth;
	u8 color_type;
	u8 compression_method;
	u8 filter_method;
	u8 interlace_method;
} PNG_IHDR;
// Content of the 'acTL' chunk. Only num_frames is read by this file.
typedef struct {
	u32 num_frames;
	u32 num_plays;
} PNG_acTL;
// Content of the 'fcTL' chunk, which describes one animation frame.
typedef struct {
	u32 sequence_number;
	u32 w; // Size of the frame's region.
	u32 h;
	u32 x; // Position of the frame's region on the full image.
	u32 y;
	u16 delay_numerator; // Frame delay = numerator/denominator
	u16 delay_denominator;
	u8 dispose_op; // Type of frame area disposal to be done after rendering this frame
		// 0 = NONE, no disposal is done on this frame before rendering the next; the contents of the output buffer are left as is.
		// 1 = BACKGROUND, the frame's region of the output buffer is to be cleared to fully transparent black before rendering the next frame.
		// 2 = PREVIOUS, the frame's region of the output buffer is to be reverted to the previous contents before rendering the next frame.
		// "If the first fcTL chunk uses PREVIOUS it should be treated as BACKGROUND." (note: this might be nonsense that assumes the first frame (non-animated IDAT png) isn't considered to be a frame)
	u8 blend_op; // Type of frame area rendering for this frame
		// 0 = SOURCE, replace the entire region, e.g. just overwrite transparent pixels
		// 1 = OVER, blend transparent pixels normally
} PNG_fcTL;
// Content of the 'fdAT' chunk: a sequence number followed by data that is otherwise the same as 'IDAT' content.
typedef struct {
	u32 sequence_number;
	u8 data [];
} PNG_fdAT;
// Checksum stored after the content of every chunk, see generate_png_crc32().
typedef u32 PNG_crc;
#pragma pack(pop)

// Computes the CRC that PNG stores after each chunk.
// - data/len: the bytes to checksum. For a chunk this is the 4-byte signature followed by the chunk content (the length field is not included).
// Returns the checksum as a plain number; it is not converted to PNG's byte order here.
// This is the bit-at-a-time form of CRC-32: start from all ones, fold in each byte, shift out 8 bits while applying the reflected polynomial 0xEDB88320, and invert the result at the end.
static PNG_crc generate_png_crc32 (void* data, u64 len) {
	u8* cursor = data;
	u8* end = cursor + len;
	PNG_crc remainder = 0xFFFFFFFF;
	while (cursor != end) {
		remainder ^= *cursor;
		cursor ++;
		for (int bit=0; bit<8; bit++) {
			PNG_crc shifted = remainder >> 1;
			if (remainder & 1) remainder = shifted ^ 0xEDB88320;
			else remainder = shifted;
		}
	}
	return remainder ^ 0xFFFFFFFF;
}
// Walks the chunk list of a PNG file to read the image size and, if the file is an APNG, to collect its frame headers.
// - len/data: the whole file in memory. The returned fcTL pointers point into `data`, so it must outlive them.
// - out__width/out__height: receive the dimensions from IHDR.
// - out__frame_count: receives the number of animation frames, or 0 if the file is a regular non-animated PNG.
// - out__fcTLs: receives an arena-allocated array of `frame_count` pointers to the fcTL chunk contents (NULL for a non-animated PNG). Every fcTL is guaranteed to be at least sizeof(PNG_fcTL) bytes long.
// Returns 0 on success, otherwise:
//   1 = file too short, 2 = bad PNG signature, 3 = bad IHDR chunk, 4 = zero width or height,
//   10 = truncated chunk header, 11 = truncated chunk content, 14 = fewer fcTL chunks than acTL promised,
//   20 = second acTL, 21 = acTL reports zero frames, 22 = fcTL without acTL, 23 = more fcTL chunks than acTL promised,
//   24 = acTL chunk too short, 25 = fcTL chunk too short, 26 = acTL frame count can't possibly fit in the file, 27 = allocation failed.
static Errnum analyze_png (u64 len, void* data, u64* out__width, u64* out__height, u64* out__frame_count, PNG_fcTL*** out__fcTLs) {
	u8* bytes = data;
	u64 i = 0; // Read offset into the file.
	inline void* READ (u64 amount) { void* value = bytes+i; i += amount; return value; }
	
	// Make sure the outputs are well defined on every successful path, including the early return for static PNGs.
	*out__frame_count = 0;
	*out__fcTLs = NULL;
	
	// Signature + IHDR chunk + one more (empty) chunk is the least a PNG can be.
	if (len < 8 + 12+sizeof(PNG_IHDR) + 12) return 1;
	u64 png_signature = 0;
	mem_copy(&png_signature, sizeof(png_signature), READ(sizeof(png_signature))); // Copied rather than cast, the buffer isn't necessarily aligned for a u64.
	if (png_signature != 0x0A1A0A0D474E5089) return 2;
	PNG_chunkhead* ihdr_head = READ(sizeof(PNG_chunkhead));
	u32 ihdr_len = endian_swap_32(ihdr_head->length);
	if (ihdr_len < sizeof(PNG_IHDR) || ihdr_head->signature != u32_IHDR || len-i < (u64)ihdr_len+sizeof(PNG_crc)+12) return 3;
	PNG_IHDR* ihdr_chunk = READ((u64)ihdr_len+sizeof(PNG_crc));
	u32 ihdr_w = endian_swap_32(ihdr_chunk->w);
	u32 ihdr_h = endian_swap_32(ihdr_chunk->h);
	
	if (ihdr_w == 0 || ihdr_h == 0) return 4;
	*out__width = ihdr_w;
	*out__height = ihdr_h;
	
	// Smallest possible size of one fcTL chunk in the file, used to sanity check the frame count.
	const u64 fcTL_min_bytes = sizeof(PNG_chunkhead)+sizeof(PNG_fcTL)+sizeof(PNG_crc);
	
	PNG_fcTL** fcTLs = NULL;
	u32 acTL_num_frames = 0;
	u64 frame_count = 0;
	while (i < len) {
		if (len-i < sizeof(PNG_chunkhead)) return 10;
		PNG_chunkhead* head = READ(sizeof(PNG_chunkhead));
		u32 chunk_len = endian_swap_32(head->length);
		if (len-i < (u64)chunk_len+sizeof(PNG_crc)) return 11;
		void* chunk = READ((u64)chunk_len+sizeof(PNG_crc));
		
		switch (head->signature) {
			case u32_acTL: {
				if (acTL_num_frames) return 20; // There can only be one of these.
				if (chunk_len < sizeof(PNG_acTL)) return 24;
				PNG_acTL* acTL = chunk;
				acTL_num_frames = endian_swap_32(acTL->num_frames);
				if (acTL_num_frames < 1) return 21;
				// Every frame needs its own fcTL chunk, so a count that can't fit in the file could never be satisfied. Rejecting it here keeps a hostile file from requesting a gigantic allocation.
				if (acTL_num_frames > len / fcTL_min_bytes) return 26;
				fcTLs = api.arena_alloc(NULL, sizeof(PNG_fcTL*)*(u64)acTL_num_frames);
				if (!fcTLs) return 27;
				break;
			}
			case u32_fcTL: {
				if (!acTL_num_frames) return 22; // Animated pngs must have acTL.
				if (frame_count == acTL_num_frames) return 23; // More frames than reported by acTL.
				if (chunk_len < sizeof(PNG_fcTL)) return 25; // Callers read every field of the struct, it must be fully inside the file.
				fcTLs[frame_count] = (PNG_fcTL*)chunk;
				frame_count ++;
				break;
			}
			case u32_IDAT: {
				if (!acTL_num_frames) return 0; // acTL must come before this, so if this is encountered first, the file must be a non-animated PNG.
				break;
			}
			case u32_IEND: {
				i = len; // Treat IEND as the end of the file, even if more data follows.
				break;
			}
			default: break; // Other chunks are irrelevant for the analysis.
		}
	}
	if (frame_count != acTL_num_frames) return 14; // Should this be an error?
	
	*out__frame_count = frame_count;
	*out__fcTLs = fcTLs;
	
	return 0;
}
// Builds a standalone, non-animated PNG that contains only one frame of an APNG.
// The signature and IHDR are copied first, then chunks are copied or dropped one by one:
// metadata and animation control chunks are dropped, the chosen frame's IDAT/fdAT chunks are written as IDAT, and everything else (e.g. palette) is copied as is.
// IHDR's width/height are replaced with the chosen frame's size from its fcTL.
// - len/data: the whole APNG file in memory.
// - frame_to_get: 0-based frame index, counted in fcTL chunks.
// - out__len: receives the size of the generated PNG.
// - out__data: destination buffer, must have room for at least `len` bytes. The output can't get bigger than the input because every chunk is copied at most once and fdAT only shrinks when it's turned into IDAT.
// Returns 0 on success, otherwise:
//   1 = file too short for the signature and IHDR header, 3 = bad IHDR chunk,
//   10 = truncated chunk header or a second IHDR, 11 = truncated chunk content,
//   12 = fcTL chunk too short, 13 = fdAT chunk too short.
static Errnum generate_static_png_from_apng_frame (u64 len, void* data, i64 frame_to_get, u64* out__len, void* out__data) {
	u8* in = data;
	u8* out = out__data;
	u64 i = 0; // Read offset into the input.
	u64 outlen = 0; // Bytes written to the output so far.
	inline void* READ (u64 amount) { void* value = in+i; i += amount; return value; }
	
	// Don't rely on the caller having validated the header, the reads below must stay inside the buffer.
	if (len < sizeof(u64)+sizeof(PNG_chunkhead)) return 1;
	(void)READ(sizeof(u64)); // PNG signature, copied to the output along with IHDR below.
	PNG_chunkhead* ihdr_head = READ(sizeof(PNG_chunkhead));
	u32 ihdr_len = endian_swap_32(ihdr_head->length);
	if (ihdr_head->signature != u32_IHDR || ihdr_len < sizeof(PNG_IHDR) || len-i < (u64)ihdr_len+sizeof(PNG_crc)) return 3;
	(void)READ((u64)ihdr_len+sizeof(PNG_crc)); // There's a CRC after every chunk.
	
	mem_copy(out, i, in);
	outlen = i;
	
	frame_to_get ++; // Will be decremented at the beginning of frame. It reaches 0 at the wanted frame and goes negative after it, so only that frame's data chunks are copied.
	while (i < len) {
		if (len-i < sizeof(PNG_chunkhead)) return 10;
		PNG_chunkhead* head = READ(sizeof(PNG_chunkhead));
		u32 chunk_len = endian_swap_32(head->length);
		if (len-i < (u64)chunk_len+sizeof(PNG_crc)) return 11;
		void* chunk = READ((u64)chunk_len+sizeof(PNG_crc));
		// Size of the chunk including its header and CRC, computed in 64 bits so it can't wrap.
		u64 whole_len = sizeof(PNG_chunkhead)+(u64)chunk_len+sizeof(PNG_crc);
		
		switch (head->signature) {
			// All of these are useless, might as well ignore them to save time.
			case u32_tEXt: case u32_zTXt: case u32_iTXt: case u32_tIME: case u32_sPLT: case u32_pHYs: case u32_bKGD: case u32_hIST: break;
			// Animation info isn't needed in a static PNG.
			case u32_acTL: break;
			// This was already passed, there can't be another one.
			case u32_IHDR: return 10;
			
			case u32_fcTL: {
				if (chunk_len < sizeof(PNG_fcTL)) return 12; // The size fields are read below, they must be inside the chunk.
				frame_to_get --;
				if (frame_to_get == 0) {
					PNG_fcTL* fcTL = chunk;
					
					// IHDR has already been written, overwrite its width, height, and CRC. Both are in file byte order so no swapping is needed.
					PNG_chunkhead* out_ihdr_head = (PNG_chunkhead*)(out + sizeof(u64));
					PNG_IHDR* out_ihdr = (PNG_IHDR*)(out + sizeof(u64)+sizeof(PNG_chunkhead));
					out_ihdr->w = fcTL->w;
					out_ihdr->h = fcTL->h;
					PNG_crc* out_ihdr_crc = (PNG_crc*)(out + sizeof(u64)+sizeof(PNG_chunkhead)+ihdr_len);
					*out_ihdr_crc = generate_png_crc32(&out_ihdr_head->signature, sizeof(out_ihdr_head->signature)+ihdr_len);
				}
				break;
			}
			case u32_IDAT: {
				// IDAT can be the first animation frame, but only if it's preceded by fcTL. If there's no fcTL then you're supposed to ignore it, I guess it functions as a fallback for players that don't support apng. Point is, there's no need to handle this in a special way, frame_to_get counter works exactly as if this was a fdAT.
				if (frame_to_get == 0) {
					mem_copy(out+outlen, whole_len, head);
					outlen += whole_len;
				}
				break;
			}
			case u32_fdAT: {
				if (chunk_len < sizeof(PNG_fdAT)) return 13; // Without this the subtraction below would wrap around and copy gigabytes.
				if (frame_to_get == 0) {
					PNG_fdAT* fdAT = chunk;
					u32 newlen = chunk_len-sizeof(PNG_fdAT); // Remove sequence number.
					
					PNG_chunkhead* newhead = (PNG_chunkhead*)(out+outlen);
					*newhead = (PNG_chunkhead){
						.length = endian_swap_32(newlen),
						.signature = u32_IDAT,
					};
					outlen += sizeof(PNG_chunkhead);
					
					mem_copy(out+outlen, newlen, fdAT->data);
					outlen += newlen;
					
					// The content and signature changed, so the CRC has to be regenerated.
					PNG_crc* idat_crc = (PNG_crc*)(out+outlen);
					*idat_crc = generate_png_crc32(&newhead->signature, sizeof(newhead->signature)+newlen);
					outlen += sizeof(PNG_crc);
				}
				break;
			}
			case u32_IEND: {
				mem_copy(out+outlen, whole_len, head);
				outlen += whole_len;
				i = len; // Nothing after IEND matters.
				break;
			}
			default: {
				// Unknown chunks may be needed for decoding, keep them.
				mem_copy(out+outlen, whole_len, head);
				outlen += whole_len;
				break;
			}
		}
	}
	*out__len = outlen;
	return 0;
}

// stb_image is compiled directly into this file, with file-local (static) functions, no stdio-based loaders, and no failure strings.
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#define STBI_NO_STDIO
#define STBI_NO_FAILURE_STRINGS
// #define STBI_FAILURE_USERMSG
#define STBI_NO_GIF // Implemented by a native plugin so there's no need for this.
// All stb_image allocations go through the host's arena allocator.
// NOTE(review): STBI_REALLOC assumes api.arena_alloc() behaves like realloc when given an existing pointer, confirm against the host API.
#define STBI_MALLOC(sz) api.arena_alloc(NULL,sz)
#define STBI_REALLOC(p,newsz) api.arena_alloc(p,newsz)
// Freeing is a no-op; load_image() rewinds the arena with api.arena_set_pos() after each APNG frame instead.
#define STBI_FREE(p) do{}while(0)
#include "stb_image.h"

// Releases the permanent pixel buffers of the first `count` frames of `file`. Used when loading fails midway.
static void apng_release_frames (Plugin_file* file, u64 count) {
	for (u64 f=0; f<count; f++) {
		api.permanent_free(file->frames[f].pixels);
	}
}
// Decodes an image file into one or more RGBA frames.
// - ext: file extension, "png" and "apng" are checked for animation, everything else goes straight to stb_image.
// - len/data: the whole file in memory.
// - zoom: not used by this plugin.
// - out__file: receives the image size, frame count, and frames. The frame array comes from the arena allocator, each frame's pixels come from the permanent allocator and hold a full w*h image.
// Returns 0 on success, otherwise:
//   1 = file too big for stb_image, 10 = stb_image failed, 11 = stb_image returned a bad size,
//   100 = generated frame PNG too big for stb_image, 101 = stb_image failed on a frame or decoded an unexpected size,
//   102 = frame region doesn't fit inside the image, 103 = image too big to address, 104 = allocation failed,
//   1000+n = analyze_png() failed with n, 2000+n = generate_static_png_from_apng_frame() failed with n.
// On failure all pixel buffers allocated so far have been freed, the contents of out__file should not be used.
Errnum load_image (String ext, u64 len, void* data, f64 zoom, Plugin_file* out__file) {
	if (strings_are_equal(ext, S("png")) || strings_are_equal(ext, S("apng"))) {
		u64 image_w = 0;
		u64 image_h = 0;
		u64 frame_count = 0;
		PNG_fcTL** fctls = NULL;
		Errnum e = analyze_png(len, data, &image_w, &image_h, &frame_count, &fctls);
		if (e) return e+1000;
		if (frame_count) { // If the function succeeds but returns 0 frames, it's a static PNG. If it succeeds and returns 1, it's an APNG with 1 frame, APNG always needs to be extracted since stb_image fails if it finds an APNG chunk.
			// Width and height come from 32-bit fields so their product fits in a u64, but the byte count might not.
			u64 pixel_count = image_w*image_h;
			if (pixel_count > ~(u64)0 / sizeof(Rgba8)) return 103;
			u64 pixel_bytes = pixel_count*sizeof(Rgba8);
			
			out__file->w = image_w;
			out__file->h = image_h;
			out__file->frame_count = frame_count;
			out__file->frames = api.arena_alloc(NULL, sizeof(Plugin_frame)*frame_count);
			Rgba8* canvas = api.arena_alloc(NULL, pixel_bytes); // Render the animation frames here.
			void* gen_data = api.arena_alloc(NULL, len); // The generated single-frame PNG is never bigger than the source file.
			if (!out__file->frames || !canvas || !gen_data) return 104;
			
			// The animation starts on a fully transparent canvas. Whether the arena hands out zeroed memory isn't visible from here, so clear it explicitly.
			for (u64 p=0; p<pixel_count; p++) {
				canvas[p] = (Rgba8){0};
			}
			
			u64 gen_len = 0;
			for (u64 f=0; f<frame_count; f++) {
				PNG_fcTL* fctl = fctls[f];
				u64 frame_x = endian_swap_32(fctl->x);
				u64 frame_y = endian_swap_32(fctl->y);
				u64 frame_w = endian_swap_32(fctl->w);
				u64 frame_h = endian_swap_32(fctl->h);
				u64 delay_denominator = endian_swap_16(fctl->delay_denominator);
				u64 delay_numerator = endian_swap_16(fctl->delay_numerator);
				u8 dispose_op = fctl->dispose_op;
				u8 blend_op = fctl->blend_op;
				if (delay_denominator == 0) delay_denominator = 100; // Supposedly 0 should be treated as 100.
				// How long this frame should be displayed for: the numerator/denominator fraction scaled by 1,000,000. There's no mandate about a 0 numerator, it's passed on as a duration of 0.
				u64 duration = delay_numerator*1000llu*1000llu / delay_denominator;
				// Only 0, 1 and 2 are defined. Anything else is handled as NONE so that the frame is always fully initialized.
				if (dispose_op > 2) dispose_op = 0;
				
				// The frame's region comes straight from the file, it must be inside the image or the pixel writes below go out of bounds. The values are 32-bit so the additions can't wrap.
				if (frame_x+frame_w > image_w || frame_y+frame_h > image_h) {
					apng_release_frames(out__file, f);
					return 102;
				}
				
				e = generate_static_png_from_apng_frame(len, data, f, &gen_len, gen_data);
				if (e) {
					apng_release_frames(out__file, f);
					return e+2000;
				}
				if (gen_len > I32_MAX) { // stb_image limitation, all the variables are ints.
					apng_release_frames(out__file, f);
					return 100;
				}
				
				u64 vpos = api.arena_get_pos(); // Everything stb_image allocates for this frame is released by rewinding to this position.
				
				Plugin_frame frame = {
					.duration = duration,
					.pixels = api.permanent_alloc(NULL, pixel_bytes),
				};
				if (!frame.pixels) {
					apng_release_frames(out__file, f);
					return 104;
				}
				out__file->frames[f] = frame;
				
				int channels = 0;
				int stb_w = 0;
				int stb_h = 0;
				Rgba8* stb_frame = (Rgba8*)stbi_load_from_memory(gen_data, (int)gen_len, &stb_w, &stb_h, &channels, 4);
				// The decoded size must match the fcTL, the loops below index the decoded pixels by the fcTL's size.
				if (!stb_frame || stb_w <= 0 || stb_h <= 0 || (u64)stb_w != frame_w || (u64)stb_h != frame_h) {
					// Failed, free all the frames, including this one.
					apng_release_frames(out__file, f+1);
					return 101;
				}
				
				// NONE: the frame is drawn on the canvas and stays there, the output is a copy of the canvas afterwards.
				// BACKGROUND/PREVIOUS: the frame is drawn on a copy of the canvas, so the canvas itself never receives it.
				Rgba8* target = canvas;
				if (dispose_op != 0) {
					mem_copy(frame.pixels, pixel_bytes, canvas);
					target = frame.pixels;
				}
				u64 index = 0;
				for (u64 y=frame_y; y<frame_y+frame_h; y++) {
					for (u64 x=frame_x; x<frame_x+frame_w; x++, index++) {
						u64 pos = y*image_w+x;
						Rgba8 color = stb_frame[index];
						if (dispose_op == 1) { // The ENTIRE region needs to be restored, not just the written pixels. Only 1 needs to do this, 2 restores the region to previous frame, which is already on the canvas.
							canvas[pos] = (Rgba8){0};
						}
						if (blend_op == 1 && color.a == 0) continue; // TODO: what about semi-transparent pixels? Also, is skipping the pixel like this correct for all dispose_ops?
						target[pos] = color;
					}
				}
				if (dispose_op == 0) {
					mem_copy(frame.pixels, pixel_bytes, canvas);
				}
				
				api.arena_set_pos(vpos);
			}
			return 0;
		}
	}
	
	// Not an APNG, let stb_image handle the file as is.
	if (len > I32_MAX) return 1; // stb_image limitation, all the variables are ints.
	
	int channels = 0;
	int w = 0;
	int h = 0;
	Rgba8* stb_frame = (Rgba8*)stbi_load_from_memory(data, (int)len, &w, &h, &channels, 4);
	if (!stb_frame) return 10;
	if (w <= 0 || h <= 0) return 11;
	
	// Copy the pixels out of the arena into memory that stays valid.
	u64 pixel_count = (u64)w*(u64)h;
	Plugin_frame frame = {
		.pixels = api.permanent_alloc(NULL, pixel_count*sizeof(Rgba8)),
	};
	if (!frame.pixels) return 104;
	for (u64 i=0; i<pixel_count; i++) {
		frame.pixels[i] = stb_frame[i];
	}
	out__file->w = w;
	out__file->h = h;
	out__file->frame_count = 1;
	out__file->frames = api.arena_alloc(NULL, sizeof(Plugin_frame));
	if (!out__file->frames) {
		api.permanent_free(frame.pixels);
		return 104;
	}
	*out__file->frames = frame;
	
	// stb_frame was allocated from the arena (see STBI_MALLOC), so it isn't freed here.
	return 0;
}
