diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..abf6343 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.1) +project(mpeg2enc) + +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/y262) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/y262app) diff --git a/LICENSE b/LICENSE index 2b9a150..8f3cb1e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 2-Clause License -Copyright (c) 2021, Ralf Willenbacher +Copyright (c) 2013-2021, Ralf Willenbacher All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index 37c0f66..f14fd33 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,138 @@ # y262 -y262 mpeg2 video encoder + +y262 is an MPEG-1/2 video encoder + +# features +* MPEG-1/2 video output as a raw bitstream +* 420, 422 and 444 chroma format +* All modes for frame pictures, including interlaced +* Multipass bitrate control, do as many passes as you want +* Adaptive quantization, yeah that variance based stuff +* Psyrd, but it does not work well for MPEG-2 +* Good picture quality, as far as I can tell +* Slice based threading + +# Sadly missing +* Field pictures +* Dual prime +* Frame Threading + +# How to build +You need cmake. +You need a C compiler supported by cmake. +You need the yasm assembler. cmake will look for it. + +From the root of the y262 source tree it should be as simple as: +```bash +mkdir build +cd build +cmake -G "Visual Studio 16 2019" -A x64 .. +``` +You probably need to replace the generator and architecture with what you have. +Then build! + + +# Running y262 +-- Running y262 -- +Once you have your executable you can run the encoder without arguments to get a list of possible parameters. 
+ +Sample output: +``` + y262app -in <420yuv> -size <width> <height> -out <m2vout> + + -frames : number of frames to encode, 0 for all + -threads : threading enabled and number of concurrent slices + -profile : simple or main profile + -level : low main high1440 or high level + -chromaf : chroma format, 420, 422 or 444 + -rec : write reconstructed frames to <file> + -rcmode : 0 = CQ, 1 = 1st pass, 2 = subsequent pass + -mpin : stats file of previous pass + -mpout : output stats file of current pass + -bitrate : average bitrate + -vbvrate : maximum bitrate + -vbv : video buffer size + -quant : quantizer for CQ + -interlaced : enable field macroblock modes + -bff : first input frame is bottom field first + -pulldown_frcode :frame rate code to pull input up to + -quality : encoder complexity, negative faster, positive slower + -frcode : frame rate code, see mpeg2 spec + -arinfo : aspect ratio information, see mpeg2 spec + -qscale0 : use more linear qscale type + -nump : number of p frames between i frames + -numb : number of b frames between i/p frames + -closedgop : bframes after i frames use only backwards prediction + -noaq : disable variance based quantizer modulation + -psyrd : psy rd strength + -avamat6 : use avamat6 quantization matrices + -flatmat : use flat quantization matrices + -intramat : use the 64 numbers in the file as intra matrix + -intermat : use the 64 numbers in the file as inter matrix + -videoformat : pal, secam, ntsc, 709 or unknown + -mpeg1 : output mpeg1 instead mpeg2, constraints apply +``` + +Note about -in: you can specify "-" if you want to pipe the yuv into the application. 
The yuv format of the input should match -chromaf. + + +Example: to encode something, call it like so: + +./y262app -in my_NTSC_video.yuv -size 720 480 -profile main -level high -rcmode 1 -mpout test.stats -bitrate +3000 -vbvrate 6000 -vbv 6000 -quality 0 -frcode 2 -pulldown_frcode 4 -arinfo 2 -nump 4 -numb 2 -out test_p1.m2v +-videoformat ntsc -interlaced + +This will encode the 420 raw yuv file my_NTSC_video.yuv with the dimensions 720x480, signaling main profile at +high level. It is the first pass and multipass data is written to the file test.stats for subsequent passes. +It is encoded with an average bitrate of 3 Mbit and a maximum video rate of 6 Mbit, using a 6 Mbit video buffer. +The encode is done at the default quality/speed tradeoff of 0 ( The -quality parameter roughly takes values from +around -50 to 50 ). +The framerate of the input is 24 fps and the sequence is encoded at 29.97 fps, which will result in approx. 3:2 +pulldown in the resulting file. Aspect ratio is set to 4:3. The encoder will place 4 P frames between keyframes +and between the frames of the resulting IPPPPI sequence 2 B frames each ( IBBPBBPBBPBBPBBIBB.. ). The +elementary stream is written to test_p1.m2v. The video format and colorspace parameters are written out to +specify ntsc. Field macroblock modes are enabled, slowing things down a lot for a slight coding gain. + +Possible -frcode and -pulldown_frcode ( frame rate ) values: +``` +1: ( 24000 / 1001 ) fps +2: 24 fps +3: 25 fps +4: ( 30000 / 1001 ) fps +5: 30 fps +6: 50 fps +7: ( 60000 / 1001 ) fps +8: 60 fps +``` + +Possible -arinfo ( aspect ratio ) values: +``` +1: 1:1 +2: 4:3 +3: 16:9 +4: 2.21:1 +``` + +Possible -arinfo ( pixel aspect ratio ) values for -mpeg1: +``` +1: 1.0 +2: 0.6735 +3: 0.7031 +4: 0.7615 +5: 0.8055 +6: 0.8437 +7: 0.8935 +8: 0.9375 +9: 0.9815 +10: 1.0255 +11: 1.0695 +12: 1.1250 +13: 1.1575 +14: 1.2015 +``` + +# Notes + +This is an offline encoder. 
It may do some retries on bitrate control failure to prevent bad quality or video buffer violations. So on complex content it may slow down quite a lot. This is also the main reason for missing frame threading.
+
+Patent situation regarding Mpeg2 you best check with the [MPEG LA](https://www.mpegla.com) or ask your legal department.
\ No newline at end of file
diff --git a/src/y262/CMakeLists.txt b/src/y262/CMakeLists.txt
new file mode 100644
index 0000000..7d697f8
--- /dev/null
+++ b/src/y262/CMakeLists.txt
@@ -0,0 +1,95 @@
+cmake_minimum_required(VERSION 3.1)
+project(liby262)
+
+# Threads::Threads is linked unconditionally at the end of this file, so fail early if it is missing.
+find_package(Threads REQUIRED)
+
+# yasm assembles the x86 sources; the custom commands below cannot run without it.
+find_program(YASM_EXE NAMES yasm)
+if(NOT YASM_EXE)
+	message(FATAL_ERROR "yasm assembler not found, it is required to build liby262")
+endif()
+
+# Pick the library name suffix and the yasm object format / defines for the target platform.
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+	set(ARCH "_x64")
+	if(WIN32)
+		set(YASM_ARGS -f win32 -m amd64 -DARCH_X86_64 -DPIC)
+	elseif(APPLE)
+		set(YASM_ARGS -f macho64 -m amd64 -DARCH_X86_64 -DPIC --prefix=_)
+	else()
+		set(YASM_ARGS -f elf64 -m amd64 -DARCH_X86_64 -DPIC)
+	endif()
+else()
+	set(ARCH "_x86")
+	if(WIN32)
+		set(YASM_ARGS -f win32 --prefix=_)
+	elseif(APPLE)
+		set(YASM_ARGS -f macho32 --prefix=_)
+	else()
+		set(YASM_ARGS -f elf32)
+	endif()
+endif()
+
+# DEPENDS makes the objects get re-assembled whenever the .asm sources change.
+add_custom_command(OUTPUT pixelop_x86.o COMMAND ${YASM_EXE}
+	ARGS ${YASM_ARGS} -o ${CMAKE_CURRENT_BINARY_DIR}/pixelop_x86.o ${CMAKE_CURRENT_SOURCE_DIR}/pixelop_x86.asm
+	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/pixelop_x86.asm)
+add_custom_command(OUTPUT transform_x86.o COMMAND ${YASM_EXE}
+	ARGS ${YASM_ARGS} -o ${CMAKE_CURRENT_BINARY_DIR}/transform_x86.o ${CMAKE_CURRENT_SOURCE_DIR}/transform_x86.asm
+	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/transform_x86.asm)
+
+add_library(liby262 STATIC
+	${CMAKE_CURRENT_SOURCE_DIR}/aboveslicelevel.h
+	${CMAKE_CURRENT_SOURCE_DIR}/bitstream.h
+	${CMAKE_CURRENT_SOURCE_DIR}/lookahead.h
+	${CMAKE_CURRENT_SOURCE_DIR}/me.h
+	${CMAKE_CURRENT_SOURCE_DIR}/pixelop.h
+	${CMAKE_CURRENT_SOURCE_DIR}/pixelop_x86.h
+	${CMAKE_CURRENT_SOURCE_DIR}/ratectrl.h
+	${CMAKE_CURRENT_SOURCE_DIR}/tables.h
+	${CMAKE_CURRENT_SOURCE_DIR}/threads.h
+	${CMAKE_CURRENT_SOURCE_DIR}/transform.h
+	${CMAKE_CURRENT_SOURCE_DIR}/transform_x86.h
+	${CMAKE_CURRENT_SOURCE_DIR}/types.h
+	${CMAKE_CURRENT_SOURCE_DIR}/y262.h
+	${CMAKE_CURRENT_SOURCE_DIR}/y262api.h
+	${CMAKE_CURRENT_SOURCE_DIR}/aboveslicelevel.c
+	${CMAKE_CURRENT_SOURCE_DIR}/bitstream.c
+	${CMAKE_CURRENT_SOURCE_DIR}/lookahead.c
+	${CMAKE_CURRENT_SOURCE_DIR}/mc.c
+	${CMAKE_CURRENT_SOURCE_DIR}/me.c
+	${CMAKE_CURRENT_SOURCE_DIR}/pixelop.c
+	${CMAKE_CURRENT_SOURCE_DIR}/ratectrl.c
+	${CMAKE_CURRENT_SOURCE_DIR}/tables.c
+	${CMAKE_CURRENT_SOURCE_DIR}/threads.c
+	${CMAKE_CURRENT_SOURCE_DIR}/transform.c
+	${CMAKE_CURRENT_SOURCE_DIR}/y262.c
+	${CMAKE_CURRENT_SOURCE_DIR}/y262api.c
+
+	${CMAKE_CURRENT_BINARY_DIR}/pixelop_x86.o
+	${CMAKE_CURRENT_BINARY_DIR}/transform_x86.o
+)
+
+# Debug configurations get a "d" appended to the base name, followed by the architecture suffix.
+set_target_properties(liby262 PROPERTIES
+	OUTPUT_NAME "liby262$<$<CONFIG:Debug>:d>${ARCH}"
+	ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+	LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+)
+
+
+if(WIN32)
+	target_compile_definitions(liby262 PRIVATE WIN32)
+elseif(APPLE)
+	target_compile_definitions(liby262 PRIVATE HAVE_LIBPTHREAD)
+else()
+	target_compile_definitions(liby262 PRIVATE HAVE_LIBPTHREAD)
+	target_link_libraries(liby262 PUBLIC m)
+endif()
+
+set_target_properties(liby262 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(liby262 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+
+target_link_libraries(liby262 PUBLIC Threads::Threads)
diff --git a/src/y262/aboveslicelevel.c b/src/y262/aboveslicelevel.c
new file mode 100644
index 0000000..3bc314a
--- /dev/null
+++ b/src/y262/aboveslicelevel.c
@@ -0,0 +1,448 @@
+/*
+Copyright (c) 2013, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2.
Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "y262.h"
+
+/*
+ * Writes a sequence header to ps_context->s_bitstream: the 24 bit value 1
+ * followed by STARTCODE_SEQUENCE_HEADER, then the header fields.
+ *
+ * - Size: the display size is written when the coded size in 16 pixel units
+ *   equals the display size rounded up to 16 pixel units, otherwise the
+ *   coded size is written.
+ * - Bit rate: s_ratectrl.i_vbvrate in units of 400 ( rounded up ), low 18
+ *   bits. For MPEG-1 without b_sequence_cbr the value 0x3ffff is written.
+ * - VBV buffer size: s_ratectrl.i_vbv_size in units of 16 * 1024
+ *   ( rounded up ), low 10 bits.
+ * - A quantiser matrix is only written when it differs from the default
+ *   matrix.
+ *
+ * NOTE(review): no byte alignment is done here, presumably the caller takes
+ * care of that before the next start code - confirm.
+ */
+void y262_write_sequence_header( y262_t *ps_context )
+{
+    int32_t i_idx;
+    y262_bitstream_t *ps_bitstream;
+    y262_sequence_header_t s_sequence_header;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    if( ( ps_context->i_sequence_width >> 4 ) == ( ( ps_context->i_sequence_display_width + 15 ) >> 4 ) )
+    {
+        s_sequence_header.i_horizontal_size = ps_context->i_sequence_display_width;
+    }
+    else
+    {
+        s_sequence_header.i_horizontal_size = ps_context->i_sequence_width;
+    }
+    if( ( ps_context->i_sequence_height >> 4 ) == ( ( ps_context->i_sequence_display_height + 15 ) >> 4 ) )
+    {
+        s_sequence_header.i_vertical_size = ps_context->i_sequence_display_height;
+    }
+    else
+    {
+        s_sequence_header.i_vertical_size = ps_context->i_sequence_height;
+    }
+    s_sequence_header.i_aspect_ratio_information = ps_context->i_sequence_aspect_ratio_information;
+    s_sequence_header.i_frame_rate_code = ps_context->i_sequence_pulldown_frame_rate_code; /* the pulldown ( output ) frame rate code is signalled */
+    s_sequence_header.b_marker_bit = 1;
+    if( !ps_context->b_sequence_mpeg1 )
+    {
+        s_sequence_header.i_bit_rate_value = ( ( ps_context->s_ratectrl.i_vbvrate + 399 ) / 400 ) & 0x3ffff; /* low 18 bits, the upper bits go into the sequence extension */
+        s_sequence_header.i_vbv_buffer_size_value = ( ( ps_context->s_ratectrl.i_vbv_size + ( ( 16 * 1024 ) - 1 ) ) / ( 16 * 1024 ) ) & 0x3ff; /* low 10 bits, the upper bits go into the sequence extension */
+        s_sequence_header.b_constrained_parameters_flag = FALSE;
+    }
+    else
+    {
+        /* MPEG1__ FIXME: bitrate and constrained parameters flag */
+
+        if( ps_context->b_sequence_cbr )
+        {
+            s_sequence_header.i_bit_rate_value = ( ( ps_context->s_ratectrl.i_vbvrate + 399 ) / 400 ) & 0x3ffff;
+        }
+        else
+        {
+            s_sequence_header.i_bit_rate_value = 0x3ffff;
+        }
+        s_sequence_header.i_vbv_buffer_size_value = ( ( ps_context->s_ratectrl.i_vbv_size + ( ( 16 * 1024 ) - 1 ) ) / ( 16 * 1024 ) ) & 0x3ff;
+        s_sequence_header.b_constrained_parameters_flag = FALSE;
+    }
+
+    if( memcmp( ps_context->rgui8_intra_quantiser_matrix, rgui8_y262_default_intra_matrix, 64 * sizeof( uint8_t ) ) != 0 )
+    {
+        s_sequence_header.b_load_intra_quantiser_matrix = TRUE;
+        memcpy( s_sequence_header.rgui8_intra_quantiser_matrix, ps_context->rgui8_intra_quantiser_matrix, 64 * sizeof( uint8_t ) );
+    }
+    else
+    {
+        s_sequence_header.b_load_intra_quantiser_matrix = FALSE;
+    }
+
+    if( memcmp( ps_context->rgui8_non_intra_quantiser_matrix, rgui8_y262_default_non_intra_matrix, 64 * sizeof( uint8_t ) ) != 0 )
+    {
+        s_sequence_header.b_load_non_intra_quantiser_matrix = TRUE;
+        memcpy( s_sequence_header.rgui8_non_intra_quantiser_matrix, ps_context->rgui8_non_intra_quantiser_matrix, 64 * sizeof( uint8_t ) );
+    }
+    else
+    {
+        s_sequence_header.b_load_non_intra_quantiser_matrix = FALSE;
+    }
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_SEQUENCE_HEADER, 8 );
+
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_horizontal_size, 12 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_vertical_size, 12 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_aspect_ratio_information, 4 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_frame_rate_code, 4 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_bit_rate_value, 18 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.b_marker_bit, 1 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.i_vbv_buffer_size_value, 10 );
+    y262_bitstream_write( ps_bitstream, s_sequence_header.b_constrained_parameters_flag, 1 );
+
+    y262_bitstream_write( ps_bitstream, s_sequence_header.b_load_intra_quantiser_matrix, 1 );
+    if( s_sequence_header.b_load_intra_quantiser_matrix )
+    {
+        for( i_idx = 0; i_idx < 64; i_idx++ )
+        {
+            y262_bitstream_write( ps_bitstream, s_sequence_header.rgui8_intra_quantiser_matrix[ i_idx ], 8 );
+        }
+    }
+
+    y262_bitstream_write( ps_bitstream, s_sequence_header.b_load_non_intra_quantiser_matrix, 1 );
+    if( s_sequence_header.b_load_non_intra_quantiser_matrix )
+    {
+        for( i_idx = 0; i_idx < 64; i_idx++ )
+        {
+            y262_bitstream_write( ps_bitstream, s_sequence_header.rgui8_non_intra_quantiser_matrix[ i_idx ], 8 );
+        }
+    }
+
+}
+
+
+/*
+ * Writes a sequence extension ( STARTCODE_EXTENSION followed by
+ * H262_SEQUENCE_EXTENSION_ID ) to ps_context->s_bitstream.
+ *
+ * The profile and level byte is ( i_derived_profile << 4 ) | i_derived_level.
+ * The bit rate and vbv buffer size extensions carry the bits above the 18
+ * and 10 bits that y262_write_sequence_header() writes. The size extensions
+ * are always 0 and low delay is always 0. An unknown chroma format falls
+ * back to 420.
+ */
+void y262_write_sequence_extension( y262_t *ps_context )
+{
+    y262_bitstream_t *ps_bitstream;
+    y262_sequence_extension_t s_sequence_extension;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    s_sequence_extension.i_profile_and_level_indication = ( ps_context->i_derived_profile << 4 ) | ps_context->i_derived_level;
+    s_sequence_extension.b_progressive_sequence = ps_context->b_progressive_sequence;
+    switch( ps_context->i_sequence_chroma_format )
+    {
+        default:
+        case Y262_CHROMA_FORMAT_420:
+            s_sequence_extension.i_chroma_format = H262_CHROMA_FORMAT_420;
+            break;
+        case Y262_CHROMA_FORMAT_422:
+            s_sequence_extension.i_chroma_format = H262_CHROMA_FORMAT_422;
+            break;
+        case Y262_CHROMA_FORMAT_444:
+            s_sequence_extension.i_chroma_format = H262_CHROMA_FORMAT_444;
+            break;
+
+    }
+    s_sequence_extension.i_horizontal_size_extension = 0;
+    s_sequence_extension.i_vertical_size_extension = 0;
+    s_sequence_extension.i_bit_rate_extension = ( ( ps_context->s_ratectrl.i_vbvrate + 399 ) / 400 ) >> 18; /* bits above the 18 written in the sequence header */
+    s_sequence_extension.b_marker_bit = TRUE;
+    s_sequence_extension.i_vbv_buffer_size_extension = ( ( ps_context->s_ratectrl.i_vbv_size + ( ( 16 * 1024 ) - 1 ) ) / ( 16 * 1024 ) ) >> 10; /* bits above the 10 written in the sequence header */
+    s_sequence_extension.b_low_delay = 0;
+    s_sequence_extension.i_frame_rate_extension_n = ps_context->i_sequence_frame_rate_extension_n;
+    s_sequence_extension.i_frame_rate_extension_d = ps_context->i_sequence_frame_rate_extension_d;
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_EXTENSION, 8 );
+    y262_bitstream_write( ps_bitstream, H262_SEQUENCE_EXTENSION_ID, 4 );
+
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_profile_and_level_indication, 8 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.b_progressive_sequence, 1 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_chroma_format, 2 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_horizontal_size_extension, 2 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_vertical_size_extension, 2 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_bit_rate_extension, 12 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.b_marker_bit, 1 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_vbv_buffer_size_extension, 8 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.b_low_delay, 1 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_frame_rate_extension_n, 2 );
+    y262_bitstream_write( ps_bitstream, s_sequence_extension.i_frame_rate_extension_d, 5 );
+
+}
+
+/*
+ * Values of ps_context->i_sequence_video_format. They are used as index into
+ * the tables of y262_write_sequence_display_extension().
+ */
+#define Y262_VIDEOFORMAT_PAL     0
+#define Y262_VIDEOFORMAT_NTSC    1
+#define Y262_VIDEOFORMAT_SECAM   2
+#define Y262_VIDEOFORMAT_709     3
+#define Y262_VIDEOFORMAT_UNKNOWN 4
+/*
+ * Writes a sequence display extension ( STARTCODE_EXTENSION followed by
+ * H262_SEQUENCE_DISPLAY_EXTENSION_ID ) to ps_context->s_bitstream.
+ *
+ * The video format code and the colour description are looked up by
+ * ps_context->i_sequence_video_format, values outside of the
+ * Y262_VIDEOFORMAT_* range are treated as Y262_VIDEOFORMAT_UNKNOWN. For
+ * Y262_VIDEOFORMAT_UNKNOWN no colour description is written. The display
+ * size is the sequence display width and height.
+ */
+void y262_write_sequence_display_extension( y262_t *ps_context )
+{
+    int32_t i_video_format;
+    int32_t rgi_video_format[ 5 ] = { 1, 2, 3, 5, 5 }; /* all tables: PAL, NTSC, SECAM, 709, UNKNOWN */
+    int32_t rgb_colour_description_present[ 5 ] = { 1, 1, 1, 1, 0 };
+    int32_t rgi_colour_primaries[ 5 ] = { 5, 4, 5, 1, 0 };
+    int32_t rgi_transfer_characteristics[ 5 ] = { 4, 6, 4, 1, 0 };
+    int32_t rgi_matrix_coefficients[ 5 ] = { 5, 6, 5, 1, 0 };
+    y262_bitstream_t *ps_bitstream;
+    y262_sequence_display_extension_t s_sequence_display_extension;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    i_video_format = ps_context->i_sequence_video_format;
+    if( i_video_format < Y262_VIDEOFORMAT_PAL || i_video_format > Y262_VIDEOFORMAT_UNKNOWN )
+    {
+        i_video_format = Y262_VIDEOFORMAT_UNKNOWN; /* keeps the table index in range */
+    }
+
+    s_sequence_display_extension.i_video_format = rgi_video_format[ i_video_format ];
+    s_sequence_display_extension.b_colour_description = rgb_colour_description_present[ i_video_format ];
+    s_sequence_display_extension.s_colour_description.i_colour_primaries = rgi_colour_primaries[ i_video_format ];
+    s_sequence_display_extension.s_colour_description.i_transfer_characteristics = rgi_transfer_characteristics[ i_video_format ];
+    s_sequence_display_extension.s_colour_description.i_matrix_coefficients = rgi_matrix_coefficients[ i_video_format ];
+    s_sequence_display_extension.i_display_horizontal_size = ps_context->i_sequence_display_width;
+    s_sequence_display_extension.b_marker_bit = TRUE;
+    s_sequence_display_extension.i_display_vertical_size = ps_context->i_sequence_display_height;
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_EXTENSION, 8 );
+    y262_bitstream_write( ps_bitstream, H262_SEQUENCE_DISPLAY_EXTENSION_ID, 4 );
+
+    y262_bitstream_write( ps_bitstream, s_sequence_display_extension.i_video_format, 3 );
+    y262_bitstream_write( ps_bitstream, s_sequence_display_extension.b_colour_description, 1 );
+    if( s_sequence_display_extension.b_colour_description )
+    {
+        y262_bitstream_write( ps_bitstream, s_sequence_display_extension.s_colour_description.i_colour_primaries, 8 );
+        y262_bitstream_write( ps_bitstream, s_sequence_display_extension.s_colour_description.i_transfer_characteristics, 8 );
+        y262_bitstream_write( ps_bitstream, s_sequence_display_extension.s_colour_description.i_matrix_coefficients, 8 );
+    }
+    y262_bitstream_write( ps_bitstream, s_sequence_display_extension.i_display_horizontal_size, 14 );
+    y262_bitstream_write( ps_bitstream, s_sequence_display_extension.b_marker_bit, 1 );
+    y262_bitstream_write( ps_bitstream, s_sequence_display_extension.i_display_vertical_size, 14 );
+
+}
+/*
+ * Writes a group of pictures header ( STARTCODE_GROUP ) to
+ * ps_context->s_bitstream.
+ *
+ * The time code is built from s_ratectrl.i64_output_seconds, split into
+ * hours ( modulo 24 ), minutes and seconds, and s_ratectrl.i64_output_frames
+ * as the picture count. The drop frame flag is always FALSE and broken link
+ * is always FALSE, closed gop follows ps_context->b_closed_gop.
+ *
+ * NOTE(review): the picture count goes through a 6 bit field and
+ * y262_bitstream_write() masks the value to the field width, so this
+ * presumably expects i64_output_frames to be the frame count within the
+ * current second - confirm against the rate control code.
+ */
+void y262_write_group_of_pictures_header( y262_t *ps_context )
+{
+    y262_bitstream_t *ps_bitstream;
+    y262_group_of_pictures_header_t s_gop;
+    bool_t b_drop_frame_flag;
+    int64_t i64_ticks_per_unit, i64_ticks;
+    int32_t i_hours, i_minutes, i_seconds, i_frames;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    s_gop.i_time_code = 0; /* not written, the time code fields are computed below */
+    s_gop.b_closed_gop = !!ps_context->b_closed_gop;
+    s_gop.b_broken_link = FALSE;
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_GROUP, 8 );
+
+    /* fixme: need next picture with temporal reference 0, is this here correct ? */
+    i64_ticks = ps_context->s_ratectrl.i64_output_seconds;
+
+    i64_ticks_per_unit = 3600LL;
+    i_hours = ( int32_t )( i64_ticks / i64_ticks_per_unit );
+    i64_ticks = i64_ticks % i64_ticks_per_unit;
+    i_hours = i_hours % 24; /* the hours field is 5 bits wide, wrap at one day */
+
+    i64_ticks_per_unit = 60LL;
+    i_minutes = ( int32_t )( i64_ticks / i64_ticks_per_unit );
+    i64_ticks = i64_ticks % i64_ticks_per_unit;
+
+    i64_ticks_per_unit = 1;
+    i_seconds = ( int32_t ) (i64_ticks / i64_ticks_per_unit );
+    i64_ticks = i64_ticks % i64_ticks_per_unit;
+
+    i_frames = ( int32_t ) ps_context->s_ratectrl.i64_output_frames;
+    b_drop_frame_flag = FALSE; /* FIXME */
+
+    y262_bitstream_write( ps_bitstream, b_drop_frame_flag, 1 );
+    y262_bitstream_write( ps_bitstream, i_hours, 5 );
+    y262_bitstream_write( ps_bitstream, i_minutes, 6 );
+    y262_bitstream_write( ps_bitstream, 1, 1 ); /* marker */
+    y262_bitstream_write( ps_bitstream, i_seconds, 6 );
+    y262_bitstream_write( ps_bitstream, i_frames, 6 );
+
+    y262_bitstream_write( ps_bitstream, s_gop.b_closed_gop, 1 );
+    y262_bitstream_write( ps_bitstream, s_gop.b_broken_link, 1 );
+
+}
+/*
+ * Writes a picture header ( STARTCODE_PICTURE ) for the current input
+ * picture to ps_context->s_bitstream.
+ *
+ * i_picture_coding_type selects which vector fields are present: P and B
+ * pictures carry the forward fields, B pictures additionally the backward
+ * fields. For MPEG-1 the horizontal f code of the context is written, for
+ * MPEG-2 the fixed value 0x7 is written here and the f codes are written by
+ * y262_write_picture_coding_extension(). Full pel vectors are never
+ * signalled and the extra bit is always FALSE.
+ */
+void y262_write_picture_header( y262_t *ps_context, int32_t i_picture_coding_type )
+{
+    y262_bitstream_t *ps_bitstream;
+    y262_picture_header_t s_picture_header;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    s_picture_header.i_temporal_reference = ps_context->ps_input_picture->i_temporal_reference;
+    s_picture_header.i_picture_coding_type = i_picture_coding_type;
+    s_picture_header.i_vbv_delay = ps_context->ps_input_picture->i_vbv_delay & 0xffff; /* 16 bit field */
+    if( s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_P ||
+        s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_B )
+    {
+        s_picture_header.b_full_pel_forward_vector = FALSE;
+        if( ps_context->b_sequence_mpeg1 )
+        {
+            s_picture_header.i_forward_f_code = ps_context->rgi_fcode[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_HORIZONTAL ];
+        }
+        else
+        {
+            s_picture_header.i_forward_f_code = 0x7;
+        }
+    }
+    if( s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_B )
+    {
+        s_picture_header.b_full_pel_backward_vector = FALSE;
+        if( ps_context->b_sequence_mpeg1 )
+        {
+            s_picture_header.i_backward_f_code = ps_context->rgi_fcode[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_HORIZONTAL ];
+        }
+        else
+        {
+            s_picture_header.i_backward_f_code = 0x7;
+        }
+    }
+
+    s_picture_header.b_extra_bit_picture = FALSE;
+    assert( s_picture_header.b_extra_bit_picture == FALSE );
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_PICTURE, 8 );
+
+    y262_bitstream_write( ps_bitstream, s_picture_header.i_temporal_reference, 10 );
+    y262_bitstream_write( ps_bitstream, s_picture_header.i_picture_coding_type, 3 );
+    y262_bitstream_write( ps_bitstream, s_picture_header.i_vbv_delay, 16 );
+    if( s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_P ||
+        s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_B )
+    {
+        y262_bitstream_write( ps_bitstream, s_picture_header.b_full_pel_forward_vector, 1 );
+        y262_bitstream_write( ps_bitstream, s_picture_header.i_forward_f_code, 3 );
+    }
+    if( s_picture_header.i_picture_coding_type == PICTURE_CODING_TYPE_B )
+    {
+        y262_bitstream_write( ps_bitstream, s_picture_header.b_full_pel_backward_vector, 1 );
+        y262_bitstream_write( ps_bitstream, s_picture_header.i_backward_f_code, 3 );
+    }
+
+    y262_bitstream_write( ps_bitstream, s_picture_header.b_extra_bit_picture, 1 );
+    assert( s_picture_header.b_extra_bit_picture == FALSE );
+}
+/*
+ * Writes a picture coding extension ( STARTCODE_EXTENSION followed by
+ * H262_PICTURE_CODING_EXTENSION_ID ) for the current input picture to
+ * ps_context->s_bitstream.
+ *
+ * The picture structure is always PICTURE_CODING_STRUCTURE_FRAME.
+ * Concealment motion vectors, alternate scan and the composite display flag
+ * are always off, so the composite display fields are never written.
+ * chroma_420_type equals progressive_frame for 420 content and is FALSE for
+ * the other chroma formats.
+ */
+void y262_write_picture_coding_extension( y262_t *ps_context )
+{
+    y262_bitstream_t *ps_bitstream;
+    y262_picture_coding_extension_t s_picture_coding_extension;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    memset( &s_picture_coding_extension, 0, sizeof( s_picture_coding_extension ) );
+
+    s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_HORIZONTAL ] = ps_context->rgi_fcode[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_HORIZONTAL ];
+    s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_VERTICAL ] = ps_context->rgi_fcode[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_VERTICAL ];
+    s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_HORIZONTAL ] = ps_context->rgi_fcode[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_HORIZONTAL ];
+    s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_VERTICAL ] = ps_context->rgi_fcode[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_VERTICAL ];
+    s_picture_coding_extension.i_intra_dc_precision = ps_context->i_intra_dc_precision;
+    s_picture_coding_extension.i_picture_structure = PICTURE_CODING_STRUCTURE_FRAME;
+    s_picture_coding_extension.b_top_field_first = ps_context->ps_input_picture->b_top_field_first;
+    s_picture_coding_extension.b_frame_pred_frame_dct = ps_context->b_frame_pred_frame_dct;
+    s_picture_coding_extension.b_concealment_motion_vectors = FALSE;
+    s_picture_coding_extension.b_q_scale_type = ps_context->b_qscale_type;
+    s_picture_coding_extension.b_intra_vlc_format = ps_context->b_intra_vlc_format;
+    s_picture_coding_extension.b_alternate_scan = 0;
+    s_picture_coding_extension.b_repeat_first_field = ps_context->ps_input_picture->b_repeat_first_field;
+    if( ps_context->i_sequence_chroma_format == Y262_CHROMA_FORMAT_420 )
+    {
+        s_picture_coding_extension.b_chroma_420_type = ps_context->ps_input_picture->b_progressive_frame;
+    }
+    else
+    {
+        s_picture_coding_extension.b_chroma_420_type = FALSE;
+    }
+    s_picture_coding_extension.b_progressive_frame = ps_context->ps_input_picture->b_progressive_frame;
+    s_picture_coding_extension.b_composite_display_flag = FALSE;
+
+    assert( s_picture_coding_extension.b_composite_display_flag == FALSE );
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_EXTENSION, 8 );
+    y262_bitstream_write( ps_bitstream, H262_PICTURE_CODING_EXTENSION_ID, 4 );
+
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_HORIZONTAL ], 4 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_FORWARD ][ PICTURE_CODING_VERTICAL ], 4 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_HORIZONTAL ], 4 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.rgi_f_code[ PICTURE_CODING_BACKWARD ][ PICTURE_CODING_VERTICAL ], 4 );
+
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.i_intra_dc_precision, 2 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.i_picture_structure, 2 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_top_field_first, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_frame_pred_frame_dct, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_concealment_motion_vectors, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_q_scale_type, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_intra_vlc_format, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_alternate_scan, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_repeat_first_field, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_chroma_420_type, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_progressive_frame, 1 );
+    y262_bitstream_write( ps_bitstream, s_picture_coding_extension.b_composite_display_flag, 1 );
+
+    if( s_picture_coding_extension.b_composite_display_flag ) /* never taken, the flag is set to FALSE above */
+    {
+        y262_bitstream_write( ps_bitstream, s_picture_coding_extension.s_composite_display.b_v_axis, 1 );
+        y262_bitstream_write( ps_bitstream, s_picture_coding_extension.s_composite_display.i_field_sequence, 3 );
+        y262_bitstream_write( ps_bitstream, s_picture_coding_extension.s_composite_display.b_sub_carrier, 1 );
+        y262_bitstream_write( ps_bitstream, s_picture_coding_extension.s_composite_display.i_burst_amplitude, 7 );
+        y262_bitstream_write( ps_bitstream, s_picture_coding_extension.s_composite_display.i_sub_carrier_phase, 8 );
+    }
+
+}
+/*
+ * Writes a user data block ( STARTCODE_USER_DATA ) to
+ * ps_context->s_bitstream. The payload is entry i_which of the user data
+ * array of the current input picture, i_len bytes are written as is.
+ *
+ * NOTE(review): the entry is dereferenced without a NULL check, presumably
+ * the caller only passes populated indices - confirm.
+ */
+void y262_write_user_data( y262_t *ps_context, int32_t i_which )
+{
+    y262_bitstream_t *ps_bitstream;
+    y262_user_data_t *ps_ud = ps_context->ps_input_picture->rgps_user_data[ i_which ];
+    int32_t i_idx;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    y262_bitstream_write( ps_bitstream, 1, 24 );
+    y262_bitstream_write( ps_bitstream, STARTCODE_USER_DATA, 8 );
+
+    for( i_idx = 0; i_idx < ps_ud->i_len; i_idx++ )
+    {
+        y262_bitstream_write( ps_bitstream, ps_ud->rgui8_user_data[ i_idx ], 8 );
+    }
+}
+
+/*
+ * Writes i_num_bytes zero bytes to ps_context->s_bitstream. Nothing is
+ * written when i_num_bytes is zero or negative.
+ */
+void y262_write_zero_stuffing( y262_t *ps_context, int32_t i_num_bytes )
+{
+    y262_bitstream_t *ps_bitstream;
+    int32_t i_idx;
+
+    ps_bitstream = &ps_context->s_bitstream;
+
+    for( i_idx = 0; i_idx < i_num_bytes; i_idx++ )
+    {
+        y262_bitstream_write( ps_bitstream, 0, 8 );
+    }
+}
+
+
diff --git a/src/y262/aboveslicelevel.h b/src/y262/aboveslicelevel.h
new file mode 100644
index 0000000..a9a7e45
--- /dev/null
+++ b/src/y262/aboveslicelevel.h
@@ -0,0 +1,39 @@
+/*
+Copyright (c) 2013, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+/*
+ * Writers for the bitstream syntax above the slice level. Each function
+ * appends one syntax element to the bitstream of the passed context.
+ */
+void y262_write_sequence_header( y262_t *ps_context ); /* sequence header, including non default quantiser matrices */
+void y262_write_sequence_extension( y262_t *ps_context ); /* sequence extension */
+void y262_write_sequence_display_extension( y262_t *ps_context ); /* sequence display extension */
+void y262_write_group_of_pictures_header( y262_t *ps_context ); /* group of pictures header with time code */
+void y262_write_picture_header( y262_t *ps_context, int32_t i_picture_coding_type ); /* picture header for the given picture coding type */
+void y262_write_picture_coding_extension( y262_t *ps_context ); /* picture coding extension */
+void y262_write_user_data( y262_t *ps_context, int32_t i_which ); /* user data entry i_which of the current input picture */
+void y262_write_zero_stuffing( y262_t *ps_context, int32_t i_num_bytes ); /* i_num_bytes zero bytes */
+
diff --git a/src/y262/bitstream.c b/src/y262/bitstream.c
new file mode 100644
index 0000000..6f18aed
--- /dev/null
+++ b/src/y262/bitstream.c
@@ -0,0 +1,123 @@
+/*
+Copyright (c) 2013, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2.
Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "y262.h"
+
+/*
+ * Allocates an output buffer of i_length bytes with y262_alloc() and resets
+ * the writer state to the start of that buffer.
+ *
+ * NOTE(review): the result of y262_alloc() is not checked here - confirm
+ * whether y262_alloc() can return NULL.
+ */
+void y262_bitstream_init( y262_bitstream_t *ps_bitstream, int32_t i_length )
+{
+    ps_bitstream->pui8_bitstream = ( uint8_t * )y262_alloc( sizeof( uint8_t ) * i_length );
+    ps_bitstream->i_length = i_length;
+
+    ps_bitstream->i_byte_count = 0;
+    ps_bitstream->i_next_bit = 7;
+
+    ps_bitstream->pui8_codeword_ptr = ps_bitstream->pui8_bitstream;
+    ps_bitstream->ui_codeword = 0;
+    ps_bitstream->i_codeword_fill = 0;
+}
+/*
+ * Releases the output buffer with y262_dealloc().
+ */
+void y262_bitstream_deinit( y262_bitstream_t *ps_bitstream )
+{
+    y262_dealloc( ps_bitstream->pui8_bitstream );
+}
+/*
+ * Moves the write pointer back to the start of the buffer. Bits that are
+ * still pending in the accumulator ( ui_codeword / i_codeword_fill ) are
+ * kept.
+ */
+void y262_bitstream_advance( y262_bitstream_t *ps_bitstream )
+{
+    ps_bitstream->pui8_codeword_ptr = ps_bitstream->pui8_bitstream;
+}
+/*
+ * Moves the write pointer back to the start of the buffer and drops all
+ * pending bits.
+ */
+void y262_bitstream_reset( y262_bitstream_t *ps_bitstream )
+{
+    ps_bitstream->pui8_codeword_ptr = ps_bitstream->pui8_bitstream;
+    ps_bitstream->ui_codeword = 0;
+    ps_bitstream->i_codeword_fill = 0;
+    ps_bitstream->i_byte_count = 0;
+    ps_bitstream->i_next_bit = 7;
+}
+
+/*
+ * Appends the low ui_length bits of ui_code to the bitstream, most
+ * significant bit first. Bits above ui_length in ui_code are ignored.
+ *
+ * Pending bits are kept left aligned in the 32 bit accumulator ui_codeword,
+ * complete bytes are stored to the buffer right away, so at most 7 bits stay
+ * pending after the call. That is why a single accumulation step can take at
+ * most 24 bits.
+ *
+ * A length of 0 writes nothing. Lengths above 24 are split into a leading
+ * part and a trailing part of 24 bits; for lengths above 32 the field is
+ * padded with leading zero bits.
+ *
+ * NOTE(review): the write pointer is not checked against i_length, the
+ * caller has to make sure the buffer is large enough.
+ */
+void y262_bitstream_write( y262_bitstream_t *ps_bitstream, uint32_t ui_code, uint32_t ui_length )
+{
+    static const uint32_t rgui_mask[ 25 ] = {
+        0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f,
+        0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff,
+        0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff,
+        0xffffff
+    };
+
+    if( ui_length == 0 )
+    {
+        /* nothing to append, also avoids a shift by 32 on an empty accumulator */
+        return;
+    }
+    if( ui_length > 24 )
+    {
+        /* the mask table and the accumulator only handle 24 bits per step, write the leading bits first */
+        y262_bitstream_write( ps_bitstream, ui_code >> 24, ui_length - 24 );
+        ui_length = 24;
+    }
+
+    ps_bitstream->ui_codeword |= ( ui_code & rgui_mask[ ui_length ] ) << ( 32 - ui_length - ps_bitstream->i_codeword_fill );
+    ps_bitstream->i_codeword_fill += ui_length;
+
+    while( ps_bitstream->i_codeword_fill > 7 )
+    {
+        *( ps_bitstream->pui8_codeword_ptr++ ) = ( ps_bitstream->ui_codeword >> 24 );
+        ps_bitstream->ui_codeword <<= 8;
+        ps_bitstream->i_codeword_fill -= 8;
+    }
+}
+
+/*
+ * Returns the start of the buffer and the number of complete bytes written
+ * so far. Pending bits are not included and the writer state is not changed.
+ */
+void y262_bitstream_get( y262_bitstream_t *ps_bitstream, uint8_t **ppui8_bitstream, uint32_t *pui_length )
+{
+    *ppui8_bitstream = ps_bitstream->pui8_bitstream;
+    *pui_length = ( int32_t )( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream );
+}
+/*
+ * Returns the start of the buffer and the number of bytes written. If bits
+ * are pending they are stored as one more byte, padded with zero bits.
+ *
+ * NOTE(review): unlike y262_bitstream_bytealign() the accumulator is not
+ * cleared here, so this looks like it expects a y262_bitstream_reset()
+ * before the next write - confirm against the callers.
+ */
+void y262_bitstream_flush( y262_bitstream_t *ps_bitstream, uint8_t **ppui8_bitstream, uint32_t *pui_length )
+{
+    *ppui8_bitstream = ps_bitstream->pui8_bitstream;
+
+    if( ps_bitstream->i_codeword_fill )
+    {
+        *( ps_bitstream->pui8_codeword_ptr++ ) = ( ps_bitstream->ui_codeword >> 24 );
+        *pui_length = ( int32_t )( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream );
+    }
+    else
+    {
+        *pui_length = ( int32_t )( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream );
+    }
+}
+
+/*
+ * Returns the number of bits written since the start of the buffer,
+ * including the pending bits.
+ */
+int32_t y262_bitstream_bits( y262_bitstream_t *ps_bitstream )
+{
+    return ( int32_t )( ( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream ) * 8 ) + ( ps_bitstream->i_codeword_fill );
+}
+/*
+ * Pads the stream with zero bits up to the next byte boundary. Does nothing
+ * when the stream is already byte aligned.
+ */
+void y262_bitstream_bytealign( y262_bitstream_t *ps_bitstream )
+{
+    if( ps_bitstream->i_codeword_fill )
+    {
+        *( ps_bitstream->pui8_codeword_ptr++ ) = ( ps_bitstream->ui_codeword >> 24 );
+        ps_bitstream->i_codeword_fill = 0;
+        ps_bitstream->ui_codeword <<= 8;
+    }
+}
+
diff --git a/src/y262/bitstream.h b/src/y262/bitstream.h
new file mode 100644
index
0000000..6a0ab93 --- /dev/null +++ b/src/y262/bitstream.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + + + +/* y262 bitstream writer interface. This header has no include guard and includes nothing itself, so y262_bitstream_t and the fixed width integer types must be declared before it is included. */ void y262_bitstream_init( y262_bitstream_t *ps_bitstream, int32_t i_length ); +void y262_bitstream_advance( y262_bitstream_t *ps_bitstream ); +void y262_bitstream_reset( y262_bitstream_t *ps_bitstream ); +void y262_bitstream_deinit( y262_bitstream_t *ps_bitstream ); + +void y262_bitstream_write( y262_bitstream_t *ps_bitstream, uint32_t ui_code, uint32_t ui_length ); + +void y262_bitstream_get( y262_bitstream_t *ps_bitstream, uint8_t **ppui8_bitstream, uint32_t *pui_length ); + +int32_t y262_bitstream_bits( y262_bitstream_t *ps_bitstream ); + +void y262_bitstream_flush( y262_bitstream_t *ps_bitstream, uint8_t **ppui8_bitstream, uint32_t *pui_length ); + +void y262_bitstream_bytealign( y262_bitstream_t *ps_bitstream ); \ No newline at end of file diff --git a/src/y262/configure.ac b/src/y262/configure.ac new file mode 100644 index 0000000..89d22b2 --- /dev/null +++ b/src/y262/configure.ac @@ -0,0 +1,36 @@ +AC_PREREQ([2.68]) +AC_INIT([liby262], [1.0], [ralf.willenbacher@gmail.com]) +AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +AC_CANONICAL_HOST +AC_CONFIG_SRCDIR([y262api.c]) +AC_CONFIG_HEADERS([config.h]) +AC_ARG_ENABLE([debug],[AS_HELP_STRING([--enable-debug],[compile and link debug information @<:@default=no@:>@])],[makedebug=$enableval],[makedebug=no]) +AS_IF([test "x$makedebug" != xno],[CFLAGS="-O0 -g";CXXFLAGS="-O0 -g"],[]) +AS_IF([test "x$CFLAGS" = x],[CFLAGS="-O2 -Wall"],[]) +AC_PROG_CC +AC_PROG_CXX +AM_PROG_CC_C_O +AM_PROG_AR +AM_PROG_AS +AC_PROG_RANLIB +AC_CHECK_PROG([YASM],[yasm],[yasm],[no]) +AS_IF([test "x$YASM" = xno],[AC_MSG_FAILURE([could not find the assembler yasm])],[]) +AC_CHECK_LIB([m],[sqrt]) +AC_CHECK_LIB([pthread],[pthread_create]) +AC_DEFINE([HAVE_LIBPTHREAD], [1], [libpthread available]) +AC_CHECK_HEADERS([stdlib.h memory.h string.h]) +AC_TYPE_SIZE_T +AC_CHECK_FUNCS([floor pow memset memcpy sqrt]) + +YASM_I386=no +YASM_X86_64=no +AS_IF([test "x$YASM" != xno],[AS_CASE([$host], + [*i386*], [YASM_I386=yes], + [*x86_64*], [YASM_X86_64=yes], + [] + )]) 
+AM_CONDITIONAL([WITH_YASM_I386], [test "$YASM_I386" = yes]) +AM_CONDITIONAL([WITH_YASM_X86_64], [test "$YASM_X86_64" = yes]) + +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/src/y262/lookahead.c b/src/y262/lookahead.c new file mode 100644 index 0000000..a9e164b --- /dev/null +++ b/src/y262/lookahead.c @@ -0,0 +1,718 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "y262.h" + +/* Returns the index of the first entry of rgs_buffered_input_pictures whose b_used flag is FALSE. The scan runs under p_resource_mutex when b_multithreading is set. If every entry is in use the function asserts; with asserts compiled out it returns 0. */ int32_t y262_get_free_input_frame_idx( y262_t *ps_y262 ) +{ + int32_t i_idx; + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + + for( i_idx = 0; i_idx < ps_y262->i_max_buffered_input_pictures; i_idx++ ) + { + if( ps_y262->rgs_buffered_input_pictures[ i_idx ].b_used == FALSE ) + { + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + return i_idx; + } + } + + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + + assert( FALSE ); + return 0; +} + +/* Returns the index of the in-use buffered input picture whose i_pon equals i_pon, scanning under p_resource_mutex when b_multithreading is set. A missing picture trips the assert; with asserts compiled out 0 is returned, so the result is never negative. */ int32_t y262_get_input_frame_pon( y262_t *ps_y262, int32_t i_pon ) +{ + int32_t i_idx; + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + + for( i_idx = 0; i_idx < ps_y262->i_max_buffered_input_pictures; i_idx++ ) + { + if( ps_y262->rgs_buffered_input_pictures[ i_idx ].b_used == TRUE && + ps_y262->rgs_buffered_input_pictures[ i_idx ].i_pon == i_pon ) + { + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + return i_idx; + } + } + + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + + assert( FALSE ); + return 0; +} + +/* Same lookup as y262_get_input_frame_pon, but matching on i_don: returns the index of the in-use buffered input picture whose i_don equals i_don, asserts when there is none and returns 0 with asserts compiled out. */ int32_t y262_get_input_frame_don( y262_t *ps_y262, int32_t i_don ) +{ + int32_t i_idx; + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + + for( i_idx = 0; i_idx < ps_y262->i_max_buffered_input_pictures; i_idx++ ) + { + if( ps_y262->rgs_buffered_input_pictures[ i_idx ].b_used == TRUE && + ps_y262->rgs_buffered_input_pictures[ i_idx ].i_don == i_don ) + { + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + return i_idx; + } + } + + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + + assert( FALSE ); + return 0; +} + + +void 
y262_lookahead_analyze_mb_intra( y262_t *ps_y262, y262_lookahead_mb_t *ps_mb, int32_t i_mb_x, int32_t i_mb_y, y262_picture_t *ps_pic ) +{ + /* Lookahead intra estimate for the 16x16 luma block at macroblock ( i_mb_x, i_mb_y ): SATD against an all-zero row read with stride 0, less a quarter of the SAD against the same zeros, plus 10 and floored at 10. The result seeds both i_intra_cost and i_best_cost, so i_intra_cost is never below 10. */ int32_t i_satd, i_sad; + const uint8_t rgui8_zero[ 16 ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8_t *pui8_mb; + + pui8_mb = ps_pic->pui8_luma + ( i_mb_x << 4 ) + ( ( i_mb_y << 4 ) * ps_y262->i_sequence_width ); + i_satd = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x16 ]( pui8_mb, ps_y262->i_sequence_width, ( uint8_t *)rgui8_zero, 0 ); + i_sad = ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x16 ]( ( uint8_t *)rgui8_zero, 0, pui8_mb, ps_y262->i_sequence_width ); + i_satd -= i_sad >> 2; /* - dc */ + + i_satd = MAX( i_satd + 10, 10 ); + + ps_mb->i_best_cost = ps_mb->i_intra_cost = i_satd; + ps_mb->i_best_mode = LOOKAHEAD_MODE_INTRA; +} + +/* Motion-searches one 16x16 luma block of ps_pic in ps_ref for prediction slot i_s and stores the resulting vector and cost in ps_mb. The vector range is derived from rgi_fcode[ i_s ]; the predictor is the left neighbour's vector for the same slot, or zero in the first column. When the cost beats i_best_cost the best mode becomes LOOKAHEAD_MODE_INTER_FW for i_s == 0 and LOOKAHEAD_MODE_INTER_BW otherwise, so i_best_cost must already hold the intra estimate. */ void y262_lookahead_analyze_mb_inter( y262_t *ps_y262, y262_lookahead_mb_t *ps_mb, int32_t i_mb_x, int32_t i_mb_y, int32_t i_s, y262_picture_t *ps_pic, y262_picture_t *ps_ref ) +{ + int32_t i_fcode_x, i_fcode_y; + y262_me_context_t s_me; + + i_fcode_x = ps_y262->rgi_fcode[ i_s ][ 0 ]; + i_fcode_y = ps_y262->rgi_fcode[ i_s ][ 1 ]; + s_me.pui8_blk = ps_pic->pui8_luma + ( i_mb_x << 4 ) + ( ( i_mb_y << 4 ) * ps_y262->i_sequence_width ); + s_me.i_blk_stride = ps_y262->i_sequence_width; + s_me.i_blk_type = BLOCK_TYPE_16x16; + /* vector limits: -( 2 to the power fcode + 2 ) up to one less than its positive counterpart */ s_me.i_min_mv_x = -( 1 << ( 3 + i_fcode_x - 1 ) ); + s_me.i_min_mv_y = -( 1 << ( 3 + i_fcode_y - 1 ) ); + s_me.i_max_mv_x = ( 1 << ( 3 + i_fcode_x - 1 ) ) - 1; + s_me.i_max_mv_y = ( 1 << ( 3 + i_fcode_y - 1 ) ) - 1; + s_me.i_x_offset = i_mb_x << 4; + s_me.i_y_offset = i_mb_y << 4; + s_me.i_num_candidates_fp = 0; + s_me.i_lambda = 1; + s_me.i_ref_width = ps_y262->i_sequence_width; + s_me.i_ref_height = ps_y262->i_sequence_height; + s_me.i_ref_stride = ps_y262->i_sequence_width; + s_me.pui8_ref = ps_ref->pui8_luma; + /* ps_mb[ -1 ] is the macroblock to the left in the same row */ if( i_mb_x > 0 ) + { + s_me.i_pred_mv_x = ps_mb[ -1 ].rgi_mvs[ i_s ][ 0 ]; + s_me.i_pred_mv_y = ps_mb[ -1 ].rgi_mvs[ i_s ][ 1 ]; + } + else + { + 
s_me.i_pred_mv_x = 0; + s_me.i_pred_mv_y = 0; + } + s_me.i_me_call = MECALL_LOOKAHEAD; + y262_motion_search( ps_y262, &s_me ); + + ps_mb->rgi_mvs[ i_s ][ 0 ] = s_me.i_best_mv_x; + ps_mb->rgi_mvs[ i_s ][ 1 ] = s_me.i_best_mv_y; + /* inter cost is the best SAD of the search plus a constant 4 */ ps_mb->rgi_mv_costs[ i_s ] = s_me.i_best_mv_sad + 4; + + if( ps_mb->rgi_mv_costs[ i_s ] < ps_mb->i_best_cost ) + { + ps_mb->i_best_cost = ps_mb->rgi_mv_costs[ i_s ]; + if( i_s == 0 ) + { + ps_mb->i_best_mode = LOOKAHEAD_MODE_INTER_FW; + } + else + { + ps_mb->i_best_mode = LOOKAHEAD_MODE_INTER_BW; + } + } +} + +/* Analyzes macroblock rows i_mb_y_first through i_mb_y_last ( inclusive ) of ps_pic. Per macroblock: i_quantizer_scale is reset to 1 << 12, i_quantizer_aq_scale is derived from the summed luma and chroma variance when b_variance_aq is set ( otherwise 1 << 12 ), then the intra estimate and one inter estimate per non-NULL reference are run. NOTE(review): i_lookahead_size_y is computed but never read in this function. */ void y262_lookahead_analyze_slice( y262_t *ps_y262, y262_picture_t *ps_pic, y262_picture_t *ps_fw_ref, y262_picture_t *ps_bw_ref, int32_t i_mb_y_first, int32_t i_mb_y_last ) +{ + int32_t i_lookahead_size_x, i_lookahead_size_y, i_mb_x, i_mb_y, i_mb_idx; + y262_lookahead_mb_t *ps_mb; + + i_lookahead_size_x = ps_y262->i_sequence_width >> 4; + i_lookahead_size_y = ps_y262->i_sequence_height >> 4; + + for( i_mb_y = i_mb_y_first; i_mb_y <= i_mb_y_last; i_mb_y++ ) + { + for( i_mb_x = 0; i_mb_x < i_lookahead_size_x; i_mb_x++ ) + { + int32_t i_variance_frame; + i_mb_idx = i_mb_x + ( i_lookahead_size_x * i_mb_y ); + + ps_mb = &ps_pic->ps_lookahead[ i_mb_idx ]; + ps_mb->i_quantizer_scale = ( 1 << 12 ); + + if( ps_y262->b_variance_aq ) + { + /* NOTE(review): the chroma reads below use half the luma stride and an 8 row step per macroblock, which looks like a 4:2:0 plane layout - confirm for other chroma formats. The scale is the sixth root of the summed variance divided by 4, in 1 << 12 fixed point, and never less than 1 << 10. */ i_variance_frame = ps_y262->s_funcs.f_variance_16x16( ps_pic->pui8_luma + ( i_mb_x << 4 ) + ( ( i_mb_y << 4 ) * ps_y262->i_sequence_width ), ps_y262->i_sequence_width ); + i_variance_frame += ps_y262->s_funcs.f_variance_8x8( ps_pic->pui8_cb + ( i_mb_x << 3 ) + ( ( i_mb_y << 3 ) * ( ps_y262->i_sequence_width >> 1 ) ), ps_y262->i_sequence_width >> 1 ); + i_variance_frame += ps_y262->s_funcs.f_variance_8x8( ps_pic->pui8_cr + ( i_mb_x << 3 ) + ( ( i_mb_y << 3 ) * ( ps_y262->i_sequence_width >> 1 ) ), ps_y262->i_sequence_width >> 1 ); + ps_mb->i_quantizer_aq_scale = MAX( 1 << 10,( int32_t )( ( pow( i_variance_frame, 1.0/6.0 ) / 4.0 ) * ( 1 << 12 ) ) ); + } + else + { + ps_mb->i_quantizer_aq_scale = ( 1 << 12 ); + 
} + + /* intra first: it initialises i_best_cost, which the inter estimates then try to beat */ y262_lookahead_analyze_mb_intra( ps_y262, ps_mb, i_mb_x, i_mb_y, ps_pic ); + if( ps_fw_ref ) + { + y262_lookahead_analyze_mb_inter( ps_y262, ps_mb, i_mb_x, i_mb_y, 0, ps_pic, ps_fw_ref ); + } + if( ps_bw_ref ) + { + y262_lookahead_analyze_mb_inter( ps_y262, ps_mb, i_mb_x, i_mb_y, 1, ps_pic, ps_bw_ref ); + } + /* fixme: bi ? */ + } + } +} + +/* Runs the lookahead analysis for a whole picture. Records the pon of each supplied reference in i_forward_pon / i_backward_pon ( -1 when the reference is NULL ), splits the macroblock rows into i_num_lookahead_encoders contiguous ranges and analyzes them on the lookahead threads when b_multithreading is set or inline otherwise, then sums the per-macroblock costs into i_frame_intra_cost and i_frame_cost. For I pictures i_frame_cost uses the intra cost even where an inter mode was cheaper. */ void y262_lookahead_analyze_frame( y262_t *ps_y262, y262_picture_t *ps_pic, y262_picture_t *ps_fw_ref, y262_picture_t *ps_bw_ref ) +{ + int32_t i_lookahead_size_x, i_lookahead_size_y, i_mb_x, i_mb_y, i_mb_idx, i_frame_cost, i_frame_intra_cost, i_slice_encoder; + y262_lookahead_mb_t *ps_mb; + + i_lookahead_size_x = ps_y262->i_sequence_width >> 4; + i_lookahead_size_y = ps_y262->i_sequence_height >> 4; + + if( ps_fw_ref ) + { + ps_pic->i_forward_pon = ps_fw_ref->i_pon; + } + else + { + ps_pic->i_forward_pon = -1; + } + if( ps_bw_ref ) + { + ps_pic->i_backward_pon = ps_bw_ref->i_pon; + } + else + { + ps_pic->i_backward_pon = -1; + } + + i_frame_cost = 0; + i_frame_intra_cost = 0; + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); /* need to lock this to shut up helgrind */ + /* hand each lookahead thread its contiguous range of macroblock rows and the pictures to work on */ for( i_slice_encoder = 0; i_slice_encoder < ps_y262->i_num_lookahead_encoders; i_slice_encoder++ ) + { + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].i_command = Y262_SLICE_THREAD_CMD_LOOKAHEAD; + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].i_first_slice_row = ( i_lookahead_size_y * i_slice_encoder ) / ps_y262->i_num_lookahead_encoders; + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].i_last_slice_row = ( ( i_lookahead_size_y * ( i_slice_encoder + 1 ) ) / ps_y262->i_num_lookahead_encoders ) - 1; + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].ps_pic = ps_pic; + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].ps_fw_ref = ps_fw_ref; + ps_y262->rgs_lookahead_threads[ i_slice_encoder ].ps_bw_ref = ps_bw_ref; + } + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + + for( i_slice_encoder 
= 0; i_slice_encoder < ps_y262->i_num_lookahead_encoders; i_slice_encoder++ ) + { + y262_event_set_g( ps_y262, ps_y262->rgs_lookahead_threads[ i_slice_encoder ].p_start_event ); + } + /* block until every lookahead thread has signalled its finished event */ for( i_slice_encoder = 0; i_slice_encoder < ps_y262->i_num_lookahead_encoders; i_slice_encoder++ ) + { + y262_event_wait_g( ps_y262, ps_y262->rgs_lookahead_threads[ i_slice_encoder ].p_finished_event ); + } + } + else + { + /* single threaded: analyze the same row ranges one after another on this thread */ for( i_slice_encoder = 0; i_slice_encoder < ps_y262->i_num_lookahead_encoders; i_slice_encoder++ ) + { + y262_lookahead_analyze_slice( ps_y262, ps_pic, ps_fw_ref, ps_bw_ref, ( i_lookahead_size_y * i_slice_encoder ) / + ps_y262->i_num_lookahead_encoders, ( ( i_lookahead_size_y * ( i_slice_encoder + 1 ) ) / ps_y262->i_num_lookahead_encoders ) - 1 ); + } + } + + for( i_mb_y = 0; i_mb_y < i_lookahead_size_y; i_mb_y++ ) + { + for( i_mb_x = 0; i_mb_x < i_lookahead_size_x; i_mb_x++ ) + { + i_mb_idx = i_mb_x + ( i_lookahead_size_x * i_mb_y ); + ps_mb = &ps_pic->ps_lookahead[ i_mb_idx ]; + + i_frame_intra_cost += ps_mb->i_intra_cost; + if( ps_pic->i_frame_type == PICTURE_CODING_TYPE_I ) + { + i_frame_cost += ps_mb->i_intra_cost; /* need to special case now that its doing mbtree past i frames */ + } + else + { + i_frame_cost += ps_mb->i_best_cost; + } + } + } + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); /* need to lock this to shut up helgrind */ + } + ps_pic->i_frame_intra_cost = i_frame_intra_cost; + ps_pic->i_frame_cost = i_frame_cost; + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } +} + +/* Processes one lookahead group. The picture with pon i_lookahead_next_ref is coded as I ( when i_keyframe_countdown has run out ) or P and is analyzed first; every picture with pon from i_lookahead_next_pon up to but excluding i_lookahead_next_ref is then coded as B. Decode order numbers are handed out from i_leading_lookahead_don in that same order. */ void y262_process_lookahead_internal( y262_t *ps_y262 ) +{ + int32_t i_idx, i_next_ref, i_next_pon, i_forward_ref_pon, i_backward_ref_pon, i_fw_ref_idx, i_bw_ref_idx; + y262_picture_t *ps_pic, *ps_fw_ref, *ps_bw_ref; + bool_t b_backward_pred_only = FALSE; + + i_next_pon = ps_y262->i_lookahead_next_pon; + i_next_ref = ps_y262->i_lookahead_next_ref; + + ps_pic = 
&ps_y262->rgs_buffered_input_pictures[ y262_get_input_frame_pon( ps_y262, i_next_ref ) ]; + ps_pic->i_don = ps_y262->i_leading_lookahead_don++; + /* keyframe due: code this picture as I and restart the countdown; with b_closed_gop the B pictures of this group are later restricted to the backward reference */ if( ps_y262->i_keyframe_countdown <= 0 ) + { + ps_pic->i_frame_type = PICTURE_CODING_TYPE_I; + ps_y262->i_keyframe_countdown = ps_y262->i_sequence_keyframe_distance; + b_backward_pred_only = ps_y262->b_closed_gop; + } + else + { + ps_pic->i_frame_type = PICTURE_CODING_TYPE_P; + ps_y262->i_keyframe_countdown--; + } + ps_pic->b_backward_pred_only = FALSE; + + if( ps_pic->i_frame_type == PICTURE_CODING_TYPE_I ) + { + ps_y262->i_last_keyframe_temporal_reference = i_next_pon; + } + + ps_pic->i_temporal_reference = ps_pic->i_pon - ps_y262->i_last_keyframe_temporal_reference; + if( ps_pic->i_temporal_reference < 0 ) + { + /* NOTE(review): deliberate store through a null pointer to stop on a negative temporal reference. That is undefined behaviour rather than a guaranteed trap, and it sits in front of the MAX() clamp below whose comment describes handling this very case - decide between an assert and the clamp. */ int32_t *pi_null = NULL; + *pi_null = 0; + } + ps_pic->i_temporal_reference = MAX( 0, ps_pic->i_temporal_reference ); /* if stream ends in a keyframe i_next_pon points past end behind the keyframe */ + + /* NOTE(review): a decode order number is used as a pon here; presumably the two coincide because i_don was just taken from the counter that also seeds i_lookahead_next_pon - confirm */ i_forward_ref_pon = ps_pic->i_don - 1; + i_backward_ref_pon = i_next_ref; + + if( i_forward_ref_pon >= 0 ) + { + i_fw_ref_idx = y262_get_input_frame_pon( ps_y262, i_forward_ref_pon ); + /* NOTE(review): y262_get_input_frame_pon returns 0 when nothing matches, so the >= 0 asserts in this function cannot fire */ assert( i_fw_ref_idx >= 0 ); + ps_fw_ref = &ps_y262->rgs_buffered_input_pictures[ i_fw_ref_idx ]; + } + else + { + ps_fw_ref = NULL; + } + /* i_backward_ref_pon equals i_next_ref, the pon ps_pic itself was looked up with */ i_bw_ref_idx = y262_get_input_frame_pon( ps_y262, i_backward_ref_pon ); + assert( i_bw_ref_idx >= 0 ); + ps_bw_ref = &ps_y262->rgs_buffered_input_pictures[ i_bw_ref_idx ]; + + y262_lookahead_analyze_frame( ps_y262, ps_pic, /*ps_pic->i_frame_type == PICTURE_CODING_TYPE_I ? 
NULL : */ ps_fw_ref, NULL ); + + /* the pictures with a lower pon than the reference picture become B pictures and receive the following decode order numbers */ for( i_idx = i_next_pon; i_idx < i_next_ref; i_idx++ ) + { + ps_pic = &ps_y262->rgs_buffered_input_pictures[ y262_get_input_frame_pon( ps_y262, i_idx ) ]; + ps_pic->i_don = ps_y262->i_leading_lookahead_don++; + ps_pic->i_frame_type = PICTURE_CODING_TYPE_B; + ps_pic->i_temporal_reference = ps_pic->i_pon - ps_y262->i_last_keyframe_temporal_reference; + ps_pic->b_backward_pred_only = b_backward_pred_only; + assert( ps_pic->i_temporal_reference >= 0 ); + + if( !b_backward_pred_only ) + { + y262_lookahead_analyze_frame( ps_y262, ps_pic, ps_fw_ref, ps_bw_ref ); + } + else + { + y262_lookahead_analyze_frame( ps_y262, ps_pic, NULL, ps_bw_ref ); + } + } +} + +/* Starts processing of the group described by i_lookahead_next_pon / i_lookahead_next_ref: signals the lookahead thread's start event when b_multithreading is set, otherwise runs y262_process_lookahead_internal on the calling thread. NOTE(review): b_lookahead_running is only read by asserts in this file; where it is written is not visible here. */ void y262_start_lookahead( y262_t *ps_y262 ) +{ + if( ps_y262->b_multithreading ) + { + assert( !ps_y262->b_lookahead_running ); + ps_y262->s_lookahead_thread.i_command = Y262_LOOKAHEAD_THREAD_CMD_LOOKAHEAD; + y262_event_set_g( ps_y262, ps_y262->s_lookahead_thread.p_start_event ); + } + else + { + y262_process_lookahead_internal( ps_y262 ); + } +} + +/* Selects the next lookahead group, pons i_leading_lookahead_don through i_current_input_pon - 1, and starts the lookahead. The first picture in that range flagged b_force_new_gop becomes the group's reference picture and forces a keyframe by zeroing i_keyframe_countdown; shrinking i_lookahead_next_ref inside the loop also ends the scan. */ void y262_setup_lookahead_next_and_start_lookahead( y262_t *ps_y262 ) +{ + int32_t i_pon, i_buf_idx; + + ps_y262->i_lookahead_next_pon = ps_y262->i_leading_lookahead_don; + ps_y262->i_lookahead_next_ref = ps_y262->i_current_input_pon - 1; + + for( i_pon = ps_y262->i_lookahead_next_pon; i_pon <= ps_y262->i_lookahead_next_ref; i_pon++ ) + { + i_buf_idx = y262_get_input_frame_pon( ps_y262, i_pon ); + if( ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_force_new_gop ) + { + ps_y262->i_lookahead_next_ref = i_pon; + ps_y262->i_keyframe_countdown = 0; + } + } + + y262_start_lookahead( ps_y262 ); +} + + + +/* Waits for the lookahead thread's finished event when b_multithreading is set; does nothing otherwise. */ void y262_finish_lookahead( y262_t *ps_y262 ) +{ + if( ps_y262->b_multithreading ) + { + assert( ps_y262->b_lookahead_running ); + y262_event_wait_g( ps_y262, ps_y262->s_lookahead_thread.p_finished_event ); + } +} + + +/* Computes i_quantizer_scale for every macroblock of ps_pic. Costs of the pictures that follow ps_pic in decode order ( up to i_num_lookahead_pictures ahead ) are propagated backwards along their motion vectors into at most four accumulation buffers taken from rgpi_mbtree_references, and the amount that reaches ps_pic is turned into a scale in 1 << 12 fixed point. B pictures, and pictures nothing was propagated to, get the neutral scale 1 << 12. NOTE(review): no reason for the volatile qualifier on i_ridx is visible in this function. */ void y262_lookahead_mbtree( y262_t *ps_y262, y262_picture_t *ps_pic ) +{ + volatile int32_t i_ridx; + int32_t i_idx, 
i_lookahead_size_x, i_lookahead_size_y, i_mb_x, i_mb_y, i_mb_idx, i_don; + y262_lookahead_mb_t *ps_mb; + int32_t *rgpi_references[ 4 ], rgi_references_don_map[ 4 ]; + + i_lookahead_size_x = ps_y262->i_sequence_width >> 4; + i_lookahead_size_y = ps_y262->i_sequence_height >> 4; + + /* B pictures are not given a propagated scale: every macroblock gets the neutral 1 << 12 */ if( ps_pic->i_frame_type == PICTURE_CODING_TYPE_B ) + { + for( i_idx = 0; i_idx < i_lookahead_size_x * i_lookahead_size_y; i_idx++ ) + { + ps_mb = &ps_pic->ps_lookahead[ i_idx ]; + ps_mb->i_quantizer_scale = ( 1 << 12 ); + } + return; + } + + /* four accumulation buffers; 0x7fffffff marks a buffer that is not yet mapped to a decode order number */ for( i_ridx = 0; i_ridx < 4; i_ridx++ ) + { + rgpi_references[ i_ridx ] = ps_y262->rgpi_mbtree_references[ i_ridx ]; + rgi_references_don_map[ i_ridx ] = 0x7fffffff; + } + + /* walk the following pictures from the furthest one back towards ps_pic */ for( i_don = ps_pic->i_don + ps_y262->i_num_lookahead_pictures; i_don > ps_pic->i_don; i_don-- ) + { + int32_t i_fpic_idx, i_fwpic_idx, i_bwpic_idx; + y262_picture_t *ps_fpic; + + /* skip decode order numbers at or beyond the end of stream */ if( ps_y262->i_current_eof_don >= 0 && i_don >= ps_y262->i_current_eof_don ) + { + continue; + } + + i_fpic_idx = y262_get_input_frame_don( ps_y262, i_don ); + assert( i_fpic_idx >= 0 ); + ps_fpic = &ps_y262->rgs_buffered_input_pictures[ i_fpic_idx ]; + + /* from here on i_fpic_idx is an accumulation buffer index, not a picture index. Non-B pictures look up the buffer mapped to their don; if there is none they claim the buffer holding the largest mapped don ( unmapped buffers compare largest ) and clear it. B pictures have no buffer ( -1 ). */ if( ps_fpic->i_frame_type != PICTURE_CODING_TYPE_B ) + { + int32_t i_oldest; + i_oldest = 0; + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + if( rgi_references_don_map[ i_idx ] == i_don ) + { + break; + } + else if( rgi_references_don_map[ i_idx ] > rgi_references_don_map[ i_oldest ] ) + { + i_oldest = i_idx; + } + } + if( i_idx == 4 ) + { + rgi_references_don_map[ i_oldest ] = i_don; + i_fpic_idx = i_oldest; + memset( rgpi_references[ i_fpic_idx ], 0, sizeof( int32_t ) * i_lookahead_size_x * i_lookahead_size_y ); + } + else + { + i_fpic_idx = i_idx; + } + } + else + { + i_fpic_idx = -1; + } + + /* same buffer lookup for the forward reference; it is ignored when its pon lies before ps_pic */ if( ps_fpic->i_forward_pon >= 0 && ps_pic->i_pon <= ps_fpic->i_forward_pon ) + { + int32_t i_oldest; + i_fwpic_idx = y262_get_input_frame_pon( ps_y262, ps_fpic->i_forward_pon ); + assert( i_fwpic_idx >= 0 ); + + i_oldest = 0; + for( i_idx = 0; i_idx < 4; i_idx++ ) 
+ { + if( rgi_references_don_map[ i_idx ] == ps_y262->rgs_buffered_input_pictures[ i_fwpic_idx ].i_don ) + { + break; + } + else if( rgi_references_don_map[ i_idx ] > rgi_references_don_map[ i_oldest ] ) + { + i_oldest = i_idx; + } + } + if( i_idx == 4 ) + { + rgi_references_don_map[ i_oldest ] = ps_y262->rgs_buffered_input_pictures[ i_fwpic_idx ].i_don; + i_fwpic_idx = i_oldest; + memset( rgpi_references[ i_fwpic_idx ], 0, sizeof( int32_t ) * i_lookahead_size_x * i_lookahead_size_y ); + } + else + { + i_fwpic_idx = i_idx; + } + } + else + { + i_fwpic_idx = -1; + } + + /* and once more for the backward reference, without the pon restriction */ if( ps_fpic->i_backward_pon >= 0 ) + { + int32_t i_oldest; + i_bwpic_idx = y262_get_input_frame_pon( ps_y262, ps_fpic->i_backward_pon ); + assert( i_bwpic_idx >= 0 ); + + i_oldest = 0; + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + if( rgi_references_don_map[ i_idx ] == ps_y262->rgs_buffered_input_pictures[ i_bwpic_idx ].i_don ) + { + break; + } + else if( rgi_references_don_map[ i_idx ] > rgi_references_don_map[ i_oldest ] ) + { + i_oldest = i_idx; + } + } + if( i_idx == 4 ) + { + rgi_references_don_map[ i_oldest ] = ps_y262->rgs_buffered_input_pictures[ i_bwpic_idx ].i_don; + i_bwpic_idx = i_oldest; + memset( rgpi_references[ i_bwpic_idx ], 0, sizeof( int32_t ) * i_lookahead_size_x * i_lookahead_size_y ); + } + else + { + i_bwpic_idx = i_idx; + } + } + else + { + i_bwpic_idx = -1; + } + + for( i_mb_y = 0; i_mb_y < i_lookahead_size_y; i_mb_y++ ) + { + for( i_mb_x = 0; i_mb_x < i_lookahead_size_x; i_mb_x++ ) + { + /* the three tables describe the four macroblocks a displaced block can overlap: per axis the weight is either 32 minus the fraction or the fraction itself, and the offsets step from the top-left macroblock to its right, lower and lower-right neighbours */ int64_t i_refweight, i_ref; + const int32_t rgi_weights[ 4 ][ 2 ] = { { 32, 32 }, { 0, 32 }, { 32, 0 }, { 0, 0 } }; + const int32_t rgi_signs[ 4 ][ 2 ] = { { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; + const int32_t rgi_offsets[ 4 ][ 2 ] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } }; + int32_t *pi_reference; + + i_mb_idx = i_mb_x + ( i_lookahead_size_x * i_mb_y ); + ps_mb = &ps_fpic->ps_lookahead[ i_mb_idx ]; + + /* first term: intra cost minus best cost ( the multiply and divide by i_intra_cost cancel ) */ i_refweight = ( ps_mb->i_intra_cost * ( ( int64_t )ps_mb->i_intra_cost - 
ps_mb->i_best_cost ) ) / ps_mb->i_intra_cost; + /* second term: what was already propagated into this macroblock, scaled by the same ( intra - best ) / intra ratio */ if( i_fpic_idx >= 0 ) + { + i_refweight += ( rgpi_references[ i_fpic_idx ][ i_mb_idx ] * ( ( int64_t )ps_mb->i_intra_cost - ps_mb->i_best_cost ) ) / ps_mb->i_intra_cost; + } + + /* only the reference that matches the macroblock's best mode receives weight; a macroblock whose best mode is intra passes nothing on */ for( i_ref = 0; i_ref < 2; i_ref++ ) + { + if( i_ref == 0 && ps_mb->i_best_mode == LOOKAHEAD_MODE_INTER_FW && i_fwpic_idx >= 0 ) + { + pi_reference = rgpi_references[ i_fwpic_idx ]; + } + else if( i_ref == 1 && ps_mb->i_best_mode == LOOKAHEAD_MODE_INTER_BW && i_bwpic_idx >= 0 ) + { + pi_reference = rgpi_references[ i_bwpic_idx ]; + } + else + { + pi_reference = NULL; + } + if( pi_reference ) + { + int32_t i_tmb_x, i_tmb_y, i_frac_x, i_frac_y, i_tmb_weight; + + /* target position in units of 1/32 macroblock: the low five bits are the fraction, the rest selects the macroblock */ i_tmb_x = ( i_mb_x << 5 ) + ps_mb->rgi_mvs[ i_ref ][ 0 ]; + i_tmb_y = ( i_mb_y << 5 ) + ps_mb->rgi_mvs[ i_ref ][ 1 ]; + i_frac_x = i_tmb_x & 0x1f; + i_frac_y = i_tmb_y & 0x1f; + i_tmb_x >>= 5; + i_tmb_y >>= 5; + + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + int32_t i_ttmb_x, i_ttmb_y; + /* the four weights add up to 1024, hence the >> 10 when accumulating; targets outside the picture are dropped */ i_tmb_weight = ( rgi_weights[ i_idx ][ 0 ] + rgi_signs[ i_idx ][ 0 ] * i_frac_x ) * ( rgi_weights[ i_idx ][ 1 ] + rgi_signs[ i_idx ][ 1 ] * i_frac_y ); + i_ttmb_x = i_tmb_x + rgi_offsets[ i_idx ][ 0 ]; + i_ttmb_y = i_tmb_y + rgi_offsets[ i_idx ][ 1 ]; + if( i_ttmb_x >= 0 && i_ttmb_x < i_lookahead_size_x && + i_ttmb_y >= 0 && i_ttmb_y < i_lookahead_size_y ) + { + pi_reference[ i_ttmb_x + i_ttmb_y * i_lookahead_size_x ] += ( int32_t )( ( i_refweight * i_tmb_weight ) >> 10 ); + } + } + } + } + } + } + } + + /* locate the buffer that accumulated weight for ps_pic itself */ for( i_idx = 0; i_idx < 4; i_idx++ ) + { + if( rgi_references_don_map[ i_idx ] == ps_pic->i_don ) + { + break; + } + } + if( i_idx == 4 ) + { + /* fixme: most likely indicates that this is the last frame in sequence and is not referenced */ + for( i_idx = 0; i_idx < i_lookahead_size_x * i_lookahead_size_y; i_idx++ ) + { + ps_mb = &ps_pic->ps_lookahead[ i_idx ]; + ps_mb->i_quantizer_scale = ( 1 << 12 ); + } + return; + } + + for( i_mb_y = 0; i_mb_y < i_lookahead_size_y; i_mb_y++ ) + { + for( i_mb_x = 0; 
i_mb_x < i_lookahead_size_x; i_mb_x++ ) + { + double d_quantizer_scale; + i_mb_idx = i_mb_x + ( i_lookahead_size_x * i_mb_y ); + ps_mb = &ps_pic->ps_lookahead[ i_mb_idx ]; + + /* ratio of ( propagated weight + own intra cost ) to own intra cost, mapped to a scale that shrinks as the ratio grows, limited to no less than 0.5 and stored in 1 << 12 fixed point */ d_quantizer_scale = ( ( double )( rgpi_references[ i_idx ][ i_mb_idx ] + ps_mb->i_intra_cost ) ) / ps_mb->i_intra_cost; + d_quantizer_scale = 1.0 / ( ( ( sqrt( d_quantizer_scale ) - 1.0 ) * 0.43 ) + 1.0 ); + d_quantizer_scale = MAX( d_quantizer_scale, 0.5 ); + ps_mb->i_quantizer_scale = ( int32_t ) ( ( 1 << 12 ) * d_quantizer_scale ); + } + } +} + + + +/* Copies i_frame_cost and i_frame_type of ps_pic and of the pictures that follow it in decode order into ps_pic->rgi_lookahead_picture_costs and ps_pic->rgi_lookahead_picture_types. Copying stops after i_num_lookahead_pictures entries, after MAX_BITRATE_CONTROL_LOOKAHEAD_PICTURES entries, or at i_current_eof_don, whichever comes first; the number of entries written is stored in ps_pic->i_num_lookahead_pictures. */ void y262_lookahead_fill_ratectrl_vars( y262_t *ps_y262, y262_picture_t *ps_pic ) +{ + int32_t i_don, i_counter = 0; + for( i_don = ps_pic->i_don; i_don < ps_pic->i_don + ps_y262->i_num_lookahead_pictures; i_don++ ) + { + int32_t i_fpic_idx; + y262_picture_t *ps_lpic; + + if( ps_y262->i_current_eof_don >= 0 && i_don >= ps_y262->i_current_eof_don ) + { + break; + } + + if( i_don - ps_pic->i_don >= MAX_BITRATE_CONTROL_LOOKAHEAD_PICTURES ) + { + break; + } + + i_fpic_idx = y262_get_input_frame_don( ps_y262, i_don ); + ps_lpic = &ps_y262->rgs_buffered_input_pictures[ i_fpic_idx ]; + + ps_pic->rgi_lookahead_picture_costs[ i_counter ] = ps_lpic->i_frame_cost; + ps_pic->rgi_lookahead_picture_types[ i_counter ] = ps_lpic->i_frame_type; + i_counter++; + } + ps_pic->i_num_lookahead_pictures = i_counter; +} + + diff --git a/src/y262/lookahead.h b/src/y262/lookahead.h new file mode 100644 index 0000000..b6313f3 --- /dev/null +++ b/src/y262/lookahead.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + + +int32_t y262_get_free_input_frame_idx( y262_t *ps_y262 ); +int32_t y262_get_input_frame_pon( y262_t *ps_y262, int32_t i_pon ); +int32_t y262_get_input_frame_don( y262_t *ps_y262, int32_t i_don ); +void y262_process_lookahead( y262_t *ps_y262 ); +void y262_process_lookahead_internal( y262_t *ps_y262 ); +void y262_lookahead_mbtree( y262_t *ps_y262, y262_picture_t *ps_pic ); +void y262_lookahead_fill_ratectrl_vars( y262_t *ps_y262, y262_picture_t *ps_pic ); + +void y262_lookahead_analyze_slice( y262_t *ps_y262, y262_picture_t *ps_pic, y262_picture_t *ps_fw_ref, y262_picture_t *ps_bw_ref, int32_t i_mb_y_first, int32_t i_mb_y_last ); + +void y262_setup_lookahead_next_and_start_lookahead( y262_t *ps_y262 ); +void y262_finish_lookahead( y262_t *ps_y262 ); + diff --git a/src/y262/mc.c b/src/y262/mc.c new file mode 100644 index 0000000..7719a02 --- /dev/null +++ b/src/y262/mc.c @@ -0,0 +1,387 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "y262.h" + +void y262_motcomp_16x16_00_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_01_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_10_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_11_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_16x8_00_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_01_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_10_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_11_put_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_8x16_00_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_01_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_10_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_11_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_8x8_00_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x8_01_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x8_10_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x8_11_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void 
y262_motcomp_8x4_00_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x4_01_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x4_10_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x4_11_put_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_16x16_00_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_01_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_10_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x16_11_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_16x8_00_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_01_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_10_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_16x8_11_avg_sse2( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_8x16_00_avg_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_01_avg_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_10_avg_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +void y262_motcomp_8x16_11_avg_mmxext( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); + +void y262_motcomp_8x8_00_avg_mmxext( uint8_t 
/*
 * Plain C reference motion compensation kernels.
 *
 * Half-pel index (hpelidx) selects the interpolation:
 *   0 - copy of the sample at the given position
 *   1 - rounded mean of the sample and its right neighbour
 *   2 - rounded mean of the sample and the sample one stride below
 *   any other value - rounded mean of all four of the above samples
 */

/* Returns one predicted sample for the position pui8_pel points at. */
static uint8_t y262_motcomp_ref_sample( const uint8_t *pui8_pel, int32_t i_stride, int32_t i_hpelidx )
{
	switch( i_hpelidx )
	{
		case 0:
			return pui8_pel[ 0 ];
		case 1:
			return ( uint8_t )( ( pui8_pel[ 0 ] + pui8_pel[ 1 ] + 1 ) >> 1 );
		case 2:
			return ( uint8_t )( ( pui8_pel[ 0 ] + pui8_pel[ i_stride ] + 1 ) >> 1 );
		default:
			return ( uint8_t )( ( pui8_pel[ 0 ] + pui8_pel[ 1 ] + pui8_pel[ i_stride ] + pui8_pel[ i_stride + 1 ] + 2 ) >> 2 );
	}
}

/* Writes the i_width x i_height prediction into pui8_dst, overwriting it. */
static void y262_motcomp_ref_put( const uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride,
	int32_t i_width, int32_t i_height, int32_t i_hpelidx )
{
	int32_t i_row, i_col;

	for( i_row = 0; i_row < i_height; i_row++ )
	{
		for( i_col = 0; i_col < i_width; i_col++ )
		{
			pui8_dst[ i_col ] = y262_motcomp_ref_sample( &pui8_src[ i_col ], i_src_stride, i_hpelidx );
		}
		pui8_src += i_src_stride;
		pui8_dst += i_dst_stride;
	}
}

/* Replaces every sample of pui8_dst by the rounded mean of itself and the prediction. */
static void y262_motcomp_ref_avg( const uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride,
	int32_t i_width, int32_t i_height, int32_t i_hpelidx )
{
	int32_t i_row, i_col;

	for( i_row = 0; i_row < i_height; i_row++ )
	{
		for( i_col = 0; i_col < i_width; i_col++ )
		{
			int32_t i_pel = y262_motcomp_ref_sample( &pui8_src[ i_col ], i_src_stride, i_hpelidx );

			pui8_dst[ i_col ] = ( uint8_t )( ( i_pel + pui8_dst[ i_col ] + 1 ) >> 1 );
		}
		pui8_src += i_src_stride;
		pui8_dst += i_dst_stride;
	}
}

/*
 * Emits y262_motcomp_<name>_put and y262_motcomp_<name>_avg for one block
 * size and one half-pel index. Both are thin wrappers around the shared
 * helpers above, with the block geometry fixed at compile time.
 */
#define MC_FUNC_REF( name, i_width, i_height, hpelidx )                                                                     \
void y262_motcomp_##name##_put( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride )          \
{                                                                                                                           \
	y262_motcomp_ref_put( pui8_src, i_src_stride, pui8_dst, i_dst_stride, ( i_width ), ( i_height ), ( hpelidx ) );         \
}                                                                                                                           \
                                                                                                                            \
void y262_motcomp_##name##_avg( uint8_t *pui8_src, int32_t i_src_stride, uint8_t *pui8_dst, int32_t i_dst_stride )          \
{                                                                                                                           \
	y262_motcomp_ref_avg( pui8_src, i_src_stride, pui8_dst, i_dst_stride, ( i_width ), ( i_height ), ( hpelidx ) );         \
}


MC_FUNC_REF( 16x16_00, 16, 16, 0 );
MC_FUNC_REF( 16x16_01, 16, 16, 1 );
MC_FUNC_REF( 16x16_10, 16, 16, 2 );
MC_FUNC_REF( 16x16_11, 16, 16, 3 );

MC_FUNC_REF( 16x8_00, 16, 8, 0 );
MC_FUNC_REF( 16x8_01, 16, 8, 1 );
MC_FUNC_REF( 16x8_10, 16, 8, 2 );
MC_FUNC_REF( 16x8_11, 16, 8, 3 );

MC_FUNC_REF( 8x16_00, 8, 16, 0 );
MC_FUNC_REF( 8x16_01, 8, 16, 1 );
MC_FUNC_REF( 8x16_10, 8, 16, 2 );
MC_FUNC_REF( 8x16_11, 8, 16, 3 );

MC_FUNC_REF( 8x8_00, 8, 8, 0 );
MC_FUNC_REF( 8x8_01, 8, 8, 1 );
MC_FUNC_REF( 8x8_10, 8, 8, 2 );
MC_FUNC_REF( 8x8_11, 8, 8, 3 );

MC_FUNC_REF( 8x4_00, 8, 4, 0 );
MC_FUNC_REF( 8x4_01, 8, 4, 1 );
MC_FUNC_REF( 8x4_10, 8, 4, 2 );
MC_FUNC_REF( 8x4_11, 8, 4, 3 );
ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_00 ] = y262_motcomp_16x8_00_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_01 ] = y262_motcomp_16x8_01_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_10 ] = y262_motcomp_16x8_10_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_11 ] = y262_motcomp_16x8_11_put; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_00 ] = y262_motcomp_8x16_00_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_01 ] = y262_motcomp_8x16_01_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_10 ] = y262_motcomp_8x16_10_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_11 ] = y262_motcomp_8x16_11_put; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_00 ] = y262_motcomp_8x8_00_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_01 ] = y262_motcomp_8x8_01_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_10 ] = y262_motcomp_8x8_10_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_11 ] = y262_motcomp_8x8_11_put; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_00 ] = y262_motcomp_8x4_00_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_01 ] = y262_motcomp_8x4_01_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_10 ] = y262_motcomp_8x4_10_put; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_11 ] = y262_motcomp_8x4_11_put; + + + /* avg */ + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_00 ] = y262_motcomp_16x16_00_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_01 ] = y262_motcomp_16x16_01_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_10 ] = y262_motcomp_16x16_10_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_11 ] = y262_motcomp_16x16_11_avg; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_00 ] = 
y262_motcomp_16x8_00_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_01 ] = y262_motcomp_16x8_01_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_10 ] = y262_motcomp_16x8_10_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_11 ] = y262_motcomp_16x8_11_avg; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_00 ] = y262_motcomp_8x16_00_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_01 ] = y262_motcomp_8x16_01_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_10 ] = y262_motcomp_8x16_10_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_11 ] = y262_motcomp_8x16_11_avg; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_00 ] = y262_motcomp_8x8_00_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_01 ] = y262_motcomp_8x8_01_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_10 ] = y262_motcomp_8x8_10_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_11 ] = y262_motcomp_8x8_11_avg; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_00 ] = y262_motcomp_8x4_00_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_01 ] = y262_motcomp_8x4_01_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_10 ] = y262_motcomp_8x4_10_avg; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_11 ] = y262_motcomp_8x4_11_avg; + +#if 1 + + if( 1 ) + { + /* copy */ + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ MC_BLOCK_00 ] = y262_motcomp_16x16_00_put_sse2; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ MC_BLOCK_01 ] = y262_motcomp_16x16_01_put_sse2; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ MC_BLOCK_10 ] = y262_motcomp_16x16_10_put_sse2; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ MC_BLOCK_11 ] = y262_motcomp_16x16_11_put_sse2; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_00 ] = y262_motcomp_16x8_00_put_sse2; + 
ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_01 ] = y262_motcomp_16x8_01_put_sse2; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_10 ] = y262_motcomp_16x8_10_put_sse2; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ MC_BLOCK_11 ] = y262_motcomp_16x8_11_put_sse2; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_00 ] = y262_motcomp_8x16_00_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_01 ] = y262_motcomp_8x16_01_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_10 ] = y262_motcomp_8x16_10_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x16 ][ MC_BLOCK_11 ] = y262_motcomp_8x16_11_put_mmxext; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_00 ] = y262_motcomp_8x8_00_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_01 ] = y262_motcomp_8x8_01_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_10 ] = y262_motcomp_8x8_10_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x8 ][ MC_BLOCK_11 ] = y262_motcomp_8x8_11_put_mmxext; + + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_00 ] = y262_motcomp_8x4_00_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_01 ] = y262_motcomp_8x4_01_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_10 ] = y262_motcomp_8x4_10_put_mmxext; + ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_8x4 ][ MC_BLOCK_11 ] = y262_motcomp_8x4_11_put_mmxext; + + + + /* avg */ + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_00 ] = y262_motcomp_16x16_00_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_01 ] = y262_motcomp_16x16_01_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_10 ] = y262_motcomp_16x16_10_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ MC_BLOCK_11 ] = y262_motcomp_16x16_11_avg_sse2; + + ps_y262->s_funcs.rgf_motcomp_avg[ 
MC_BLOCK_16x8 ][ MC_BLOCK_00 ] = y262_motcomp_16x8_00_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_01 ] = y262_motcomp_16x8_01_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_10 ] = y262_motcomp_16x8_10_avg_sse2; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ MC_BLOCK_11 ] = y262_motcomp_16x8_11_avg_sse2; + + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_00 ] = y262_motcomp_8x16_00_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_01 ] = y262_motcomp_8x16_01_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_10 ] = y262_motcomp_8x16_10_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x16 ][ MC_BLOCK_11 ] = y262_motcomp_8x16_11_avg_mmxext; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_00 ] = y262_motcomp_8x8_00_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_01 ] = y262_motcomp_8x8_01_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_10 ] = y262_motcomp_8x8_10_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x8 ][ MC_BLOCK_11 ] = y262_motcomp_8x8_11_avg_mmxext; + + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_00 ] = y262_motcomp_8x4_00_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_01 ] = y262_motcomp_8x4_01_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_10 ] = y262_motcomp_8x4_10_avg_mmxext; + ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_8x4 ][ MC_BLOCK_11 ] = y262_motcomp_8x4_11_avg_mmxext; + } +#endif +} + + diff --git a/src/y262/me.c b/src/y262/me.c new file mode 100644 index 0000000..a1cf555 --- /dev/null +++ b/src/y262/me.c @@ -0,0 +1,279 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "y262.h" + + +#define VALID_MV( x, y ) ( ( x ) >= i_min_mv_x && ( x ) <= i_max_mv_x && ( y ) >= i_min_mv_y && ( y ) <= i_max_mv_y ) + +#define MV_COST( x, y ) ( ( ps_y262->rgi_y262_motion_bits_x[ 2048 + ( x ) - i_pred_mv_x ] * i_lambda ) + ( ( ps_y262->rgi_y262_motion_bits_y[ 2048 + ( y ) - i_pred_mv_y ] * i_lambda ) ) ) + +#define TRY_MV( x, y ) \ +do { \ + uint8_t *pui8_mvref; \ + pui8_mvref = pui8_ref + ( x ) + ( ( y ) * ps_ctx->i_ref_stride ); \ + i_sad = ps_y262->s_funcs.rgf_sad[ i_blk_type ]( pui8_mvref, i_ref_stride, pui8_blk, i_blk_stride ); \ + i_sad += MV_COST( ( x ) << 1, ( y ) << 1 ); \ + if( i_sad < i_best_sad ) \ + { \ + i_best_sad = i_sad; \ + i_best_mv_x = ( x ); \ + i_best_mv_y = ( y ); \ + } \ +} while( 0 ) + +bool_t y262_motion_search( y262_t *ps_y262, y262_me_context_t *ps_ctx ) +{ + int32_t i_blk_stride, i_ref_stride, i_blk_type, i_idx, i_best_mv_x, i_best_mv_y, i_best_sad, i_mv_x, i_mv_y, i_scale, i_sad, i_ter; + int32_t i_pred_mv_x, i_pred_mv_y; + int32_t i_lambda; + uint8_t *pui8_blk, *pui8_ref; + int32_t i_min_mv_x, i_min_mv_y, i_max_mv_x, i_max_mv_y; + int32_t i_omv_x, i_omv_y; + + static const int32_t rgi_exhaustive_pattern[ 8 ][ 2 ] = { + { -1, -1 }, { 0, -1 }, { 1, -1 }, + { -1, 0 }, { 1, 0 }, + { -1, 1 }, { 0, 1 }, { 1, 1 } + }; + + static const int32_t rgi_diamond[ 6 ][ 2 ] = { + { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } + }; + + pui8_blk = ps_ctx->pui8_blk; + i_blk_stride = ps_ctx->i_blk_stride; + i_blk_type = ps_ctx->i_blk_type; + i_ref_stride = ps_ctx->i_ref_stride; + i_pred_mv_x = ps_ctx->i_pred_mv_x; + i_pred_mv_y = ps_ctx->i_pred_mv_y; + i_lambda = ps_ctx->i_lambda; + + i_min_mv_x = MAX( ps_ctx->i_min_mv_x, -ps_ctx->i_x_offset ); + i_max_mv_x = MIN( ps_ctx->i_max_mv_x, ps_ctx->i_ref_width - rgi_y262_block_type_dims[ ps_ctx->i_blk_type ][ 0 ] - ps_ctx->i_x_offset ); + i_min_mv_y = MAX( ps_ctx->i_min_mv_y, -ps_ctx->i_y_offset ); + i_max_mv_y = MIN( ps_ctx->i_max_mv_y, ps_ctx->i_ref_height - rgi_y262_block_type_dims[ 
ps_ctx->i_blk_type ][ 1 ] - ps_ctx->i_y_offset ); + + pui8_ref = ps_ctx->pui8_ref + ps_ctx->i_x_offset + ( ps_ctx->i_y_offset * ps_ctx->i_ref_stride ); + + i_best_sad = MAX_COST; + i_best_mv_x = 0; + i_best_mv_y = 0; + TRY_MV( 0, 0 ); + + i_mv_x = ( i_pred_mv_x + 1 ) >> 1; + i_mv_y = ( i_pred_mv_y + 1 ) >> 1; + + if( VALID_MV( i_mv_x, i_mv_y ) ) + { + TRY_MV( i_mv_x, i_mv_y ); + } + + for( i_idx = 0; i_idx < ps_ctx->i_num_candidates_fp; i_idx++ ) + { + if( VALID_MV( ps_ctx->rgi_candidates_fp[ i_idx ][ 0 ] >> 1, ps_ctx->rgi_candidates_fp[ i_idx ][ 1 ] >> 1 ) ) + { + TRY_MV( ps_ctx->rgi_candidates_fp[ i_idx ][ 0 ] >> 1, ps_ctx->rgi_candidates_fp[ i_idx ][ 1 ] >> 1 ); + } + } + + if( ps_ctx->i_me_call == MECALL_LOOKAHEAD || ps_y262->i_quality_for_speed > -20 ) + { + do + { + i_omv_x = i_best_mv_x; + i_omv_y = i_best_mv_y; + i_ter = 0; + for( i_scale = 0; i_scale < 12; i_scale++ ) + { + for( i_idx = 0; i_idx < 8; i_idx++ ) + { + i_mv_x = i_omv_x + ( rgi_exhaustive_pattern[ i_idx ][ 0 ] << i_scale ); + i_mv_y = i_omv_y + ( rgi_exhaustive_pattern[ i_idx ][ 1 ] << i_scale ); + if( VALID_MV( i_mv_x, i_mv_y ) ) + { + TRY_MV( i_mv_x, i_mv_y ); + } + } + + i_ter++; + if( i_omv_x != i_best_mv_x || i_omv_y != i_best_mv_y ) + { + i_ter = 0; + } + else if( i_ter > 4 ) + { + break; + } + } + } while( ( i_omv_x != i_best_mv_x || i_omv_y != i_best_mv_y ) ); + } + else + { + do + { + i_omv_x = i_best_mv_x; + i_omv_y = i_best_mv_y; + i_ter = 0; + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + i_mv_x = i_omv_x + ( rgi_diamond[ i_idx ][ 0 ] ); + i_mv_y = i_omv_y + ( rgi_diamond[ i_idx ][ 1 ] ); + if( VALID_MV( i_mv_x, i_mv_y ) ) + { + TRY_MV( i_mv_x, i_mv_y ); + } + } + } while( ( i_omv_x != i_best_mv_x || i_omv_y != i_best_mv_y ) ); + } + + i_best_mv_x <<= 1; + i_best_mv_y <<= 1; + ps_ctx->i_best_mv_x = i_best_mv_x; + ps_ctx->i_best_mv_y = i_best_mv_y; + ps_ctx->i_best_mv_sad = i_best_sad; + + /* hpel */ + y262_hpel_motion_search( ps_y262, ps_ctx ); + + return TRUE; +} + + +#define 
TRY_HPEL_MV( x, y ) \ +do { \ + uint8_t *pui8_mvref; \ + int32_t i_hpelidx; \ + pui8_mvref = pui8_ref + ( ( x ) >> 1 ) + ( ( ( y ) >> 1 ) * i_ref_stride ); \ + i_hpelidx = ( ( x ) & 1 ) | ( ( ( y ) & 1 ) << 1 ); \ + if( i_hpelidx ) \ + { \ + ps_y262->s_funcs.rgf_motcomp_copy[ i_mc_blk_type ][ i_hpelidx ]( pui8_mvref, i_ref_stride, rgui8_pred, 16 ); \ + i_sad = ps_y262->s_funcs.rgf_satd[ i_blk_type ]( pui8_blk, i_blk_stride, rgui8_pred, 16 ); \ + } \ + else \ + { \ + i_sad = ps_y262->s_funcs.rgf_satd[ i_blk_type ]( pui8_blk, i_blk_stride, pui8_mvref, i_ref_stride ); \ + } \ + i_sad += MV_COST( ( x ), ( y ) ); \ + if( i_sad < i_best_sad ) \ + { \ + i_best_sad = i_sad; \ + i_best_mv_x = ( x ); \ + i_best_mv_y = ( y ); \ + } \ +} while( 0 ) + + +bool_t y262_hpel_motion_search( y262_t *ps_y262, y262_me_context_t *ps_ctx ) +{ + int32_t i_blk_stride, i_ref_stride, i_blk_type, i_idx, i_best_mv_x, i_best_mv_y, i_best_sad, i_mv_x, i_mv_y, i_sad; + int32_t i_pred_mv_x, i_pred_mv_y; + int32_t i_lambda; + uint8_t *pui8_blk, *pui8_ref; + int32_t i_min_mv_x, i_min_mv_y, i_max_mv_x, i_max_mv_y; + ALIGNED( 16 ) uint8_t rgui8_pred[ 16 * 16 ]; + uint8_t *pui8_pred; + int32_t i_mc_blk_type; + + static const int32_t rgi_exhaustive_pattern[ 8 ][ 2 ] = { + { -1, -1 }, { 0, -1 }, { 1, -1 }, + { -1, 0 }, { 1, 0 }, + { -1, 1 }, { 0, 1 }, { 1, 1 } + }; + + pui8_blk = ps_ctx->pui8_blk; + i_blk_stride = ps_ctx->i_blk_stride; + i_blk_type = ps_ctx->i_blk_type; + i_ref_stride = ps_ctx->i_ref_stride; + i_pred_mv_x = ps_ctx->i_pred_mv_x; + i_pred_mv_y = ps_ctx->i_pred_mv_y; + i_lambda = ps_ctx->i_lambda; + + i_min_mv_x = MAX( ps_ctx->i_min_mv_x, -ps_ctx->i_x_offset ); + i_max_mv_x = MIN( ps_ctx->i_max_mv_x, ps_ctx->i_ref_width - rgi_y262_block_type_dims[ ps_ctx->i_blk_type ][ 0 ] - ps_ctx->i_x_offset ); + i_min_mv_y = MAX( ps_ctx->i_min_mv_y, -ps_ctx->i_y_offset ); + i_max_mv_y = MIN( ps_ctx->i_max_mv_y, ps_ctx->i_ref_height - rgi_y262_block_type_dims[ ps_ctx->i_blk_type ][ 1 ] - 
ps_ctx->i_y_offset ); + + pui8_ref = ps_ctx->pui8_ref + ps_ctx->i_x_offset + ( ps_ctx->i_y_offset * ps_ctx->i_ref_stride ); + + /* hpel */ + i_min_mv_x <<= 1; + i_min_mv_y <<= 1; + i_max_mv_x <<= 1; + i_max_mv_y <<= 1; + + if( i_blk_type == BLOCK_TYPE_16x16 ) + { + i_mc_blk_type = MC_BLOCK_16x16; + } + else if( i_blk_type == BLOCK_TYPE_16x8 ) + { + i_mc_blk_type = MC_BLOCK_16x8; + } + else + { + assert( FALSE ); + } + + pui8_pred = pui8_ref + ( ps_ctx->i_best_mv_x >> 1 ) + ( ( ps_ctx->i_best_mv_y >> 1 ) * i_ref_stride ); + i_best_sad = MAX_COST; + i_best_mv_x = 0; + i_best_mv_y = 0; + + if( VALID_MV( ps_ctx->i_best_mv_x, ps_ctx->i_best_mv_y ) ) + { + TRY_HPEL_MV( ps_ctx->i_best_mv_x, ps_ctx->i_best_mv_y ); + } + + for( i_idx = 0; i_idx < 8; i_idx++ ) + { + i_mv_x = ps_ctx->i_best_mv_x + rgi_exhaustive_pattern[ i_idx ][ 0 ]; + i_mv_y = ps_ctx->i_best_mv_y + rgi_exhaustive_pattern[ i_idx ][ 1 ]; + + if( VALID_MV( i_mv_x, i_mv_y ) ) + { + TRY_HPEL_MV( i_mv_x, i_mv_y ); + } + } + ps_ctx->i_best_mv_x = i_best_mv_x; + ps_ctx->i_best_mv_y = i_best_mv_y; + ps_ctx->i_best_mv_sad = i_best_sad; + + return TRUE; +} + + + + + + + + diff --git a/src/y262/me.h b/src/y262/me.h new file mode 100644 index 0000000..e59311d --- /dev/null +++ b/src/y262/me.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +bool_t y262_motion_search( y262_t *ps_y262, y262_me_context_t *ps_ctx ); +bool_t y262_hpel_motion_search( y262_t *ps_y262, y262_me_context_t *ps_ctx ); diff --git a/src/y262/pixelop.c b/src/y262/pixelop.c new file mode 100644 index 0000000..83b547c --- /dev/null +++ b/src/y262/pixelop.c @@ -0,0 +1,376 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Sum of absolute differences over an i_width x i_height block.
 * Row i_row of each block starts i_row * stride bytes after its base pointer.
 */
static int32_t y262_sad_rect( const uint8_t *pui8_blk1, int32_t i_stride1, const uint8_t *pui8_blk2, int32_t i_stride2,
	int32_t i_width, int32_t i_height )
{
	int32_t i_row, i_col, i_sum;

	i_sum = 0;
	for( i_row = 0; i_row < i_height; i_row++ )
	{
		const uint8_t *pui8_row1 = pui8_blk1 + i_row * i_stride1;
		const uint8_t *pui8_row2 = pui8_blk2 + i_row * i_stride2;

		for( i_col = 0; i_col < i_width; i_col++ )
		{
			i_sum += abs( pui8_row1[ i_col ] - pui8_row2[ i_col ] );
		}
	}
	return i_sum;
}

/* SAD of two blocks, 16 samples wide and 16 rows high. */
int32_t y262_sad_16x16( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
{
	return y262_sad_rect( pui8_blk1, i_stride1, pui8_blk2, i_stride2, 16, 16 );
}

/* SAD of two blocks, 16 samples wide and 8 rows high. */
int32_t y262_sad_16x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
{
	return y262_sad_rect( pui8_blk1, i_stride1, pui8_blk2, i_stride2, 16, 8 );
}

/* SAD of two blocks, 8 samples wide and 8 rows high. */
int32_t y262_sad_8x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
{
	return y262_sad_rect( pui8_blk1, i_stride1, pui8_blk2, i_stride2, 8, 8 );
}
i_idx ][ 2 ] - rgi_diff[ i_idx ][ 6 ]; + m[ i_idx ][ 7 ] = rgi_diff[ i_idx ][ 3 ] - rgi_diff[ i_idx ][ 7 ]; + + d[ i_idx ][ 0 ] = m[ i_idx ][ 0 ] + m[ i_idx ][ 2 ]; + d[ i_idx ][ 1 ] = m[ i_idx ][ 1 ] + m[ i_idx ][ 3 ]; + d[ i_idx ][ 2 ] = m[ i_idx ][ 0 ] - m[ i_idx ][ 2 ]; + d[ i_idx ][ 3 ] = m[ i_idx ][ 1 ] - m[ i_idx ][ 3 ]; + d[ i_idx ][ 4 ] = m[ i_idx ][ 4 ] + m[ i_idx ][ 6 ]; + d[ i_idx ][ 5 ] = m[ i_idx ][ 5 ] + m[ i_idx ][ 7 ]; + d[ i_idx ][ 6 ] = m[ i_idx ][ 4 ] - m[ i_idx ][ 6 ]; + d[ i_idx ][ 7 ] = m[ i_idx ][ 5 ] - m[ i_idx ][ 7 ]; + + m[ i_idx ][ 0 ] = d[ i_idx ][ 0 ] + d[ i_idx ][ 1 ]; + m[ i_idx ][ 1 ] = d[ i_idx ][ 0 ] - d[ i_idx ][ 1 ]; + m[ i_idx ][ 2 ] = d[ i_idx ][ 2 ] + d[ i_idx ][ 3 ]; + m[ i_idx ][ 3 ] = d[ i_idx ][ 2 ] - d[ i_idx ][ 3 ]; + m[ i_idx ][ 4 ] = d[ i_idx ][ 4 ] + d[ i_idx ][ 5 ]; + m[ i_idx ][ 5 ] = d[ i_idx ][ 4 ] - d[ i_idx ][ 5 ]; + m[ i_idx ][ 6 ] = d[ i_idx ][ 6 ] + d[ i_idx ][ 7 ]; + m[ i_idx ][ 7 ] = d[ i_idx ][ 6 ] - d[ i_idx ][ 7 ]; + } + + for( i_idx = 0; i_idx < 8; i_idx++ ) + { + d[ 0 ][ i_idx ] = m[ 0 ][ i_idx ] + m[ 4 ][ i_idx ]; + d[ 1 ][ i_idx ] = m[ 1 ][ i_idx ] + m[ 5 ][ i_idx ]; + d[ 2 ][ i_idx ] = m[ 2 ][ i_idx ] + m[ 6 ][ i_idx ]; + d[ 3 ][ i_idx ] = m[ 3 ][ i_idx ] + m[ 7 ][ i_idx ]; + d[ 4 ][ i_idx ] = m[ 0 ][ i_idx ] - m[ 4 ][ i_idx ]; + d[ 5 ][ i_idx ] = m[ 1 ][ i_idx ] - m[ 5 ][ i_idx ]; + d[ 6 ][ i_idx ] = m[ 2 ][ i_idx ] - m[ 6 ][ i_idx ]; + d[ 7 ][ i_idx ] = m[ 3 ][ i_idx ] - m[ 7 ][ i_idx ]; + + m[ 0 ][ i_idx ] = d[ 0 ][ i_idx ] + d[ 2 ][ i_idx ]; + m[ 1 ][ i_idx ] = d[ 1 ][ i_idx ] + d[ 3 ][ i_idx ]; + m[ 2 ][ i_idx ] = d[ 0 ][ i_idx ] - d[ 2 ][ i_idx ]; + m[ 3 ][ i_idx ] = d[ 1 ][ i_idx ] - d[ 3 ][ i_idx ]; + m[ 4 ][ i_idx ] = d[ 4 ][ i_idx ] + d[ 6 ][ i_idx ]; + m[ 5 ][ i_idx ] = d[ 5 ][ i_idx ] + d[ 7 ][ i_idx ]; + m[ 6 ][ i_idx ] = d[ 4 ][ i_idx ] - d[ 6 ][ i_idx ]; + m[ 7 ][ i_idx ] = d[ 5 ][ i_idx ] - d[ 7 ][ i_idx ]; + + d[ 0 ][ i_idx ] = m[ 0 ][ i_idx ] + m[ 1 ][ i_idx ]; + d[ 1 ][ i_idx ] = 
m[ 0 ][ i_idx ] - m[ 1 ][ i_idx ]; + d[ 2 ][ i_idx ] = m[ 2 ][ i_idx ] + m[ 3 ][ i_idx ]; + d[ 3 ][ i_idx ] = m[ 2 ][ i_idx ] - m[ 3 ][ i_idx ]; + d[ 4 ][ i_idx ] = m[ 4 ][ i_idx ] + m[ 5 ][ i_idx ]; + d[ 5 ][ i_idx ] = m[ 4 ][ i_idx ] - m[ 5 ][ i_idx ]; + d[ 6 ][ i_idx ] = m[ 6 ][ i_idx ] + m[ 7 ][ i_idx ]; + d[ 7 ][ i_idx ] = m[ 6 ][ i_idx ] - m[ 7 ][ i_idx ]; + } + + i_satd = 0; + for( i_idx = 0; i_idx < 8; i_idx++ ) + { + i_satd += abs( d[ i_idx ][ 0 ] ); + i_satd += abs( d[ i_idx ][ 1 ] ); + i_satd += abs( d[ i_idx ][ 2 ] ); + i_satd += abs( d[ i_idx ][ 3 ] ); + i_satd += abs( d[ i_idx ][ 4 ] ); + i_satd += abs( d[ i_idx ][ 5 ] ); + i_satd += abs( d[ i_idx ][ 6 ] ); + i_satd += abs( d[ i_idx ][ 7 ] ); + } + + i_satd = ( ( i_satd + 2 ) >> 2 ); + + return i_satd; +} + + +int32_t y262_satd_16x16( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ) +{ + int32_t i_satd; + + i_satd = y262_satd_8x8( pui8_blk1, i_stride1, pui8_blk2, i_stride2 ); + i_satd += y262_satd_8x8( pui8_blk1 + 8, i_stride1, pui8_blk2 + 8, i_stride2 ); + i_satd += y262_satd_8x8( pui8_blk1 + ( 8 * i_stride1 ), i_stride1, pui8_blk2 + ( 8 * i_stride2 ), i_stride2 ); + i_satd += y262_satd_8x8( pui8_blk1 + 8 + ( 8 * i_stride1 ), i_stride1, pui8_blk2 + 8 + ( 8 * i_stride2 ), i_stride2 ); + + return i_satd; +} + + +int32_t y262_satd_16x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ) +{ + int32_t i_satd; + + i_satd = y262_satd_8x8( pui8_blk1, i_stride1, pui8_blk2, i_stride2 ); + i_satd += y262_satd_8x8( pui8_blk1 + 8, i_stride1, pui8_blk2 + 8, i_stride2 ); + + return i_satd; +} + +int32_t y262_satd_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ) +{ + int32_t i_satd; + + i_satd = y262_satd_8x8_sse2( pui8_blk1, i_stride1, pui8_blk2, i_stride2 ); + i_satd += y262_satd_8x8_sse2( pui8_blk1 + 8, i_stride1, pui8_blk2 + 8, i_stride2 ); + i_satd += y262_satd_8x8_sse2( pui8_blk1 + ( 8 * i_stride1 ), i_stride1, 
pui8_blk2 + ( 8 * i_stride2 ), i_stride2 ); + i_satd += y262_satd_8x8_sse2( pui8_blk1 + 8 + ( 8 * i_stride1 ), i_stride1, pui8_blk2 + 8 + ( 8 * i_stride2 ), i_stride2 ); + + return i_satd; +} + + +int32_t y262_satd_16x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ) +{ + int32_t i_satd; + + i_satd = y262_satd_8x8_sse2( pui8_blk1, i_stride1, pui8_blk2, i_stride2 ); + i_satd += y262_satd_8x8_sse2( pui8_blk1 + 8, i_stride1, pui8_blk2 + 8, i_stride2 ); + + return i_satd; +} + + +int32_t y262_ssd_16x16( uint8_t *pui8_blk1, int32_t i_blk1_stride, uint8_t *pui8_blk2, int32_t i_blk2_stride ) +{ + int32_t i_y, i_x, i_ssd; + + i_ssd = 0; + for( i_y = 0; i_y < 16; i_y ++ ) + { + for( i_x = 0; i_x < 16; i_x++ ) + { + int32_t i_diff; + i_diff = pui8_blk1[ i_x + i_y * i_blk1_stride ] - pui8_blk2[ i_x + i_y * i_blk2_stride ]; + i_ssd += i_diff * i_diff; + } + } + return i_ssd; +} + +int32_t y262_ssd_8x8( uint8_t *pui8_blk1, int32_t i_blk1_stride, uint8_t *pui8_blk2, int32_t i_blk2_stride ) +{ + int32_t i_y, i_x, i_ssd; + + i_ssd = 0; + for( i_y = 0; i_y < 8; i_y ++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + int32_t i_diff; + i_diff = pui8_blk1[ i_x + i_y * i_blk1_stride ] - pui8_blk2[ i_x + i_y * i_blk2_stride ]; + i_ssd += i_diff * i_diff; + } + } + return i_ssd; +} + + +int32_t y262_variance_16x16( uint8_t *pui8_blk, int32_t i_blk_stride ) +{ + int32_t i_y, i_x, i_sum, i_sqr; + + i_sum = i_sqr = 0; + for( i_y = 0; i_y < 16; i_y++ ) + { + for( i_x = 0; i_x < 16; i_x++ ) + { + i_sum += pui8_blk[ i_x ]; + i_sqr += pui8_blk[ i_x ] * pui8_blk[ i_x ]; + } + pui8_blk += i_blk_stride; + } + return ( i_sqr - ( ( ( ( int64_t )i_sum ) * i_sum ) >> 8 ) ); +} + +int32_t y262_variance_8x8( uint8_t *pui8_blk, int32_t i_blk_stride ) +{ + int32_t i_y, i_x, i_sum, i_sqr; + + i_sum = i_sqr = 0; + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + i_sum += pui8_blk[ i_x ]; + i_sqr += pui8_blk[ i_x ] * pui8_blk[ i_x ]; + } + pui8_blk += 
i_blk_stride; + } + return ( i_sqr - ( ( ( ( int64_t )i_sum ) * i_sum ) >> 6 ) ); +} + + +void y262_sub_8x8( int16_t *pi16_diff, uint8_t *pui8_src1, int32_t i_stride_src1, uint8_t *pui8_src2, int32_t i_stride_src2 ) +{ + int32_t i_x, i_y; + + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + pi16_diff[ i_x + i_y * 8 ] = pui8_src1[ i_x + i_y * i_stride_src1 ] - pui8_src2[ i_x + i_y * i_stride_src2 ]; + } + } +} + +void y262_add_8x8( uint8_t *pui8_destination, int32_t i_destination_stride, uint8_t *pui8_base, int32_t i_base_stride, int16_t *pi16_difference ) +{ + int32_t i_x, i_y; + + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + pui8_destination[ i_x + i_y * i_destination_stride ] = MIN( 255, MAX( 0, pui8_base[ i_x + i_y * i_base_stride ] + pi16_difference[ i_x + i_y * 8 ] ) ); + } + } +} + +bool_t y262_16x16_frame_field_pel_decision( uint8_t *pui8_src, int32_t i_src_stride ) +{ + int32_t i_y, i_x, i_frame, i_field; + + i_frame = 0; + for( i_y = 0; i_y < 16 - 1; i_y++ ) + { + for( i_x = 0; i_x < 16; i_x++ ) + { + i_frame += abs( pui8_src[ i_y * i_src_stride + i_x ] - pui8_src[ ( i_y + 1 ) * i_src_stride + i_x ] ); + } + } + + i_field = 0; + for( i_y = 0; i_y < 16 - 2; i_y++ ) + { + for( i_x = 0; i_x < 16; i_x++ ) + { + i_field += abs( pui8_src[ i_y * i_src_stride + i_x ] - pui8_src[ ( i_y + 2 ) * i_src_stride + i_x ] ); + } + } + return i_field < i_frame; +} + + + + + diff --git a/src/y262/pixelop.h b/src/y262/pixelop.h new file mode 100644 index 0000000..38e3618 --- /dev/null +++ b/src/y262/pixelop.h @@ -0,0 +1,50 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +int32_t y262_sad_16x16( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_sad_16x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_sad_8x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); + +int32_t y262_satd_16x16( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_satd_16x8( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); + +int32_t y262_satd_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_satd_16x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); + +int32_t y262_ssd_16x16( uint8_t *pui8_blk1, int32_t i_blk1_stride, uint8_t *pui8_blk2, int32_t i_blk2_stride ); +int32_t y262_ssd_8x8( uint8_t *pui8_blk1, int32_t i_blk1_stride, uint8_t *pui8_blk2, int32_t i_blk2_stride ); + +int32_t y262_variance_16x16( uint8_t 
*pui8_blk, int32_t i_blk_stride ); +int32_t y262_variance_8x8( uint8_t *pui8_blk, int32_t i_blk_stride ); + +void y262_sub_8x8( int16_t *pi16_diff, uint8_t *pui8_src1, int32_t i_stride_src1, uint8_t *pui8_src2, int32_t i_stride_src2 ); +void y262_add_8x8( uint8_t *pui8_destination, int32_t i_destination_stride, uint8_t *pui8_base, int32_t i_base_stride, int16_t *pi_difference ); + +bool_t y262_16x16_frame_field_pel_decision( uint8_t *pui8_src, int32_t i_src_stride ); diff --git a/src/y262/pixelop_x86.asm b/src/y262/pixelop_x86.asm new file mode 100644 index 0000000..17fe2b0 --- /dev/null +++ b/src/y262/pixelop_x86.asm @@ -0,0 +1,1355 @@ +%if 0 +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
%include "x86inc.asm"

SECTION_RODATA

; 16 byte aligned constant vectors
ALIGN 16
M128_FILTER_HOR_MASK_INNER : dw 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000
ALIGN 16
M128_FILTER_HOR_MASK_OUTER : dw 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff
ALIGN 16
M128_FILTER_TWO : dw 2, 2, 2, 2, 2, 2, 2, 2
ALIGN 16
M128_FILTER_EIGHT : dw 8, 8, 8, 8, 8, 8, 8, 8

; eight words of 1, used with pmaddwd to add neighbouring words into dwords
ALIGN 16
M128_ONE : dw 1, 1, 1, 1, 1, 1, 1, 1

; sixteen bytes of 1, used as the rounding correction mask of the
; four pixel average motion compensation macros
ALIGN 16
M128_ONE_BYTE : db 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

SECTION .text

INIT_XMM


; HADAMARD4x2 r0, r1, r2, r3, tmp
;   %1-%4 : four registers of eight 16 bit values each, transformed in place
;   %5    : scratch register
; Runs a two stage add/subtract butterfly across the four registers, regroups
; the 16 bit lanes between the registers with the punpck* sequence
; ( a transpose style shuffle ) and then runs the same butterfly again.
%macro HADAMARD4x2 5
    ; first butterfly: ( %1, %2 ) and ( %3, %4 ), then ( %1, %3 ) and ( %2, %4 )
    paddw       %1, %2
    paddw       %3, %4
    paddw       %2, %2
    paddw       %4, %4
    psubw       %2, %1
    psubw       %4, %3
    paddw       %1, %3
    paddw       %2, %4
    paddw       %3, %3
    paddw       %4, %4
    psubw       %3, %1
    psubw       %4, %2
    ; lane regrouping
    mova        %5, %1
    punpcklwd   %1, %2
    punpckhwd   %5, %2
    mova        %2, %3
    punpcklwd   %3, %4
    punpckhwd   %2, %4
    mova        %4, %1
    punpckldq   %1, %3
    punpckhdq   %4, %3
    mova        %3, %5
    punpckhdq   %5, %2
    punpckldq   %3, %2
    mova        %2, %1
    punpcklqdq  %1, %3
    punpckhqdq  %2, %3
    mova        %3, %4
    punpckhqdq  %3, %5
    punpcklqdq  %4, %5
    ; second butterfly, same pairing as the first
    paddw       %1, %2
    paddw       %3, %4
    paddw       %2, %2
    paddw       %4, %4
    psubw       %2, %1
    psubw       %4, %3
    paddw       %1, %3
    paddw       %2, %4
    paddw       %3, %3
    paddw       %4, %4
    psubw       %3, %1
    psubw       %4, %2
%endmacro

; ABSSUM in0, in1, in2, in3, tmp0, tmp1, sum
;   Adds the absolute values of the signed words in %1-%4 to %7 using
;   unsigned saturating adds. %5 and %6 are clobbered.
;   |x| is formed as max( x, 0 - x ).
%macro ABSSUM 7
    mova        %6, %1
    pxor        %5, %5
    psubw       %5, %6
    pmaxsw      %6, %5
    paddusw     %7, %6
    mova        %6, %2
    pxor        %5, %5
    psubw       %5, %6
    pmaxsw      %6, %5
    paddusw     %7, %6
    mova        %6, %3
    pxor        %5, %5
    psubw       %5, %6
    pmaxsw      %6, %5
    paddusw     %7, %6
    mova        %6, %4
    pxor        %5, %5
    psubw       %5, %6
    pmaxsw      %6, %5
    paddusw     %7, %6
%endmacro

; ABSSUMX2 in0, in1, tmp0, tmp1, sum
;   Two input variant of ABSSUM: adds |%1| and |%2| ( signed words ) to %5.
;   %3 and %4 are clobbered.
%macro ABSSUMX2 5
    mova        %4, %1
    pxor        %3, %3
    psubw       %3, %4
    pmaxsw      %4, %3
    paddusw     %5, %4
    mova        %4, %2
    pxor        %3, %3
    psubw       %3, %4
    pmaxsw      %4, %3
    paddusw     %5, %4
%endmacro

; ADDSUB a, b
;   a = a + b, b = b - a ( using the old a ), on 16 bit lanes.
%macro ADDSUB 2
    paddw       %1, %2
    paddw       %2, %2
    psubw       %2, %1
%endmacro

; LOADDIFF8x4 d0, d1, d2, d3, tmp, zero, p1, stride1, p2, stride2
;   %1-%4  : receive rows 0-3 of ( p1 - p2 ) as eight signed words each
;   %5     : scratch register
;   %6     : register holding zero, used to widen bytes to words
;   %7/%8  : pointer / stride of the first block
;   %9/%10 : pointer / stride of the second block
; On exit %7 and %9 have been advanced by four rows.
; The final pointer update uses the macro arguments; the previous version
; named r0/r1/r2/r3 directly there and so only worked when called with
; exactly those registers.
%macro LOADDIFF8x4 10 ; 0-3, tmp, zero, p1, stride1, p2, stride2
    movq        %1, [ %7 ]
    movq        %5, [ %9 ]
    punpcklbw   %1, %6
    punpcklbw   %5, %6
    psubsw      %1, %5
    movq        %2, [ %7 + %8 ]
    movq        %5, [ %9 + %10 ]
    lea         %7, [ %7 + 2 * %8 ]
    lea         %9, [ %9 + 2 * %10 ]
    punpcklbw   %2, %6
    punpcklbw   %5, %6
    psubsw      %2, %5
    movq        %3, [ %7 ]
    movq        %5, [ %9 ]
    punpcklbw   %3, %6
    punpcklbw   %5, %6
    psubsw      %3, %5
    movq        %4, [ %7 + %8 ]
    movq        %5, [ %9 + %10 ]
    lea         %7, [ %7 + 2 * %8 ]
    lea         %9, [ %9 + 2 * %10 ]
    punpcklbw   %4, %6
    punpcklbw   %5, %6
    psubsw      %4, %5
%endmacro

; int32_t __cdecl y262_satd_8x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
;
; r0 = pui8_blk1, r1 = i_stride1, r2 = pui8_blk2, r3 = i_stride2, r4 = scratch.
; Processes the block as two halves of four rows: each half is loaded as
; differences and run through HADAMARD4x2, the halves are combined with
; ADDSUB, a last butterfly is done between the low and high quadwords and
; the absolute values are accumulated. Returns ( sum + 2 ) >> 2 in eax.

INIT_XMM
cglobal y262_satd_8x8_sse2, 4, 5, 8
%ifdef ARCH_X86_64
    ; strides arrive as 32 bit ints, widen them for address arithmetic
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    sub         rsp, 0x40           ; 4 x 16 bytes of scratch
    pxor        m7, m7              ; zero for byte -> word widening

    ; rows 0-3
    LOADDIFF8x4 m0, m1, m2, m3, m4, m7, r0, r1, r2, r3
    HADAMARD4x2 m0, m1, m2, m3, m4

    movu        [ rsp ], m0
    movu        [ rsp + 16 ], m1
    movu        [ rsp + 32 ], m2
    movu        [ rsp + 48 ], m3

    ; rows 4-7 ( the pointers were advanced by the first LOADDIFF8x4 )
    LOADDIFF8x4 m0, m1, m2, m3, m4, m7, r0, r1, r2, r3
    HADAMARD4x2 m0, m1, m2, m3, m4

    ; combine the two halves: m4-m7 = first + second, m0-m3 = second - first
    movu        m4, [ rsp ]
    movu        m5, [ rsp + 16 ]
    movu        m6, [ rsp + 32 ]
    movu        m7, [ rsp + 48 ]
    ADDSUB      m4, m0
    ADDSUB      m5, m1
    ADDSUB      m6, m2
    ADDSUB      m7, m3
    ; m4 is consumed right away and is not spilled, m5-m7 are parked on the stack
    movu        [ rsp + 16 ], m5
    movu        [ rsp + 32 ], m6
    movu        [ rsp + 48 ], m7

    mova        m5, m4
    punpcklqdq  m4, m0
    punpckhqdq  m5, m0
    ADDSUB      m4, m5
    pxor        m0, m0 ; sum = 0
    ABSSUMX2    m4, m5, m6, m7, m0

    movu        m4, [ rsp + 16 ]
    mova        m5, m4
    punpcklqdq  m4, m1
    punpckhqdq  m5, m1
    ADDSUB      m4, m5
    ABSSUMX2    m4, m5, m6, m7, m0

    movu        m4, [ rsp + 32 ]
    mova        m5, m4
    punpcklqdq  m4, m2
    punpckhqdq  m5, m2
    ADDSUB      m4, m5
    ABSSUMX2    m4, m5, m6, m7, m0

    movu        m4, [ rsp + 48 ]
    mova        m5, m4
    punpcklqdq  m4, m3
    punpckhqdq  m5, m3
    ADDSUB      m4, m5
    ABSSUMX2    m4, m5, m6, m7, m0

    ; horizontal add of the eight word sums into one dword
    pmaddwd     m0, [ M128_ONE ]
    movhlps     m6, m0
    paddd       m0, m6
    pshuflw     m6, m0, 0xE
    paddd       m0, m6

    ; ( sum + 2 ) >> 2
    movd        r4d, m0
    add         r4d, 2
    shr         r4d, 2
    mov         eax, r4d

    add         rsp, 0x40

    RET
;-----------------------------------------------------------------------------
; int32_t __cdecl y262_sad_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
;
; r0 = pui8_blk1, r1 = i_stride1, r2 = pui8_blk2, r3 = i_stride2.
; Rows of blk1 are fetched with movdqu; rows of blk2 are used directly as
; psadbw memory operands ( NOTE(review): that form expects 16 byte aligned
; addresses - confirm callers guarantee it ). Loads of later rows are
; interleaved with the psadbw of earlier rows. The two 64 bit partial sums
; are added at the end and returned in eax.
;-----------------------------------------------------------------------------

cglobal y262_sad_16x16_sse2, 4, 4, 8
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    movdqu      m6, [r0]
    movdqu      m7, [r0+r1]
    lea         r0, [r0+2*r1]
    movdqu      m5, [r0]
    movdqu      m4, [r0+r1]
    lea         r0, [r0+2*r1]
    psadbw      m6, [r2]
    psadbw      m7, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m0, [r0]
    paddw       m6, m7
    psadbw      m5, [r2]
    psadbw      m4, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m2, [r0+r1]
    lea         r0, [r0+2*r1]
    paddw       m5, m4
    movdqu      m3, [r0]
    movdqu      m1, [r0+r1]
    lea         r0, [r0+2*r1]
    paddw       m6, m5
    psadbw      m0, [r2]
    psadbw      m2, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m7, [r0]
    paddw       m0, m2
    psadbw      m3, [r2]
    psadbw      m1, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m5, [r0+r1]
    lea         r0, [r0+2*r1]
    paddw       m3, m1
    movdqu      m4, [r0]
    paddw       m6, m0
    movdqu      m0, [r0+r1]
    lea         r0, [r0+2*r1]
    paddw       m6, m3
    psadbw      m7, [r2]
    psadbw      m5, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m2, [r0]
    paddw       m7, m5
    psadbw      m4, [r2]
    psadbw      m0, [r2+r3]
    lea         r2, [r2+2*r3]
    movdqu      m3, [r0+r1]
    lea         r0, [r0+2*r1]
    paddw       m4, m0
    movdqu      m1, [r0]
    paddw       m6, m7
    movdqu      m7, [r0+r1]
    paddw       m6, m4
    psadbw      m2, [r2]
    psadbw      m3, [r2+r3]
    lea         r2, [r2+2*r3]
    paddw       m2, m3
    psadbw      m1, [r2]
    psadbw      m7, [r2+r3]
    paddw       m1, m7
    paddw       m6, m2
    paddw       m6, m1

    ; add the high quadword sum to the low one
    movdqa      m7, m6
    psrldq      m6, 8
    paddw       m6, m7
    movd        eax, m6

    RET

;-----------------------------------------------------------------------------
; int32_t __cdecl y262_sad_16x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
;
; Same register use as y262_sad_16x16_sse2, for a block of 8 rows handled
; as two groups of four rows.
;-----------------------------------------------------------------------------

cglobal y262_sad_16x8_sse2, 4, 4, 8
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    ; rows 0-3
    movdqu      m7, [r0]
    movdqu      m6, [r0+r1]
    lea         r0, [r0+2*r1]
    movdqu      m5, [r0]
    movdqu      m4, [r0+r1]
    lea         r0, [r0+2*r1]

    psadbw      m7, [r2]
    psadbw      m6, [r2+r3]
    lea         r2, [r2+2*r3]
    psadbw      m5, [r2]
    psadbw      m4, [r2+r3]
    lea         r2, [r2+2*r3]

    paddw       m7, m6
    paddw       m5, m4
    paddw       m7, m5

    ; rows 4-7
    movdqu      m6, [r0]
    movdqu      m5, [r0+r1]
    lea         r0, [r0+2*r1]
    movdqu      m4, [r0]
    movdqu      m3, [r0+r1]

    psadbw      m6, [r2]
    psadbw      m5, [r2+r3]
    lea         r2, [r2+2*r3]
    psadbw      m4, [r2]
    psadbw      m3, [r2+r3]

    paddw       m6, m5
    paddw       m4, m3
    paddw       m7, m6
    paddw       m7, m4

    ; add the high quadword sum to the low one
    movdqa      m6, m7
    psrldq      m6, 8
    paddw       m6, m7
    movd        eax, m6

    RET


; SSD_8X2 row0_blk1, row1_blk1, row0_blk2, row1_blk2
;   %1-%4 are address expressions ( without brackets ) of eight pixels each.
;   Adds the squared differences of both rows to the four dword sums in m6.
;   m7 must hold zero. Clobbers m0-m3.
%macro SSD_8X2 4
    movq        m0, [ %1 ]
    movq        m1, [ %2 ]
    movq        m2, [ %3 ]
    movq        m3, [ %4 ]
    punpcklbw   m0, m7
    punpcklbw   m1, m7
    punpcklbw   m2, m7
    punpcklbw   m3, m7

    psubsw      m0, m2
    psubsw      m1, m3
    pmaddwd     m0, m0
    pmaddwd     m1, m1

    paddd       m6, m0
    paddd       m6, m1
%endmacro

;-----------------------------------------------------------------------------
; int32_t y262_ssd_8x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 );
;
; r0 = pui8_blk1, r1 = i_stride1, r2 = pui8_blk2, r3 = i_stride2.
; Four row pairs are accumulated into m6, whose four dwords are then added
; together and returned in eax.
;-----------------------------------------------------------------------------

cglobal y262_ssd_8x8_sse2, 4, 4, 8
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    pxor        m7, m7
    pxor        m6, m6

    SSD_8X2     r0, r0 + r1, r2, r2 + r3
%rep 3
    lea         r0, [ r0 + r1 * 2 ]
    lea         r2, [ r2 + r3 * 2 ]
    SSD_8X2     r0, r0 + r1, r2, r2 + r3
%endrep

    ; bring dwords 1, 2 and 3 of m6 down to dword 0 and add them up
    pshufd      m7, m6, 11100101b
    pshufd      m5, m6, 11100110b
    pshufd      m4, m6, 11100111b
    paddd       m7, m6
    paddd       m7, m5
    paddd       m7, m4

    movd        eax, m7
    RET


;-----------------------------------------------------------------------------
; int32_t __cdecl y262_ssd_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 )
;
; Same scheme as y262_ssd_8x8_sse2: eight row pairs, each handled as a left
; ( offset 0 ) and a right ( offset 8 ) group of eight pixels.
;-----------------------------------------------------------------------------

cglobal y262_ssd_16x16_sse2, 4, 4, 8
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    pxor        m7, m7
    pxor        m6, m6

; rows 0-1
    SSD_8X2     r0, r0 + r1, r2, r2 + r3
    SSD_8X2     r0 + 8, r0 + r1 + 8, r2 + 8, r2 + r3 + 8

; rows 2-3 ... 14-15
%rep 7
    lea         r0, [ r0 + r1 * 2 ]
    lea         r2, [ r2 + r3 * 2 ]

    SSD_8X2     r0, r0 + r1, r2, r2 + r3
    SSD_8X2     r0 + 8, r0 + r1 + 8, r2 + 8, r2 + r3 + 8
%endrep

    ; bring dwords 1, 2 and 3 of m6 down to dword 0 and add them up
    pshufd      m7, m6, 11100101b
    pshufd      m5, m6, 11100110b
    pshufd      m4, m6, 11100111b
    paddd       m7, m6
    paddd       m7, m5
    paddd       m7, m4

    movd        eax, m7
    RET



; SUB_8X2
;   Loads two rows of eight pixels from [ r1 ] and [ r3 ], widens them to
;   words ( m4 must hold zero ), advances r1 / r3 by two rows and stores the
;   two rows of differences at [ r0 ] and [ r0 + 16 ]. Clobbers m0-m3.
%macro SUB_8X2 0
    movq        m0, [ r1 ]
    movq        m1, [ r3 ]
    movq        m2, [ r1 + r2 ]
    movq        m3, [ r3 + r4 ]

    punpcklbw   m0, m4
    punpcklbw   m1, m4
    punpcklbw   m2, m4
    punpcklbw   m3, m4

    lea         r1, [ r1 + r2 * 2 ]
    lea         r3, [ r3 + r4 * 2 ]

    psubsw      m0, m1
    psubsw      m2, m3

    movdqu      [ r0 ], m0
    movdqu      [ r0 + 16 ], m2
%endmacro

;-----------------------------------------------------------------------------------------------------------------
; void __cdecl y262_sub_8x8_sse2( int16_t *pi16_diff, uint8_t *pui8_src1, int32_t i_stride_src1, uint8_t *pui8_src2, int32_t i_stride_src2 );
;
; r0 = pi16_diff, r1 = pui8_src1, r2 = i_stride_src1, r3 = pui8_src2,
; r4 = i_stride_src2. Writes 64 contiguous words of src1 - src2.
;-----------------------------------------------------------------------------------------------------------------
cglobal y262_sub_8x8_sse2, 5, 5, 5
%ifdef ARCH_X86_64
    movsxd      r2, r2d
    movsxd      r4, r4d
%endif

    pxor        m4, m4

%rep 3
    SUB_8X2
    add         r0, 32
%endrep
    ; last row pair: the output pointer is not advanced any further
    SUB_8X2

    RET

;-----------------------------------------------------------------------------------------------------------------
; void __cdecl y262_add_8x8_sse2( uint8_t *pui8_destination, int32_t i_destination_stride, uint8_t *pui8_base, int32_t i_base_stride, int16_t *pi_difference );
;
; r0 = pui8_destination, r1 = i_destination_stride, r2 = pui8_base,
; r3 = i_base_stride, r4 = pi_difference ( 64 contiguous words ).
; Adds the differences to the base pixels with signed saturation and packs
; the result to bytes with unsigned saturation.
;-----------------------------------------------------------------------------------------------------------------
; This line used to read "INIT_XMMS". NOTE(review): no macro of that name is
; visible here, so it would have been taken as a stray label - confirm
; against x86inc.asm.
INIT_XMM

cglobal y262_add_8x8_sse2, 5, 5, 5
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif

    pxor        m4, m4

%rep 4
    movq        m0, [ r2 ]
    movq        m2, [ r2 + r3 ]
    movdqu      m1, [ r4 ]
    movdqu      m3, [ r4 + 16 ]

    punpcklbw   m0, m4
    punpcklbw   m2, m4

    paddsw      m0, m1
    paddsw      m2, m3

    packuswb    m0, m0
    packuswb    m2, m2

    movq        [ r0 ], m0
    movq        [ r0 + r1 ], m2

    lea         r0, [ r0 + r1 * 2 ]
    lea         r2, [ r2 + r3 * 2 ]
    add         r4, 32
%endrep

    RET



; motcomp functions
;
; All line macros below take the number of rows as %1 and use
;   r0 = source pointer, r1 = source stride,
;   r2 = destination pointer, r3 = destination stride.
; Suffix _00 copies, _01 averages each pixel with its right neighbour,
; _10 averages each pixel with the one in the next row, _11 averages the
; four pixels of a 2x2 neighbourhood. A trailing A additionally averages
; the result with what is already stored at the destination.

%macro MMX_LINE_00 1
%rep %1
    movq        mm0, [ r0 ]
    add         r0, r1
    movq        [ r2 ], mm0
    add         r2, r3
%endrep
%endmacro

%macro MMX_LINE_01 1
%rep %1
    movq        mm0, [ r0 ]
    movq        mm1, [ r0 + 1 ]
    add         r0, r1
    pavgb       mm0, mm1
    movq        [ r2 ], mm0
    add         r2, r3
%endrep
%endmacro

%macro MMX_LINE_10 1
    movq        mm0, [ r0 ]
%rep %1
    movq        mm1, [ r0 + r1 ]
    add         r0, r1
    pavgb       mm0, mm1
    movq        [ r2 ], mm0
    add         r2, r3
    movq        mm0, mm1            ; the lower row becomes the next upper row
%endrep
%endmacro

; The 2x2 average is built from two pavgb stages. mm5 collects, per byte,
; a correction bit that is subtracted from the final average.
%macro MMX_LINE_11 1
    movq        mm0, [ r0 ]
    movq        mm2, [ r0 + 1 ]
%rep %1
    movq        mm1, [ r0 + r1 ]
    movq        mm3, [ r0 + r1 + 1 ]
    add         r0, r1
    movq        mm5, mm0
    pxor        mm5, mm1
    movq        mm4, mm2
    pxor        mm4, mm3
    pavgb       mm0, mm1
    pavgb       mm2, mm3
    por         mm5, mm4
    movq        mm4, mm0
    pxor        mm4, mm2
    pand        mm5, mm4
    pand        mm5, [ M128_ONE_BYTE ]
    pavgb       mm0, mm2
    psubusb     mm0, mm5
    movq        [ r2 ], mm0
    add         r2, r3
    movq        mm0, mm1
    movq        mm2, mm3
%endrep
%endmacro


%macro MMX_LINE_00A 1
%rep %1
    movq        mm0, [ r0 ]
    add         r0, r1
    movq        mm1, [ r2 ]
    pavgb       mm0, mm1
    movq        [ r2 ], mm0
    add         r2, r3
%endrep
%endmacro

%macro MMX_LINE_01A 1
%rep %1
    movq        mm0, [ r0 ]
    movq        mm1, [ r0 + 1 ]
    add         r0, r1
    pavgb       mm0, mm1
    movq        mm1, [ r2 ]
    pavgb       mm0, mm1
    movq        [ r2 ], mm0
    add         r2, r3
%endrep
%endmacro


%macro MMX_LINE_10A 1
    movq        mm0, [ r0 ]
%rep %1
    movq        mm1, [ r0 + r1 ]
    add         r0, r1
    pavgb       mm0, mm1
    movq        mm2, [ r2 ]
    pavgb       mm0, mm2
    movq        [ r2 ], mm0
    add         r2, r3
    movq        mm0, mm1
%endrep
%endmacro



%macro MMX_LINE_11A 1
    movq        mm0, [ r0 ]
    movq        mm2, [ r0 + 1 ]
%rep %1
    movq        mm1, [ r0 + r1 ]
    movq        mm3, [ r0 + r1 + 1 ]
    add         r0, r1
    movq        mm6, mm0
    pxor        mm6, mm1
    movq        mm5, mm2
    pxor        mm5, mm3
    pavgb       mm0, mm1
    pavgb       mm2, mm3
    por         mm6, mm5
    movq        mm5, mm0
    pxor        mm5, mm2
    pand        mm6, mm5
    pand        mm6, [ M128_ONE_BYTE ]
    pavgb       mm0, mm2
    psubusb     mm0, mm6
    movq        mm4, [ r2 ]
    pavgb       mm0, mm4
    movq        [ r2 ], mm0
    add         r2, r3
    movq        mm0, mm1
    movq        mm2, mm3
%endrep
%endmacro


; MOTCOMP_MMX_8W_FUNC height
;   Emits the eight functions y262_motcomp_8x<height>_{00,01,10,11}_{put,avg}_mmxext.
;   Each one widens the two stride arguments on x86-64, expands the matching
;   MMX_LINE_* macro for <height> rows and issues emms before returning.
%macro MOTCOMP_MMX_8W_FUNC 1
cglobal y262_motcomp_8x %+ %1 %+ _00_put_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_00 %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _01_put_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_01 %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _10_put_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_10 %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _11_put_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_11 %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _00_avg_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_00A %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _01_avg_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_01A %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _10_avg_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_10A %1
    emms
    RET

cglobal y262_motcomp_8x %+ %1 %+ _11_avg_mmxext, 4, 4, 0
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    MMX_LINE_11A %1
    emms
    RET

%endmacro


MOTCOMP_MMX_8W_FUNC 16
MOTCOMP_MMX_8W_FUNC 8
MOTCOMP_MMX_8W_FUNC 4



; 16 pixel wide variants of the line macros. Source rows are read with
; movdqu; the destination is read and written with movdqa
; ( NOTE(review): so destination rows are expected to be 16 byte aligned -
; confirm with callers ).

%macro SSE2_LINE_00 1
%rep %1
    movdqu      xmm0, [ r0 ]
    add         r0, r1
    movdqa      [ r2 ], xmm0
    add         r2, r3
%endrep
%endmacro

%macro SSE2_LINE_01 1
%rep %1
    movdqu      xmm0, [ r0 ]
    movdqu      xmm1, [ r0 + 1 ]
    add         r0, r1
    pavgb       xmm0, xmm1
    movdqa      [ r2 ], xmm0
    add         r2, r3
%endrep
%endmacro


%macro SSE2_LINE_10 1
    movdqu      xmm0, [ r0 ]
%rep %1
    movdqu      xmm1, [ r0 + r1 ]
    add         r0, r1
    pavgb       xmm0, xmm1
    movdqa      [ r2 ], xmm0
    add         r2, r3
    movdqu      xmm0, xmm1
%endrep
%endmacro


%macro SSE2_LINE_11 1
    movdqu      xmm0, [ r0 ]
    movdqu      xmm2, [ r0 + 1 ]
%rep %1
    movdqu      xmm1, [ r0 + r1 ]
    movdqu      xmm3, [ r0 + r1 + 1 ]
    add         r0, r1
    movdqu      xmm6, xmm0
    pxor        xmm6, xmm1
    movdqu      xmm5, xmm2
    pxor        xmm5, xmm3
    pavgb       xmm0, xmm1
    pavgb       xmm2, xmm3
    por         xmm6, xmm5
    movdqu      xmm5, xmm0
    pxor        xmm5, xmm2
    pand        xmm6, xmm5
    pand        xmm6, [ M128_ONE_BYTE ]
    pavgb       xmm0, xmm2
    psubusb     xmm0, xmm6
    movdqa      [ r2 ], xmm0
    add         r2, r3
    movdqu      xmm0, xmm1
    movdqu      xmm2, xmm3
%endrep
%endmacro

%macro SSE2_LINE_00A 1
%rep %1
    movdqu      xmm0, [ r0 ]
    add         r0, r1
    movdqa      xmm1, [ r2 ]
    pavgb       xmm0, xmm1
    movdqa      [ r2 ], xmm0
    add         r2, r3
%endrep
%endmacro

%macro SSE2_LINE_01A 1
%rep %1
    movdqu      xmm0, [ r0 ]
    movdqu      xmm1, [ r0 + 1 ]
    add         r0, r1
    pavgb       xmm0, xmm1
    movdqa      xmm1, [ r2 ]
    pavgb       xmm0, xmm1
    movdqa      [ r2 ], xmm0
    add         r2, r3
%endrep
%endmacro


%macro SSE2_LINE_10A 1
    movdqu      xmm0, [ r0 ]
%rep %1
    movdqu      xmm1, [ r0 + r1 ]
    add         r0, r1
    pavgb       xmm0, xmm1
    movdqa      xmm2, [ r2 ]
    pavgb       xmm0, xmm2
    movdqa      [ r2 ], xmm0
    add         r2, r3
    movdqu      xmm0, xmm1
%endrep
%endmacro



%macro SSE2_LINE_11A 1
    movdqu      xmm0, [ r0 ]
    movdqu      xmm2, [ r0 + 1 ]
%rep %1
    movdqu      xmm1, [ r0 + r1 ]
    movdqu      xmm3, [ r0 + r1 + 1 ]
    add         r0, r1
    movdqu      xmm6, xmm0
    pxor        xmm6, xmm1
    movdqu      xmm5, xmm2
    pxor        xmm5, xmm3
    pavgb       xmm0, xmm1
    pavgb       xmm2, xmm3
    por         xmm6, xmm5
    movdqu      xmm5, xmm0
    pxor        xmm5, xmm2
    pand        xmm6, xmm5
    pand        xmm6, [ M128_ONE_BYTE ]
    pavgb       xmm0, xmm2
    psubusb     xmm0, xmm6
    movdqa      xmm4, [ r2 ]
    pavgb       xmm0, xmm4
    movdqa      [ r2 ], xmm0
    add         r2, r3
    movdqu      xmm0, xmm1
    movdqu      xmm2, xmm3
%endrep
%endmacro


; MOTCOMP_SSE2_16W_FUNC height
;   Emits the eight functions y262_motcomp_16x<height>_{00,01,10,11}_{put,avg}_sse2.
;   The last cglobal argument is the number of xmm registers the expanded
;   line macro touches.
%macro MOTCOMP_SSE2_16W_FUNC 1
cglobal y262_motcomp_16x %+ %1 %+ _00_put_sse2, 4, 4, 2
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_00 %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _01_put_sse2, 4, 4, 2
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_01 %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _10_put_sse2, 4, 4, 3
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_10 %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _11_put_sse2, 4, 4, 7
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_11 %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _00_avg_sse2, 4, 4, 2
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_00A %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _01_avg_sse2, 4, 4, 2
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_01A %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _10_avg_sse2, 4, 4, 3
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_10A %1
    RET

cglobal y262_motcomp_16x %+ %1 %+ _11_avg_sse2, 4, 4, 7
%ifdef ARCH_X86_64
    movsxd      r1, r1d
    movsxd      r3, r3d
%endif
    SSE2_LINE_11A %1
    RET
%endmacro

MOTCOMP_SSE2_16W_FUNC 16
MOTCOMP_SSE2_16W_FUNC 8
IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + + +int32_t y262_sad_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_sad_16x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_ssd_16x16_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +int32_t y262_ssd_8x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); + +void y262_sub_8x8_sse2( int16_t *pi16_diff, uint8_t *pui8_src1, int32_t i_stride_src1, uint8_t *pui8_src2, int32_t i_stride_src2 ); +void y262_add_8x8_sse2( uint8_t *pui8_destination, int32_t i_destination_stride, uint8_t *pui8_base, int32_t i_base_stride, int16_t *pi_difference ); + +int32_t y262_satd_8x8_sse2( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); diff --git a/src/y262/ratectrl.c b/src/y262/ratectrl.c new file mode 100644 index 0000000..e7eb192 --- /dev/null +++ b/src/y262/ratectrl.c @@ -0,0 +1,1221 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "y262.h" + + + +/* Reset the rate control state of ps_y262. For BITRATE_CONTROL_PASS2 the samples of the previous pass are fetched through pf_rcsample_callback and their bit estimates are rescaled until the total is within 0.5% of the target or the iteration limit is hit. */ +/* Returns FALSE if the sample callback is missing, a sample has an unexpected size, there are no samples, or an allocation fails. Returns TRUE otherwise. */ +bool_t y262_ratectrl_init( y262_t *ps_y262 ) +{ + int32_t i_num_mb; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_ratectrl->i_vbv_occupancy = ps_ratectrl->i_vbv_size; + ps_ratectrl->i64_vbv_occupancy_fractional = 0; + + ps_ratectrl->i_timescale = ps_y262->i_sequence_derived_timescale; + ps_ratectrl->i_picture_duration = ps_y262->i_sequence_derived_picture_duration; + ps_ratectrl->i_pulldown_timescale = ps_y262->i_sequence_derived_pulldown_timescale; + ps_ratectrl->i_pulldown_picture_duration = ps_y262->i_sequence_derived_pulldown_picture_duration; + + ps_ratectrl->i64_output_ticks = 0; + ps_ratectrl->i64_output_frames = 0; + ps_ratectrl->i64_output_seconds = 0; + ps_ratectrl->d_output_bits = 200.0; + ps_ratectrl->d_qb_qplx = 12.0 * 200.0; + ps_ratectrl->d_target_bits = 200.0; + ps_ratectrl->rgd_satd_predictors[ 0 ] = 1.0; + ps_ratectrl->rgd_satd_predictors_weight[ 0 ] = 1.0; + ps_ratectrl->rgd_satd_predictors[ 1 ] = 1.0; +
ps_ratectrl->rgd_satd_predictors_weight[ 1 ] = 1.0; + ps_ratectrl->rgd_satd_predictors[ 2 ] = 1.0; + ps_ratectrl->rgd_satd_predictors_weight[ 2 ] = 1.0; + ps_ratectrl->rgd_satd_predictors[ 3 ] = 1.0; + ps_ratectrl->rgd_satd_predictors_weight[ 3 ] = 1.0; + ps_ratectrl->d_confidence_predict_behind = 1.0; + ps_ratectrl->d_confidence_predict_ahead = 1.0; + ps_ratectrl->ps_samples = NULL; + /* y262_ratectrl_deinit tests this pointer, keep it defined on the early returns below */ + ps_ratectrl->ps_mb_samples = NULL; + + ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] = ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] = 0; + ps_ratectrl->rgd_last_ref_quantizers[ 0 ] = ps_ratectrl->rgd_last_ref_quantizers[ 1 ] = 0.0; + + ps_ratectrl->i_i_picture_baseline_bits = 0; + ps_ratectrl->i_min_satd_for_satd_prediction = ( ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) / 256 ) * 11; + ps_ratectrl->i_min_bits_for_satd_prediction = ( ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) / 256 ) * 3; + + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + int32_t i_don, i_num_samples, i_sample_size, i_iter; + double d_target_bits_per_sample = ( ( ( double )ps_ratectrl->i_bitrate ) * ps_ratectrl->i_picture_duration ) / ps_ratectrl->i_timescale; + double d_sequence_quantizer, d_sequence_cplx, d_estimated_bits, d_adjust; + y262_ratectrl_sample_t s_sample; + + if( !ps_y262->s_funcs.pf_rcsample_callback ) + { + return FALSE; + } + + i_num_samples = 0; + while( ( i_sample_size = ps_y262->s_funcs.pf_rcsample_callback( ps_y262->p_cb_handle, i_num_samples, ( uint8_t *)&s_sample, sizeof( s_sample ) ) ) ) + { + if( i_sample_size != sizeof( s_sample ) ) + { + return FALSE; + } + i_num_samples++; + } + + if( i_num_samples == 0 ) + { + /* no samples: the averages and the rescale loop below would divide by zero */ + return FALSE; + } + + i_don = 0; + ps_ratectrl->d_qb_qplx_2p = 0; + ps_ratectrl->d_target_bits_2p = 0; + ps_ratectrl->d_estimated_bits = 0; + d_sequence_cplx = 0; + + ps_ratectrl->ps_samples = ( y262_ratectrl_isample_t * ) y262_alloc( sizeof( y262_ratectrl_isample_t ) * i_num_samples ); + if( ps_ratectrl->ps_samples == NULL ) + { + return FALSE; + } + + while( i_don < i_num_samples ) + { + i_sample_size =
ps_y262->s_funcs.pf_rcsample_callback( ps_y262->p_cb_handle, i_don, ( uint8_t *)&s_sample, sizeof( s_sample ) ); + if( i_sample_size != sizeof( s_sample ) ) + { + return FALSE; + } + ps_ratectrl->ps_samples[ i_don ].d_cplx = ( ( double )s_sample.i_cplx_f8 ) / 256.0; + ps_ratectrl->ps_samples[ i_don ].d_quantizer = ( ( double )s_sample.i_quantizer_f8 ) / 256.0; + ps_ratectrl->ps_samples[ i_don ].i_bits = s_sample.i_bits; + ps_ratectrl->ps_samples[ i_don ].i_estimated_bits = 0; + ps_ratectrl->ps_samples[ i_don ].ui8_frame_type = s_sample.ui8_frame_type; + ps_ratectrl->ps_samples[ i_don ].i_satd_cost = s_sample.i_satd_cost; + + ps_ratectrl->d_target_bits_2p += d_target_bits_per_sample; + ps_ratectrl->d_qb_qplx_2p += ( ps_ratectrl->ps_samples[ i_don ].d_quantizer * ps_ratectrl->ps_samples[ i_don ].i_bits ) / ps_ratectrl->ps_samples[ i_don ].d_cplx; + d_sequence_cplx += ps_ratectrl->ps_samples[ i_don ].d_cplx; + + i_don++; + } + d_sequence_cplx /= ( double )i_num_samples; + + i_don = 0; + i_iter = 0; + d_adjust = 1.0; + while( 1 ) + { + d_sequence_quantizer = ( ps_ratectrl->d_qb_qplx_2p * d_sequence_cplx ) / ps_ratectrl->d_target_bits_2p; + d_sequence_quantizer *= d_adjust; + + d_estimated_bits = 0; + i_don = 0; + while( i_don < i_num_samples ) + { + double d_sample_quantizer = ( ( ps_ratectrl->ps_samples[ i_don ].d_cplx / ps_ratectrl->d_target_bits_2p ) * ps_ratectrl->d_qb_qplx_2p ) * d_adjust; + ps_ratectrl->ps_samples[ i_don ].i_estimated_bits = ( int32_t ) ( ( ps_ratectrl->ps_samples[ i_don ].i_bits * ps_ratectrl->ps_samples[ i_don ].d_quantizer ) / d_sample_quantizer ); + d_estimated_bits += ps_ratectrl->ps_samples[ i_don ].i_estimated_bits; + i_don++; + } + + if( fabs( d_estimated_bits - ps_ratectrl->d_target_bits_2p ) < ps_ratectrl->d_target_bits_2p / 200.0 || i_iter > 10 ) + { + break; + } + i_iter++; + d_adjust = d_estimated_bits / ps_ratectrl->d_target_bits_2p; + } + ps_ratectrl->d_qb_qplx_2p *= d_adjust; + ps_ratectrl->i_current_sample = 0; +
ps_ratectrl->i_num_samples = i_num_samples; + } + + ps_ratectrl->i_picture_scaled_satd = 0; + + i_num_mb = ( ps_y262->i_sequence_width >> 4 ) * ( ps_y262->i_sequence_height >> 4 ); + ps_ratectrl->ps_mb_samples = y262_alloc( sizeof( y262_ratectrl_mb_sample_t ) * i_num_mb ); + if( ps_ratectrl->ps_mb_samples == NULL ) + { + /* y262_ratectrl_start_picture writes into this array for every macroblock */ + return FALSE; + } + + return TRUE; +} + +/* Release the pass 2 sample array and the per macroblock sample array if they are set. */ +void y262_ratectrl_deinit( y262_t *ps_y262 ) +{ + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + if( ps_ratectrl->ps_samples ) + { + y262_dealloc( ps_ratectrl->ps_samples ); + } + if( ps_ratectrl->ps_mb_samples ) + { + y262_dealloc( ps_ratectrl->ps_mb_samples ); + } +} + + + +/* Complexity of a picture; currently always 1.0, the sqrt( i_cost ) variant is disabled by the if( 1 ). */ +double y262_ratectrl_get_cplx( y262_t *ps_y262, int32_t i_cost ) +{ + if( 1 ) + { + return 1.0; + } + else + { + return sqrt( ( float )i_cost ); + } +} + + +/* Cost independent part of the size prediction: for I pictures the stored baseline bits plus 32 bits per 256 pixels, for other types 2.5 bits per 256 pixels. */ +int32_t y262_ratectrl_predict_frame_size_baseline( y262_t *ps_y262, int32_t i_picture_type ) +{ + int32_t i_bits; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + if( i_picture_type == PICTURE_CODING_TYPE_I ) + { + i_bits = ps_ratectrl->i_i_picture_baseline_bits; + i_bits += ( ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) / 256 ) * 4 * 8; + } + else + { + i_bits = ( ( ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) / 256 ) * 5 ) / 2; + } + + return i_bits; +} + + + + +/* Baseline bits plus the satd term: predictor[ type ] * i_picture_cost / d_quantizer / predictor_weight[ type ]. */ +int32_t y262_ratectrl_predict_frame_size( y262_t *ps_y262, int32_t i_picture_type, int32_t i_picture_cost, double d_quantizer ) +{ + int32_t i_bits; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + i_bits = y262_ratectrl_predict_frame_size_baseline( ps_y262, i_picture_type ); + + i_bits += ( int32_t ) ( ( ( ps_ratectrl->rgd_satd_predictors[ i_picture_type ] * i_picture_cost ) / d_quantizer ) / ps_ratectrl->rgd_satd_predictors_weight[ i_picture_type ] ); + + return i_bits; +} + + +int32_t y262_ratectrl_predict_frame_size_2pass_i( y262_t *ps_y262, int32_t i_picture_type, int32_t i_picture_cost, double d_quantizer, int32_t i_don ) +{ + int32_t i_idx, i_count,
i_baseline, i_bits_ahead; + y262_bitrate_control_t *ps_ratectrl; + double d_satd_pred, d_satd_pred_weight, d_weight; + + ps_ratectrl = &ps_y262->s_ratectrl; + + i_baseline = y262_ratectrl_predict_frame_size_baseline( ps_y262, i_picture_type ); + + if( i_picture_cost > ps_ratectrl->i_min_satd_for_satd_prediction ) + { + d_satd_pred = d_satd_pred_weight = 0.0; + d_weight = 1.0; + i_count = 0; + for( i_idx = i_don; i_idx < ps_ratectrl->i_num_samples; i_idx++ ) + { + y262_ratectrl_isample_t *ps_sample = &ps_ratectrl->ps_samples[ i_idx ]; + if( ps_sample->ui8_frame_type == i_picture_type && ps_sample->i_satd_cost > ps_ratectrl->i_min_satd_for_satd_prediction ) + { + d_satd_pred += ( ( MAX( ps_sample->i_bits - i_baseline, i_baseline * 0.05 ) * ps_sample->d_quantizer ) / ps_sample->i_satd_cost ) * d_weight; + d_satd_pred_weight += d_weight; + d_weight = d_weight * 0.5; + i_count++; + if( i_count >= 3 ) + { + break; + } + } + } + if( i_count > 0 ) + { + i_bits_ahead = i_baseline; + i_bits_ahead += ( int32_t ) ( ( ( d_satd_pred * i_picture_cost ) / d_quantizer ) / d_satd_pred_weight ); + return i_bits_ahead; + } + } + + return ( int32_t ) ( i_baseline * 1.3 ); +} + + +int32_t y262_ratectrl_predict_frame_size_2pass( y262_t *ps_y262, int32_t i_picture_type, int32_t i_picture_cost, double d_quantizer, int32_t i_don ) +{ + int32_t i_bits_ahead, i_bits_behind; + y262_bitrate_control_t *ps_ratectrl; + double d_conf_sum, d_predicted; + + ps_ratectrl = &ps_y262->s_ratectrl; + + i_bits_behind = y262_ratectrl_predict_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer ); + i_bits_ahead = y262_ratectrl_predict_frame_size_2pass_i( ps_y262, i_picture_type, i_picture_cost, d_quantizer, i_don ); + + d_conf_sum = ps_ratectrl->d_confidence_predict_behind + ps_ratectrl->d_confidence_predict_ahead; + d_predicted = ( i_bits_behind * ps_ratectrl->d_confidence_predict_behind ) + ( i_bits_ahead * ps_ratectrl->d_confidence_predict_ahead ); + d_predicted = d_predicted / d_conf_sum; + + 
d_predicted = MIN( INT32_MAX, MAX( 1, d_predicted ) ); + + return ( int32_t ) d_predicted; +} + + +int32_t y262_ratectrl_predict_first_frame_size( y262_t *ps_y262, int32_t i_picture_type, int32_t i_picture_cost, double d_quantizer, int32_t i_don, int32_t i_which ) +{ + y262_bitrate_control_t *ps_ratectrl; + y262_picture_t *ps_picture; + int32_t i_estimated_bits; + double d_intra_weight, d_intra_bits, d_type_bits; + + ps_ratectrl = &ps_y262->s_ratectrl; + ps_picture = ps_y262->ps_input_picture; + + if( i_which == 1 || ps_ratectrl->i_mode != BITRATE_CONTROL_PASS2 ) + { + d_intra_bits = y262_ratectrl_predict_frame_size( ps_y262, PICTURE_CODING_TYPE_I, ps_picture->i_frame_intra_cost, d_quantizer ); + d_type_bits = y262_ratectrl_predict_frame_size( ps_y262, ps_picture->i_frame_type, ps_picture->i_frame_cost, d_quantizer ); + } + else if( i_which == 0 ) + { + d_intra_bits = y262_ratectrl_predict_frame_size_2pass( ps_y262, PICTURE_CODING_TYPE_I, ps_picture->i_frame_intra_cost, d_quantizer, ps_picture->i_don ); + d_type_bits = y262_ratectrl_predict_frame_size_2pass( ps_y262, ps_picture->i_frame_type, ps_picture->i_frame_cost, d_quantizer, ps_picture->i_don ); + } + else + { + if( i_which != 2 ) + { + int32_t *pi_null = NULL; + *pi_null = 0; + } + d_intra_bits = y262_ratectrl_predict_frame_size_2pass_i( ps_y262, PICTURE_CODING_TYPE_I, ps_picture->i_frame_intra_cost, d_quantizer, ps_picture->i_don ); + d_type_bits = y262_ratectrl_predict_frame_size_2pass_i( ps_y262, ps_picture->i_frame_type, ps_picture->i_frame_cost, d_quantizer, ps_picture->i_don ); + } + if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] < ps_picture->i_pon ) + { + d_intra_weight = MAX( 0.0, ( ps_ratectrl->rgd_last_ref_quantizers[ 1 ] - d_quantizer ) / ps_ratectrl->rgd_last_ref_quantizers[ 1 ] ); + } + else + { + d_intra_weight = 0.0; + } + i_estimated_bits = ( int32_t ) ( ( d_intra_bits * d_intra_weight ) + ( d_type_bits * ( 1.0 - d_intra_weight ) ) ); + + return i_estimated_bits; +} + + + +double 
y262_ratectrl_get_picture_quantizer( y262_t *ps_y262, int32_t i_picture_type, double d_quantizer ) +{ + return d_quantizer; +} + + + +double y262_ratectrl_clamp_to_vbv( y262_t *ps_y262, double d_quantizer ) +{ + int32_t i_idx, i_estimated_bits, i_iterations; + int32_t i_estimated_vbv, i_start_estimated_vbv, i_lower_limit, i_upper_limit, i_total_vbv_gain, i_vbv_gain, i_vbv_pictures, i_first_keyframe, i_down_limit; + bool_t b_up = FALSE, b_down = FALSE, b_cbr, b_force_down; + y262_bitrate_control_t *ps_ratectrl; + y262_picture_t *ps_picture; + + ps_ratectrl = &ps_y262->s_ratectrl; + ps_picture = ps_y262->ps_input_picture; + + b_cbr = ps_ratectrl->i_bitrate == ps_ratectrl->i_vbvrate; + + d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + + if( ps_picture->i_pon == 66 ) + { + ps_picture = ps_picture; + } + + i_vbv_gain = ( ( ( int64_t ) ps_ratectrl->i_vbvrate ) * ps_ratectrl->i_picture_duration ) / ps_ratectrl->i_timescale; + i_vbv_pictures = ( ps_ratectrl->i_vbv_size / i_vbv_gain ) + 1; + + i_iterations = 0; + while( i_iterations++ < 1000 ) + { + i_estimated_vbv = ps_ratectrl->i_vbv_occupancy; + i_total_vbv_gain = 0; + + if( d_quantizer < 0.1 ) + { + d_quantizer = 0.1; + break; + } + if( d_quantizer > 31.0 ) + { + d_quantizer = 31.0; + break; + } + + i_estimated_vbv -= i_vbv_gain; /* adjust */ + i_start_estimated_vbv = i_estimated_vbv; + i_first_keyframe = -1; + b_force_down = FALSE; + + for( i_idx = 0; i_idx < ps_picture->i_num_lookahead_pictures; i_idx++ ) + { + double d_picture_quantizer; + + i_estimated_vbv += i_vbv_gain; + i_estimated_vbv = MIN( i_estimated_vbv, ps_ratectrl->i_vbv_size ); + + i_total_vbv_gain += i_vbv_gain; + + d_picture_quantizer = y262_ratectrl_get_picture_quantizer( ps_y262, ps_picture->rgi_lookahead_picture_types[ i_idx ], d_quantizer ); + if( i_idx == 0 ) + { + i_estimated_bits = y262_ratectrl_predict_first_frame_size( ps_y262, ps_picture->rgi_lookahead_picture_types[ i_idx ], ps_picture->rgi_lookahead_picture_costs[ i_idx ], 
d_picture_quantizer, ps_picture->i_don, 0 ); + } + else + { + if( i_first_keyframe == -1 && ps_picture->rgi_lookahead_picture_types[ i_idx ] == PICTURE_CODING_TYPE_I ) + { + i_first_keyframe = i_idx; + } + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + i_estimated_bits = y262_ratectrl_predict_frame_size_2pass( ps_y262, ps_picture->rgi_lookahead_picture_types[ i_idx ], ps_picture->rgi_lookahead_picture_costs[ i_idx ], d_picture_quantizer, ps_picture->i_don + i_idx ); + } + else + { + i_estimated_bits = y262_ratectrl_predict_frame_size( ps_y262, ps_picture->rgi_lookahead_picture_types[ i_idx ], ps_picture->rgi_lookahead_picture_costs[ i_idx ], d_picture_quantizer ); + } + } + + i_estimated_bits = ( int32_t ) ( i_estimated_bits * 1.05 ); + i_estimated_vbv -= i_estimated_bits; + if( i_estimated_vbv < ps_ratectrl->i_vbv_size / 4 ) + { + b_force_down = TRUE; + break; + } + if( ( i_estimated_bits * 1.4 ) > ps_ratectrl->i_vbv_size ) + { + b_force_down = TRUE; + break; + } + } + + if( i_first_keyframe == -1 ) + { + i_first_keyframe = ps_picture->i_num_lookahead_pictures; + } + i_down_limit = MAX( i_first_keyframe, ( ( i_vbv_pictures * 3 ) / 2 ) ); + + i_lower_limit = ps_ratectrl->i_vbv_size / 2; + i_lower_limit = MIN( i_lower_limit, i_start_estimated_vbv + ( i_total_vbv_gain / 2 ) ); + + i_upper_limit = ( ps_ratectrl->i_vbv_size * 9 ) / 10; + i_upper_limit = MAX( i_upper_limit, i_start_estimated_vbv - ( i_total_vbv_gain / 2 ) ); + + /*if( ps_y262->ps_input_picture->i_pon == 2652 ) + fprintf( stderr, "iter: %d ( %d %d )\n", i_estimated_vbv, i_lower_limit, i_upper_limit );*/ + + if( i_estimated_vbv < i_lower_limit && !b_up && ( i_idx <= i_down_limit || b_force_down ) ) + { + d_quantizer *= 1.1; + b_down = TRUE; + continue; + } + if( i_estimated_vbv > i_upper_limit && !b_down && b_cbr ) + { + d_quantizer *= 0.9; + b_up = TRUE; + continue; + } + break; + } + + d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + /*if( ps_y262->ps_input_picture->i_pon == 2652 ) + 
fprintf( stderr, "end: %f ( %d )\n", d_quantizer, i_estimated_vbv ); */ + return d_quantizer; +} + + +void y262_ratectrl_start_picture( y262_t *ps_y262, int32_t i_header_bits ) +{ + int32_t i_picture_cost, i_picture_type, i_predicted_frame_size, i_predicted_frame_size_behind, i_predicted_frame_size_ahead; + double d_picture_cplx, d_quantizer; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + i_picture_cost = ps_y262->ps_input_picture->i_frame_cost; + i_picture_type = ps_y262->ps_input_picture->i_frame_type; + + d_picture_cplx = y262_ratectrl_get_cplx( ps_y262, i_picture_cost ); + + if( i_picture_type == PICTURE_CODING_TYPE_I ) + { + ps_ratectrl->i_i_picture_baseline_bits = i_header_bits; + } + + if( ps_ratectrl->i_mode == BITRATE_CONTROL_CQ ) + { + d_quantizer = ps_y262->i_quantizer; + } + else if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS1 ) + { + double d_adjust; + + d_quantizer = ( d_picture_cplx / ps_ratectrl->d_target_bits ) * ps_ratectrl->d_qb_qplx; + + /* short term adjust */ + d_adjust = 1.0 + ( ( ps_ratectrl->d_output_bits - ps_ratectrl->d_target_bits ) / ps_ratectrl->i_vbv_size ); + d_adjust = MIN( 2.5, MAX( 0.5, d_adjust ) ); + d_quantizer = d_quantizer * d_adjust; + } + else if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + double d_adjust; + + d_quantizer = ( d_picture_cplx / ps_ratectrl->d_target_bits_2p ) * ps_ratectrl->d_qb_qplx_2p; + + /* long term adjust */ + d_adjust = 1.0 + ( ( ps_ratectrl->d_output_bits - ps_ratectrl->d_estimated_bits ) / ( ps_ratectrl->d_target_bits_2p / 20.0 ) ); + d_adjust = MIN( 2.5, MAX( 0.5, d_adjust ) ); + d_quantizer = d_quantizer * d_adjust; + } + else + { + assert( FALSE ); + } + /*fprintf( stderr, "pre: %3.2f -", d_quantizer );*/ + d_quantizer = y262_ratectrl_get_picture_quantizer( ps_y262, ps_y262->ps_input_picture->i_frame_type, d_quantizer ); + + d_quantizer = y262_ratectrl_clamp_to_vbv( ps_y262, d_quantizer ); + + /*fprintf( stderr, "post: %3.2f -", d_quantizer );*/ + 
d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + + if( ps_y262->ps_input_picture->i_frame_type == PICTURE_CODING_TYPE_B ) + { + /*fprintf( stderr, " rqp %d %d %d\n", ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] , ps_y262->ps_input_picture->i_pon , ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] );*/ + if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] < ps_y262->ps_input_picture->i_pon && + ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] > ps_y262->ps_input_picture->i_pon ) + { + double d_mid_quantizer, d_delta, d_w1, d_w2; + d_delta = ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] - ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ]; + d_w1 = d_delta - ( ps_y262->ps_input_picture->i_pon - ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] ); + d_w2 = d_delta - ( ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] - ps_y262->ps_input_picture->i_pon ); + d_mid_quantizer = ( ps_ratectrl->rgd_last_ref_quantizers[ 0 ] * d_w1 ) + ( ps_ratectrl->rgd_last_ref_quantizers[ 1 ] * d_w2 ); + d_mid_quantizer = d_mid_quantizer / d_delta; + d_quantizer = MAX( d_mid_quantizer, d_quantizer ); + } + else if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] > ps_y262->ps_input_picture->i_pon ) + { + d_quantizer = MAX( d_quantizer, ps_ratectrl->rgd_last_ref_quantizers[ 0 ] ); + } + else if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] < ps_y262->ps_input_picture->i_pon ) + { + d_quantizer = MAX( d_quantizer, ps_ratectrl->rgd_last_ref_quantizers[ 1 ] ); + } + else + { + d_quantizer = MAX( d_quantizer, MAX( ps_ratectrl->rgd_last_ref_quantizers[ 0 ], ps_ratectrl->rgd_last_ref_quantizers[ 1 ] ) ); + } + } + + d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + /*fprintf( stderr, "post2: %3.2f -\n", d_quantizer );*/ + + i_predicted_frame_size = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer, ps_y262->ps_input_picture->i_don, 0 ); + i_predicted_frame_size_behind = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, 
d_quantizer, ps_y262->ps_input_picture->i_don, 1 ); + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + i_predicted_frame_size_ahead = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer, ps_y262->ps_input_picture->i_don, 2 ); + } + else + { + i_predicted_frame_size_ahead = i_predicted_frame_size_behind; + } + + + ps_ratectrl->i_quantizer = ( int32_t ) ( d_quantizer * 256.0 ); + + ps_ratectrl->i_picture_accumulated_quantizer = 0; + ps_ratectrl->i_picture_num_accumulated_quantizer = 0; + ps_ratectrl->d_picture_accumulated_quantizer_bits = 0.0; + ps_ratectrl->d_picture_accumulated_bits_quantizer_over_satd = 0.0; + ps_ratectrl->i_num_picture_accumulated_bits_quantizer_over_satd = 0; + ps_ratectrl->i_picture_bit_budget = ( int32_t ) ( ( i_predicted_frame_size * 1.3 ) * ( 1.0 + ( 0.2 * ps_y262->i_num_slice_encoders ) ) ); + ps_ratectrl->i_picture_bit_budget += MAX( 0, ( ps_ratectrl->i_vbv_occupancy - ps_ratectrl->i_picture_bit_budget ) / 4 ); + ps_ratectrl->i_picture_bit_budget = MIN( ps_ratectrl->i_picture_bit_budget, ps_ratectrl->i_vbv_occupancy - ( ps_ratectrl->i_vbv_occupancy / 10 ) ); + ps_ratectrl->i_picture_coded_scaled_satd = 0; + ps_ratectrl->i_picture_coded_size = 0; + ps_ratectrl->i_picture_adjusted_bit_budget = ps_ratectrl->i_picture_bit_budget; + ps_ratectrl->i_predicted_frame_size = i_predicted_frame_size; + ps_ratectrl->i_predicted_frame_size_behind = i_predicted_frame_size_behind; + ps_ratectrl->i_predicted_frame_size_ahead = i_predicted_frame_size_ahead; + ps_ratectrl->b_picture_bad_encountered = FALSE; + ps_ratectrl->i_picture_uncoded_size = 0; + ps_ratectrl->b_picture_reencode_pass = FALSE; + + + /*fprintf( stderr, "pred %d budget %d\n", i_predicted_frame_size, ps_ratectrl->i_picture_bit_budget );*/ + + /*fprintf( stderr, "PRED: %d %f : %d\n", ps_y262->ps_input_picture->i_frame_cost, d_quantizer, + y262_ratectrl_predict_frame_size( ps_y262, ps_y262->ps_input_picture->i_frame_type, 
ps_y262->ps_input_picture->i_frame_cost, d_quantizer ) );*/ + + { + int32_t i_mb_idx, i_num_mb; + ps_ratectrl->i_picture_scaled_satd = 0; + + i_num_mb = ( ps_y262->i_sequence_width >> 4 ) * ( ps_y262->i_sequence_height >> 4 ); + + for( i_mb_idx = 0; i_mb_idx < i_num_mb; i_mb_idx++ ) + { + int32_t i_satd, i_scale, i_scaled_satd; + if( ps_y262->ps_input_picture->i_frame_type == PICTURE_CODING_TYPE_I ) + { + i_satd = ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_intra_cost; + } + else + { + i_satd = ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_best_cost; + } + i_scale = ( ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_quantizer_scale * ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_quantizer_aq_scale ) >> 12; + i_scaled_satd = ( i_satd << 12 ) / i_scale; + + ps_ratectrl->ps_mb_samples[ i_mb_idx ].i_satd = i_satd; + ps_ratectrl->ps_mb_samples[ i_mb_idx ].i_scaled_satd = i_scaled_satd; + + ps_ratectrl->i_picture_scaled_satd += i_scaled_satd; + } + } + + //fprintf( stderr, "predict: %d, budget: %d\n", y262_ratectrl_predict_frame_size( ps_y262, ps_y262->ps_input_picture->i_frame_type, ps_y262->ps_input_picture->i_frame_cost, d_quantizer ), ps_ratectrl->i_picture_bit_budget ); + /*fprintf( stderr, "%d: quant: %f\n", ps_y262->ps_input_picture->i_pon, d_quantizer );*/ + + if( !ps_y262->b_sequence_mpeg1 ) + { + int32_t i_vbv_delay, i_occupancy_bits; + int64_t i64_occupancy_ticks; + + i_occupancy_bits = ps_ratectrl->i_vbv_occupancy - i_header_bits; + + i64_occupancy_ticks = ( ( ( ( int64_t ) 90000 ) * ps_ratectrl->i_vbv_size ) + ( ps_ratectrl->i_vbvrate - 1 ) ) / ps_ratectrl->i_vbvrate; + if( i64_occupancy_ticks >= 0xffff ) + { + /* buffer too large, signal "*shrug*" */ + i_vbv_delay = 0xffff; + } + else + { + i64_occupancy_ticks = ( ( ( ( int64_t ) 90000 ) * ( i_occupancy_bits + !!ps_ratectrl->i64_vbv_occupancy_fractional ) ) + ( ps_ratectrl->i_vbvrate - 1 ) ) / ps_ratectrl->i_vbvrate; + } + ps_y262->ps_input_picture->i_vbv_delay = ( int32_t ) 
i64_occupancy_ticks; + } + else + { + int32_t i_vbv_delay, i_occupancy_bits; + int64_t i64_occupancy_ticks; + + if( !ps_y262->b_sequence_cbr ) + { + i_vbv_delay = 0xffff; /* signal vbr */ + } + else + { + i_occupancy_bits = ps_ratectrl->i_vbv_occupancy - i_header_bits; + + i64_occupancy_ticks = ( ( ( ( int64_t ) 90000 ) * ps_ratectrl->i_vbv_size ) + ( ps_ratectrl->i_vbvrate - 1 ) ) / ps_ratectrl->i_vbvrate; + if( i64_occupancy_ticks >= 0xffff ) + { + /* buffer too large, signal vbr as best effort */ + i_vbv_delay = 0xffff; + } + else + { + i64_occupancy_ticks = ( ( ( ( int64_t ) 90000 ) * ( i_occupancy_bits + !!ps_ratectrl->i64_vbv_occupancy_fractional ) ) + ( ps_ratectrl->i_vbvrate - 1 ) ) / ps_ratectrl->i_vbvrate; + } + } + ps_y262->ps_input_picture->i_vbv_delay = ( int32_t ) i64_occupancy_ticks; + } +} + + +bool_t y262_ratectrl_commit_bits( y262_t *ps_y262, int32_t i_bits ) +{ + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_ratectrl->i_vbv_occupancy -= i_bits; + if( ps_ratectrl->i_vbv_occupancy < 0 ) + { + return FALSE; + } + return TRUE; +} + + +int32_t y262_ratectrl_stuffing_bits_needed( y262_t *ps_y262 ) +{ + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + if( ps_ratectrl->i_vbv_occupancy_overflow >= ps_ratectrl->i_vbv_size ) + { + return ps_ratectrl->i_vbv_occupancy_overflow + 2 - ps_ratectrl->i_vbv_size; /* +1 because of fractional, +1 because reasons */ + } + else + { + return 0; + } +} + + +void y262_ratectrl_commit_stuffing_bits( y262_t *ps_y262, int32_t i_bits ) +{ + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_ratectrl->i_vbv_occupancy = ps_ratectrl->i_vbv_occupancy_overflow - i_bits; + ps_ratectrl->i64_vbv_occupancy_fractional = ps_ratectrl->i64_vbv_occupancy_overflow_fractional; + + if( ps_ratectrl->i_vbv_occupancy >= ps_ratectrl->i_vbv_size ) + { + if( ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( 
ps_y262->p_cb_handle, Y262_ERROR_BUFFER, "Not enough stuffing bits commited, internal encoder error." ); + } + ps_ratectrl->i_vbv_occupancy = ps_ratectrl->i_vbv_size; + ps_ratectrl->i64_vbv_occupancy_fractional = 0; + } +} + + +void y262_ratectrl_commit_ticks( y262_t *ps_y262, int32_t i_ticks ) +{ + int64_t i64_vbv_gain_fractional; + int32_t i_vbv_gain; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_ratectrl->i64_output_ticks += i_ticks; + ps_ratectrl->i64_output_frames++; + if( ps_ratectrl->i64_output_ticks >= ps_ratectrl->i_pulldown_timescale ) + { + ps_ratectrl->i64_output_ticks -= ps_ratectrl->i_pulldown_timescale; + ps_ratectrl->i64_output_frames = 0; + ps_ratectrl->i64_output_seconds++; + } + + i_vbv_gain = ( int32_t)( ( ( ( int64_t )ps_ratectrl->i_vbvrate ) * i_ticks ) / ps_ratectrl->i_pulldown_timescale ); + i64_vbv_gain_fractional = ( ps_ratectrl->i_vbvrate * i_ticks ) % ps_ratectrl->i_pulldown_timescale; + + ps_ratectrl->i_vbv_occupancy += i_vbv_gain; + ps_ratectrl->i64_vbv_occupancy_fractional += i64_vbv_gain_fractional; + while( ps_ratectrl->i64_vbv_occupancy_fractional >= ps_ratectrl->i_pulldown_timescale ) + { + ps_ratectrl->i_vbv_occupancy++; + ps_ratectrl->i64_vbv_occupancy_fractional -= ps_ratectrl->i_pulldown_timescale; + } + + ps_ratectrl->i_vbv_occupancy_overflow = ps_ratectrl->i_vbv_occupancy; + ps_ratectrl->i64_vbv_occupancy_overflow_fractional = ps_ratectrl->i64_vbv_occupancy_fractional; + if( ps_ratectrl->i_vbv_occupancy >= ps_ratectrl->i_vbv_size ) + { + ps_ratectrl->i_vbv_occupancy = ps_ratectrl->i_vbv_size; + ps_ratectrl->i64_vbv_occupancy_fractional = 0; + } +} + + +void y262_ratectrl_end_picture( y262_t *ps_y262, int32_t i_bits ) +{ + int32_t i_picture_cost, i_picture_type, i_picture_ticks; + double d_quantizer, d_cplx, d_bits_quantizer_over_satd, d_prev_set_quantizer; + y262_bitrate_control_t *ps_ratectrl; + y262_picture_t *ps_picture; + + ps_ratectrl = &ps_y262->s_ratectrl; + ps_picture = 
ps_y262->ps_input_picture; + + d_prev_set_quantizer = ps_ratectrl->i_quantizer / 256.0; + d_quantizer = ps_ratectrl->d_picture_accumulated_quantizer_bits / ps_ratectrl->i_picture_coded_size; + /*fprintf( stderr, " set quant: %f, actual quant: %f\n", ( ( double ) ps_ratectrl->i_quantizer ) / 256.0, d_quantizer );*/ + i_picture_cost = ps_y262->ps_input_picture->i_frame_cost; + i_picture_type = ps_y262->ps_input_picture->i_frame_type; + + d_bits_quantizer_over_satd = ps_ratectrl->d_picture_accumulated_bits_quantizer_over_satd / ps_ratectrl->i_num_picture_accumulated_bits_quantizer_over_satd; + + /* single pass */ + ps_ratectrl->d_target_bits += ( ( ( double )ps_ratectrl->i_bitrate ) * ps_ratectrl->i_picture_duration ) / ps_ratectrl->i_timescale; + ps_ratectrl->d_output_bits += i_bits; + d_cplx = y262_ratectrl_get_cplx( ps_y262, i_picture_cost ); + ps_ratectrl->d_qb_qplx += ( d_quantizer * i_bits ) / d_cplx; + + /* second pass */ + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + if( ps_ratectrl->i_current_sample < ps_ratectrl->i_num_samples ) + { + ps_ratectrl->d_estimated_bits += ps_ratectrl->ps_samples[ ps_ratectrl->i_current_sample ].i_estimated_bits; + ps_ratectrl->i_current_sample++; + } + else + { + /* error */ + } + } + + /*fprintf( stderr, "est: %d, actual %d, %f ( %f )\n", ps_ratectrl->i_predicted_frame_size, i_bits, ( double )ps_ratectrl->i_predicted_frame_size / i_bits, d_prev_set_quantizer );*/ + + /* buffer model */ + /*fprintf( stderr, "actual: %d ( satd: %d )\n", i_bits, i_picture_cost );*/ + if( !y262_ratectrl_commit_bits( ps_y262, i_bits ) ) + { + y262_error( ps_y262, Y262_ERROR_BUFFER, ( int8_t * )"buffer underrun by %d bits", ps_ratectrl->i_vbv_occupancy ); + } + i_picture_ticks = ps_ratectrl->i_pulldown_picture_duration; + i_picture_ticks += ps_y262->ps_input_picture->b_repeat_first_field ? 
( ps_ratectrl->i_pulldown_picture_duration / 2 ) : 0; + + y262_ratectrl_commit_ticks( ps_y262, i_picture_ticks ); + + /* update satd predictor */ + if( i_picture_cost > ps_ratectrl->i_min_satd_for_satd_prediction ) + { + int32_t i_baseline_bits = y262_ratectrl_predict_frame_size_baseline( ps_y262, i_picture_type ); + int32_t i_satd_pred_bits = i_bits - i_baseline_bits; + + if( i_satd_pred_bits > ps_ratectrl->i_min_bits_for_satd_prediction ) + { + ps_ratectrl->rgd_satd_predictors[ i_picture_type ] *= 0.5; + ps_ratectrl->rgd_satd_predictors_weight[ i_picture_type ] *= 0.5; + ps_ratectrl->rgd_satd_predictors[ i_picture_type ] += ( ( ( double ) i_satd_pred_bits ) / i_picture_cost ) * d_quantizer; + ps_ratectrl->rgd_satd_predictors_weight[ i_picture_type ] += 1.0; + } + /* + if( i_picture_type != PICTURE_CODING_TYPE_I ) + { + if( ps_picture->i_frame_intra_cost < ( ps_picture->i_frame_cost * 1.2 ) ) + { + ps_ratectrl->rgd_satd_predictors[ PICTURE_CODING_TYPE_I ] *= 0.5; + ps_ratectrl->rgd_satd_predictors_weight[ PICTURE_CODING_TYPE_I ] *= 0.5; + ps_ratectrl->rgd_satd_predictors[ PICTURE_CODING_TYPE_I ] += ( ( ( double ) i_bits ) / i_picture_cost ) * d_prev_set_quantizer; + ps_ratectrl->rgd_satd_predictors_weight[ PICTURE_CODING_TYPE_I ] += 1.0; + } + } + */ + } + else + { + i_picture_cost = i_picture_cost; + } + + /* update confidence */ + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + double d_delta_behind, d_conf_behind; + double d_delta_ahead, d_conf_ahead; + + d_delta_behind = ps_ratectrl->i_predicted_frame_size_behind - i_bits; + d_delta_ahead = ps_ratectrl->i_predicted_frame_size_ahead - i_bits; + + d_delta_behind *= d_delta_behind; + d_delta_ahead *= d_delta_ahead; + + d_conf_behind = 1.0 / MAX( d_delta_behind, 1.0 ); + d_conf_ahead = 1.0 / MAX( d_delta_ahead, 1.0 ); + + /*fprintf( stderr, "pred actual %d, merge: %d, behind: %d ( %f ), ahead: %d ( %f )\n", i_bits, ps_ratectrl->i_predicted_frame_size, ps_ratectrl->i_predicted_frame_size_behind, 
d_conf_behind, ps_ratectrl->i_predicted_frame_size_ahead, d_conf_ahead );*/ + + ps_ratectrl->d_confidence_predict_behind = ( ps_ratectrl->d_confidence_predict_behind * 0.5 ) + d_conf_behind; + ps_ratectrl->d_confidence_predict_ahead = ( ps_ratectrl->d_confidence_predict_ahead * 0.5 ) + d_conf_ahead; + } + + if( i_picture_type != PICTURE_CODING_TYPE_B ) + { + ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] = ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ]; + ps_ratectrl->rgd_last_ref_quantizers[ 0 ] = ps_ratectrl->rgd_last_ref_quantizers[ 1 ]; + ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] = ps_y262->ps_input_picture->i_pon; + ps_ratectrl->rgd_last_ref_quantizers[ 1 ] = d_quantizer; + } + + + if( ps_y262->s_funcs.pf_result_callback ) + { + y262_ratectrl_sample_t s_sample; + y262_result_t s_result; + + s_sample.ui8_frame_type = i_picture_type; + s_sample.i_bits = i_bits; + s_sample.i_quantizer_f8 = ( int32_t )( d_quantizer * 256.0 ); + s_sample.i_cplx_f8 = ( int32_t )( d_cplx * 256 ); + s_sample.i_estimated_bits = 0; + s_sample.i_satd_cost = ps_picture->i_frame_cost; + + s_result.rc_sample.i_data_length = sizeof( s_sample ); + s_result.rc_sample.pui8_data = ( uint8_t *)( &s_sample ); + s_result.rc_sample.i_don = ps_y262->ps_input_picture->i_don; + + ps_y262->s_funcs.pf_result_callback( ps_y262->p_cb_handle, Y262_RESULT_RC_SAMPLE, &s_result ); + } +} + + +bool_t y262_ratectrl_check_for_reencode( y262_t *ps_y262, int32_t i_bits ) +{ + int32_t i_picture_cost, i_picture_type, i_predicted_frame_size, i_predicted_frame_size_ahead, i_predicted_frame_size_behind; + double d_quantizer, d_prev_quantizer, d_prev_set_quantizer; + y262_bitrate_control_t *ps_ratectrl; + y262_picture_t *ps_picture; + bool_t b_this_shall_pass = FALSE; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_picture = ps_y262->ps_input_picture; + + if( ps_y262->ps_input_picture->i_pon == 48 ) + { + ps_y262 = ps_y262; + } + + if( i_bits < ( ps_ratectrl->i_min_bits_for_satd_prediction * 5 ) / 2 ) + { + 
b_this_shall_pass = TRUE; + } + + if( !b_this_shall_pass && + ps_ratectrl->b_picture_bad_encountered ) + { + d_prev_set_quantizer = ps_ratectrl->i_quantizer / 256.0; + d_prev_quantizer = ps_ratectrl->d_picture_accumulated_quantizer_bits / ps_ratectrl->i_picture_coded_size; + + i_picture_cost = ps_y262->ps_input_picture->i_frame_cost; + i_picture_type = ps_y262->ps_input_picture->i_frame_type; + + /*if( ( i_picture_cost / ( ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) / 256 ) ) > 11 ) + { + if( i_bits >( int32_t )( ps_ratectrl->i_predicted_frame_size * 2 ) ) + { + ps_ratectrl->rgd_satd_predictors[ i_picture_type ] = ( ( ( double ) i_bits ) / i_picture_cost ) * d_prev_quantizer; + ps_ratectrl->rgd_satd_predictors_weight[ i_picture_type ] = 1.0; + if( i_picture_type != PICTURE_CODING_TYPE_I ) + { + if( ps_y262->ps_input_picture->i_frame_intra_cost < ( ps_picture->i_frame_cost * 1.2 ) ) + { + ps_ratectrl->rgd_satd_predictors[ PICTURE_CODING_TYPE_I ] = ( ( ( double ) i_bits * 1.2 ) / i_picture_cost ) * d_prev_quantizer; + ps_ratectrl->rgd_satd_predictors_weight[ PICTURE_CODING_TYPE_I ] = 1.0; + } + } + } + d_quantizer = d_prev_quantizer; + } + else*/ + { + d_quantizer = d_prev_quantizer; + ps_ratectrl->i_predicted_frame_size = ps_ratectrl->i_picture_coded_size; + } + + d_quantizer = y262_ratectrl_get_picture_quantizer( ps_y262, ps_y262->ps_input_picture->i_frame_type, d_quantizer ); + + /*fprintf( stderr, "1: pre: %3.2f, enc: %3.2f, post: %3.2f - ( %3.2f ), %d %d\n", d_prev_set_quantizer, d_prev_quantizer, d_quantizer, d_quantizer - d_prev_set_quantizer, i_bits, ps_ratectrl->i_min_bits_for_satd_prediction );*/ + + d_quantizer = y262_ratectrl_clamp_to_vbv( ps_y262, d_quantizer ); + + /*fprintf( stderr, "2: pre: %3.2f, post: %3.2f - ( %3.2f ), %d %d\n", d_prev_set_quantizer, d_quantizer, d_quantizer - d_prev_set_quantizer, i_bits, ps_ratectrl->i_min_bits_for_satd_prediction );*/ + d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + + if( 
ps_y262->ps_input_picture->i_frame_type == PICTURE_CODING_TYPE_B ) + { + if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] < ps_y262->ps_input_picture->i_pon && + ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] > ps_y262->ps_input_picture->i_pon ) + { + double d_mid_quantizer, d_delta, d_w1, d_w2; + d_delta = ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] - ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ]; + d_w1 = d_delta - ( ps_y262->ps_input_picture->i_pon - ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] ); + d_w2 = d_delta - ( ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] - ps_y262->ps_input_picture->i_pon ); + d_mid_quantizer = ( ps_ratectrl->rgd_last_ref_quantizers[ 0 ] * d_w1 ) + ( ps_ratectrl->rgd_last_ref_quantizers[ 0 ] * d_w2 ); + d_mid_quantizer = d_mid_quantizer / d_delta; + d_quantizer = MAX( d_mid_quantizer, d_quantizer ); + } + else if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 0 ] > ps_y262->ps_input_picture->i_pon ) + { + d_quantizer = MAX( d_quantizer, ps_ratectrl->rgd_last_ref_quantizers[ 0 ] ); + } + else if( ps_ratectrl->rgi_last_ref_quantizers_pons[ 1 ] < ps_y262->ps_input_picture->i_pon ) + { + d_quantizer = MAX( d_quantizer, ps_ratectrl->rgd_last_ref_quantizers[ 1 ] ); + } + else + { + d_quantizer = MAX( d_quantizer, MAX( ps_ratectrl->rgd_last_ref_quantizers[ 0 ], ps_ratectrl->rgd_last_ref_quantizers[ 1 ] ) ); + } + } + d_quantizer = MIN( 31.0, MAX( 0.1, d_quantizer ) ); + + i_predicted_frame_size = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer, ps_y262->ps_input_picture->i_don, 0 ); + i_predicted_frame_size_behind = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer, ps_y262->ps_input_picture->i_don, 1 ); + if( ps_ratectrl->i_mode == BITRATE_CONTROL_PASS2 ) + { + i_predicted_frame_size_ahead = y262_ratectrl_predict_first_frame_size( ps_y262, i_picture_type, i_picture_cost, d_quantizer, ps_y262->ps_input_picture->i_don, 2 ); + } + else + { + 
i_predicted_frame_size_ahead = i_predicted_frame_size_behind; + } + + + if( fabs( d_prev_set_quantizer - d_quantizer ) > ( d_quantizer * 0.1 ) || + ps_ratectrl->b_picture_bad_encountered ) + { + int32_t i_mb_idx, i_num_mb; + + ps_ratectrl->i_quantizer = ( int32_t ) ( d_quantizer * 256.0 ); + + ps_ratectrl->i_picture_accumulated_quantizer = 0; + ps_ratectrl->i_picture_num_accumulated_quantizer = 0; + ps_ratectrl->d_picture_accumulated_quantizer_bits = 0.0; + ps_ratectrl->d_picture_accumulated_bits_quantizer_over_satd = 0.0; + ps_ratectrl->i_num_picture_accumulated_bits_quantizer_over_satd = 0; + ps_ratectrl->i_picture_coded_scaled_satd = 0; + ps_ratectrl->i_picture_coded_size = 0; + ps_ratectrl->b_picture_bad_encountered = FALSE; + ps_ratectrl->i_picture_uncoded_size = 0; + ps_ratectrl->b_picture_reencode_pass = TRUE; +/* ps_ratectrl->i_predicted_frame_size = i_predicted_frame_size; + ps_ratectrl->i_predicted_frame_size_behind = i_predicted_frame_size_behind; + ps_ratectrl->i_predicted_frame_size_ahead = i_predicted_frame_size_ahead;*/ + + i_num_mb = ( ps_y262->i_sequence_width >> 4 ) * ( ps_y262->i_sequence_height >> 4 ); + + for( i_mb_idx = 0; i_mb_idx < i_num_mb; i_mb_idx++ ) + { + int32_t i_coded_bits; + double d_mb_quantizer, d_mb_size_scale; + + d_mb_quantizer = ps_ratectrl->ps_mb_samples[ i_mb_idx ].i_quantizer / 256.0; + d_mb_size_scale = d_quantizer / d_mb_quantizer; + + i_coded_bits = ps_ratectrl->ps_mb_samples[ i_mb_idx ].i_coded_bits; + i_coded_bits = MAX( 0, i_coded_bits - 5 ); + ps_ratectrl->ps_mb_samples[ i_mb_idx ].i_predicted_bits = ( ( int32_t )( i_coded_bits * d_mb_size_scale ) ) + 5; + } + + return TRUE; + } + } + return FALSE; +} + + +void y262_ratectrl_start_slice_encoder( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_start_mb_addr, int32_t i_end_mb_addr ) +{ + int32_t i_idx; + double d_slice_in_picture, d_slice_base, d_slice_dynamic; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; 
+ + ps_slice_rc->i_slice_scaled_satd = 0; + for( i_idx = i_start_mb_addr; i_idx <= i_end_mb_addr; i_idx++ ) + { + ps_slice_rc->i_slice_scaled_satd += ps_ratectrl->ps_mb_samples[ i_idx ].i_scaled_satd; + } + + if( ps_y262->ps_input_picture->i_pon == 2652 ) + { + ps_y262 = ps_y262; + } + + d_slice_in_picture = ( ( double )ps_slice_rc->i_slice_scaled_satd ) / ps_ratectrl->i_picture_scaled_satd; + d_slice_base = ( ( double )ps_ratectrl->i_picture_bit_budget ) * 0.2 * ps_y262->i_num_slice_encoders; + d_slice_dynamic = ( ( double )ps_ratectrl->i_picture_bit_budget ) - d_slice_base; + d_slice_base /= ps_y262->i_num_slice_encoders; + d_slice_dynamic *= d_slice_in_picture; + + ps_slice_rc->i_slice_bit_budget = ( int32_t ) ( ( d_slice_base + d_slice_dynamic ) * 0.8 ); + ps_slice_rc->i_slice_bit_budget_extra = ( int32_t )( ( d_slice_base + d_slice_dynamic ) * 0.2 ); + + ps_slice_rc->i_slice_accumulated_quantizer = 0; + ps_slice_rc->i_slice_num_accumulated_quantizer = 0; + ps_slice_rc->i_slice_coded_scaled_satd = 0; + ps_slice_rc->i_slice_coded_size = 0; + ps_slice_rc->d_slice_accumulated_quantizer_bits = 0.0; + ps_slice_rc->d_slice_accumulated_bits_quantizer_over_satd = 0.0; + ps_slice_rc->i_num_slice_accumulated_bits_quantizer_over_satd = 0; + ps_slice_rc->b_slice_bad_encountered = FALSE; + + ps_slice_rc->b_reencode_pass = ps_ratectrl->b_picture_reencode_pass; + ps_slice_rc->i_slice_accumulated_predicted_size = 0; +} + +void y262_ratectrl_end_slice_encoder( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc ) +{ + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + ps_ratectrl->i_picture_accumulated_quantizer += ps_slice_rc->i_slice_accumulated_quantizer; + ps_ratectrl->i_picture_num_accumulated_quantizer += ps_slice_rc->i_slice_num_accumulated_quantizer; + ps_ratectrl->d_picture_accumulated_quantizer_bits += ps_slice_rc->d_slice_accumulated_quantizer_bits; + ps_ratectrl->i_picture_coded_scaled_satd += 
ps_slice_rc->i_slice_coded_scaled_satd; + ps_ratectrl->i_picture_coded_size += ps_slice_rc->i_slice_coded_size; + ps_ratectrl->d_picture_accumulated_bits_quantizer_over_satd += ps_slice_rc->d_slice_accumulated_bits_quantizer_over_satd; + ps_ratectrl->i_num_picture_accumulated_bits_quantizer_over_satd += ps_slice_rc->i_num_slice_accumulated_bits_quantizer_over_satd; + ps_ratectrl->b_picture_bad_encountered = ps_ratectrl->b_picture_bad_encountered || ps_slice_rc->b_slice_bad_encountered; + ps_ratectrl->i_picture_uncoded_size += ( ps_slice_rc->i_slice_bit_budget + ps_slice_rc->i_slice_bit_budget_extra ) - ps_slice_rc->i_slice_coded_size; +} + + +int32_t y262_ratectrl_get_slice_mb_quantizer( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_mb_addr ) +{ + int32_t i_quantizer, i_extra_bits, i_baseline, i_adjusted_slice_coded_size; + double d_quantizer, d_adjusted_quantizer, d_adjustment_weight; + y262_bitrate_control_t *ps_ratectrl; + + ps_ratectrl = &ps_y262->s_ratectrl; + + d_quantizer = ps_ratectrl->i_quantizer / 256.0; + + if( ps_slice_rc->i_slice_coded_scaled_satd > ( ps_slice_rc->i_slice_scaled_satd / 20 ) || + ps_slice_rc->i_slice_coded_size > ( ps_slice_rc->i_slice_bit_budget / 20 ) ) + { + i_baseline = ( ps_slice_rc->i_slice_num_accumulated_quantizer * 5 ); + i_adjusted_slice_coded_size = ps_slice_rc->i_slice_coded_size - i_baseline; + i_adjusted_slice_coded_size = MAX( 0, i_adjusted_slice_coded_size ); + + i_extra_bits = ( int32_t )( ( ps_slice_rc->i_slice_bit_budget_extra * 0.8 ) * ( ( ( double )ps_slice_rc->i_slice_coded_scaled_satd ) / ps_slice_rc->i_slice_scaled_satd ) ); + i_extra_bits += ( int32_t )( ( ps_slice_rc->i_slice_bit_budget_extra * 0.2 ) * MAX( 0.0, + ( ( ( double )ps_slice_rc->i_slice_coded_scaled_satd - ( ( ps_slice_rc->i_slice_scaled_satd * 4 ) / 5 ) ) / MAX( 1, ( ps_slice_rc->i_slice_scaled_satd - ( ( ps_slice_rc->i_slice_scaled_satd * 4 ) / 5 ) ) ) ) ) ); + i_extra_bits -= i_baseline; + + if( 
!ps_slice_rc->b_reencode_pass ) + { + if( ( ps_slice_rc->i_slice_bit_budget + i_extra_bits - i_adjusted_slice_coded_size ) > ( 512 + 9 ) ) + { + d_adjusted_quantizer = ( ( ( ( ps_slice_rc->i_slice_scaled_satd - ps_slice_rc->i_slice_coded_scaled_satd ) * 11 ) / 10 ) * ( ps_slice_rc->d_slice_accumulated_quantizer_bits / MAX( 1, ps_slice_rc->i_slice_coded_scaled_satd ) ) ) / + ( ( ( ( ps_slice_rc->i_slice_bit_budget + i_extra_bits - i_adjusted_slice_coded_size ) - 512 ) * 9 ) / 10 ); + d_adjusted_quantizer += ( d_adjusted_quantizer - d_quantizer ) * 0.1; + } + else + { + d_adjusted_quantizer = 9000.0; + } + } + else + { + int32_t i_adjusted_slice_predicted_size; + i_adjusted_slice_predicted_size = ps_slice_rc->i_slice_accumulated_predicted_size; + i_adjusted_slice_predicted_size -= i_baseline; + i_adjusted_slice_predicted_size = MAX( 0, i_adjusted_slice_predicted_size ); + + if( ( ps_slice_rc->i_slice_bit_budget + i_extra_bits - i_adjusted_slice_coded_size ) > ( 512 + 9 ) ) + { + double d_scale; + if( i_adjusted_slice_coded_size > 0 && i_adjusted_slice_predicted_size > 0 ) + { + d_scale = i_adjusted_slice_coded_size / ( double ) i_adjusted_slice_predicted_size; + } + + d_adjusted_quantizer = d_quantizer * d_scale; + d_adjusted_quantizer += ( d_adjusted_quantizer - d_quantizer ) * 0.1; + } + else + { + d_adjusted_quantizer = 9000.0; + } + } + d_adjustment_weight = MIN( 1.0, ( ( double )i_adjusted_slice_coded_size ) / MAX( 1, ( ( ps_slice_rc->i_slice_bit_budget + i_extra_bits ) / 4 ) ) ); + + d_adjusted_quantizer = ( d_quantizer * ( 1.0 - d_adjustment_weight ) ) + ( d_adjusted_quantizer * d_adjustment_weight ); + + if( d_adjusted_quantizer > ( d_quantizer * 1.5 ) || ( ( d_adjusted_quantizer - d_quantizer ) > 2.0 ) ) + { + /*fprintf( stderr, "BAD: %d %d ( %d ): %f ( %f )\n", ps_y262->ps_input_picture->i_pon, i_mb_addr, ps_slice_rc->i_slice_coded_scaled_satd, d_adjusted_quantizer - d_quantizer, d_adjusted_quantizer );*/ + ps_slice_rc->b_slice_bad_encountered = TRUE; + + } 
+ d_quantizer = MAX( d_quantizer, d_adjusted_quantizer ); + } + + d_quantizer = MIN( 31.0, MAX( 0.5, d_quantizer ) ); /* 0.5 because it might be increased by aq */ + i_quantizer = ( int32_t ) ( d_quantizer * 256.0 ); + + ps_slice_rc->i_mb_queued_quantizer_f8 = i_quantizer; /* 128-* */ + return i_quantizer; +} + +void y262_ratectrl_update_slice_mb( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_mb_addr, int32_t i_mb_bits ) +{ + y262_bitrate_control_t *ps_ratectrl; + int32_t i_scaled_satd_cost; + int32_t i_satd, i_quantizer; + + ps_ratectrl = &ps_y262->s_ratectrl; + + i_quantizer = MAX( 256, ps_slice_rc->i_mb_queued_quantizer_f8 ); /* clamp to valid range to not anger 2pass mechanics */ + i_scaled_satd_cost = ps_ratectrl->ps_mb_samples[ i_mb_addr ].i_scaled_satd; + i_satd = ps_ratectrl->ps_mb_samples[ i_mb_addr ].i_satd; + + ps_slice_rc->i_slice_coded_scaled_satd += i_scaled_satd_cost; + ps_slice_rc->i_slice_coded_size += i_mb_bits; + ps_slice_rc->i_slice_accumulated_quantizer += i_quantizer; + ps_slice_rc->i_slice_num_accumulated_quantizer++; + ps_slice_rc->d_slice_accumulated_quantizer_bits += ( i_quantizer * MAX( 1, i_mb_bits ) ) / 256.0; + if( i_satd > 10 ) + { + ps_slice_rc->d_slice_accumulated_bits_quantizer_over_satd = ( ( double ) ( i_mb_bits * ps_slice_rc->i_mb_queued_quantizer_f8 ) ) / i_satd; + ps_slice_rc->i_num_slice_accumulated_bits_quantizer_over_satd++; + } + + ps_ratectrl->ps_mb_samples[ i_mb_addr ].i_quantizer = ps_slice_rc->i_mb_queued_quantizer_f8; + ps_ratectrl->ps_mb_samples[ i_mb_addr ].i_coded_bits = i_mb_bits; + ps_slice_rc->i_slice_accumulated_predicted_size += ps_ratectrl->ps_mb_samples[ i_mb_addr ].i_predicted_bits; + +} + + + + + diff --git a/src/y262/ratectrl.h b/src/y262/ratectrl.h new file mode 100644 index 0000000..7d81887 --- /dev/null +++ b/src/y262/ratectrl.h @@ -0,0 +1,43 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +bool_t y262_ratectrl_init( y262_t *ps_y262 ); +void y262_ratectrl_deinit( y262_t *ps_y262 ); +void y262_ratectrl_start_picture( y262_t *ps_y262, int32_t i_bits_baseline ); +void y262_ratectrl_end_picture( y262_t *ps_y262, int32_t i_bits ); +bool_t y262_ratectrl_check_for_reencode( y262_t *ps_y262, int32_t i_bits ); + +void y262_ratectrl_start_slice_encoder( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_start_mb_addr, int32_t i_end_mb_addr ); +void y262_ratectrl_end_slice_encoder( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc ); +int32_t y262_ratectrl_get_slice_mb_quantizer( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_mb_addr ); +void y262_ratectrl_update_slice_mb( y262_t *ps_y262, y262_slice_encoder_bitrate_control_t *ps_slice_rc, int32_t i_mb_addr, int32_t i_mb_bits ); + +int32_t y262_ratectrl_stuffing_bits_needed( y262_t *ps_y262 ); +void y262_ratectrl_commit_stuffing_bits( y262_t *ps_y262, int32_t i_bits ); \ No newline at end of file diff --git a/src/y262/tables.c b/src/y262/tables.c new file mode 100644 index 0000000..42cadd0 --- /dev/null +++ b/src/y262/tables.c @@ -0,0 +1,1122 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "y262.h" + +int32_t rgi_y262_framerate_code_duration[ 16 ] = { + 3600, 1001, 1000, 3600, 1001, 3000, 1800, 1001, 1500, 3600, 3600, 3600, 3600, 3600, 3600, 3600 +}; + +int32_t rgi_y262_framerate_code_timescale[ 16 ] = { + 90000, 24000, 24000, 90000, 30000, 90000, 90000, 60000, 90000, 90000, 90000, 90000, 90000, 90000, 90000, 90000 +}; + +uint8_t rgui8_y262_default_intra_matrix[ 64 ] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +}; + +uint8_t rgui8_y262_default_non_intra_matrix[ 64 ] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 +}; + +uint8_t rgui8_y262_scan_0_table[] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 
30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +uint8_t rgui8_y262_scan_1_table[] = { + 0, 8, 16, 24, 1, 9, 2, 10, + 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, + 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, + 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63 +}; + +int32_t rgi_y262_block_type_dims[][ 2 ] = { + { 16, 16 }, + { 16, 8 } +}; + +int32_t rgui_y262_luma_blk_offsets[ 4 ][ 2 ] = { + { 0, 0 }, + { 8, 0 }, + { 0, 8 }, + { 8, 8 }, +}; + +int32_t rgui_y262_luma_il_blk_offsets[ 4 ][ 2 ] = { + { 0, 0 }, + { 8, 0 }, + { 0, 1 }, + { 8, 1 }, +}; + +int32_t rgui_num_chroma_blk[ 4 ] = { + 1, + 1, + 2, + 4 +}; + +int32_t rgui_y262_chroma_blk_offsets[ 4 ][ 4 ][ 2 ] = { + { + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 0, 8 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 8, 0 }, + { 0, 8 }, + { 8, 8 }, + }, +}; + +int32_t rgui_y262_chroma_il_blk_offsets[ 4 ][ 4 ][ 2 ] = { + { + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 0, 1 }, + { 0, 0 }, + { 0, 0 }, + }, + { + { 0, 0 }, + { 8, 0 }, + { 0, 1 }, + { 8, 1 }, + }, +}; + + +y262_vlc_t rgs_y262_macroblock_address_increment_table[] = { + { 1, 1 }, + { 3, 3 }, + { 2, 3 }, + { 3, 4 }, + { 2, 4 }, + { 3, 5 }, + { 2, 5 }, + { 7, 7 }, + { 6, 7 }, + { 11, 8 }, + { 10, 8 }, + { 9, 8 }, + { 8, 8 }, + { 7, 8 }, + { 6, 8 }, + { 23, 10 }, + { 22, 10 }, + { 21, 10 }, + { 20, 10 }, + { 19, 10 }, + { 18, 10 }, + { 35, 11 }, + { 34, 11 }, + { 33, 11 }, + { 32, 11 }, + { 31, 11 }, + { 30, 11 }, + { 29, 11 }, + { 28, 11 }, + { 27, 11 }, + { 26, 11 }, + { 25, 11 }, + { 24, 11 }, + { 8, 11 }, + { VLC_SENTINEL, VLC_SENTINEL } +}; + +y262_vlc_t rgs_y262_macroblock_type_i_picture_table[] = { + { 1, 1 }, + { 1, 2 }, + { VLC_SENTINEL, VLC_SENTINEL } 
+}; + + +int32_t rgui_y262_macroblock_type_i_picture_flags_table[] = { + MACROBLOCK_INTRA, + MACROBLOCK_INTRA | MACROBLOCK_QUANT +}; + + + +y262_vlc_t rgs_y262_macroblock_type_p_picture_table[] = { + { 1, 1 }, + { 1, 2 }, + { 1, 3 }, + { 3, 5 }, + { 2, 5 }, + { 1, 5 }, + { 1, 6 }, + { VLC_SENTINEL, VLC_SENTINEL } +}; + + +int32_t rgui_y262_macroblock_type_p_picture_flags_table[] = { + MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN, + MACROBLOCK_PATTERN, + MACROBLOCK_MOTION_FORWARD, + MACROBLOCK_INTRA, + MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN, + MACROBLOCK_QUANT | MACROBLOCK_PATTERN, + MACROBLOCK_QUANT | MACROBLOCK_INTRA +}; + + +y262_vlc_t rgs_y262_macroblock_type_b_picture_table[] = { + { 2, 2 }, + { 3, 2 }, + { 2, 3 }, + { 3, 3 }, + { 2, 4 }, + { 3, 4 }, + { 3, 5 }, + { 2, 5 }, + { 3, 6 }, + { 2, 6 }, + { 1, 6 }, + { VLC_SENTINEL, VLC_SENTINEL } +}; + + +int32_t rgui_y262_macroblock_type_b_picture_flags_table[] = { + MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD, + MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN, + MACROBLOCK_MOTION_BACKWARD, + MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN, + MACROBLOCK_MOTION_FORWARD, + MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN, + MACROBLOCK_INTRA, + MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN, + MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN, + MACROBLOCK_QUANT | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN, + MACROBLOCK_QUANT | MACROBLOCK_INTRA +}; + + +y262_vlc_t rgs_y262_dct_dc_size_luminance_table[] = { + { 0, 2 }, /* 1 */ + { 1, 2 }, /* 2 */ + { 4, 3 }, /* 0 */ + { 5, 3 }, /* 3 */ + { 6, 3 }, /* 4 */ + { 14, 4 }, /* 5 */ + { 30, 5 }, /* 6 */ + { 62, 6 }, /* 7 */ + { 126, 7 }, /* 8 */ + { 254, 8 }, /* 9 */ + { 510, 9 }, /* 10 */ + { 511, 9 }, /* 11 */ + { VLC_SENTINEL, VLC_SENTINEL } +}; + +int32_t rgi_y262_dct_dc_size_luminance_lookup_table[] = { + 1, + 2, + 0, + 3, + 4, + 5, + 6, 
+ 7, + 8, + 9, + 10, + 11 +}; + +y262_vlc_t rgs_y262_dct_dc_size_chrominance_table[] = { + { 0, 2 }, /* 0 */ + { 1, 2 }, /* 1 */ + { 2, 2 }, /* 2 */ + { 6, 3 }, /* 3 */ + { 14, 4 }, /* 4 */ + { 30, 5 }, /* 5 */ + { 62, 6 }, /* 6 */ + { 126, 7 }, /* 7 */ + { 254, 8 }, /* 8 */ + { 510, 9 }, /* 9 */ + { 1022, 10 }, /* 10 */ + { 1023, 10 }, /* 11 */ + { VLC_SENTINEL, VLC_SENTINEL } +}; + + +int32_t rgi_y262_dct_dc_size_chrominance_lookup_table[] = { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11 +}; + + +y262_vlc_t rgs_y262_dct_coefficients_table_zero[] = { + { 2, 2 }, /* (Note 2) End of Block */ + { 3, 2 }, /* s (Note 4) 0 1 */ + { 3, 3 }, /* s 1 1 */ + { 4, 4 }, /* s 0 2 */ + { 5, 4 }, /* s 2 1 */ + { 5, 5 }, /* s 0 3 */ + { 7, 5 }, /* s 3 1 */ + { 6, 5 }, /* s 4 1 */ + { 6, 6 }, /* s 1 2 */ + { 7, 6 }, /* s 5 1 */ + { 5, 6 }, /* s 6 1 */ + { 4, 6 }, /* s 7 1 */ + { 1, 6 }, /* Escape */ + { 6, 7 }, /* s 0 4 */ + { 4, 7 }, /* s 2 2 */ + { 7, 7 }, /* s 8 1 */ + { 5, 7 }, /* s 9 1 */ + { 38, 8 }, /* s 0 5 */ + { 33, 8 }, /* s 0 6 */ + { 37, 8 }, /* s 1 3 */ + { 36, 8 }, /* s 3 2 */ + { 39, 8 }, /* s 10 1 */ + { 35, 8 }, /* s 11 1 */ + { 34, 8 }, /* s 12 1 */ + { 32, 8 }, /* s 13 1 */ + { 10, 10 }, /* s 0 7 */ + { 12, 10 }, /* s 1 4 */ + { 11, 10 }, /* s 2 3 */ + { 15, 10 }, /* s 4 2 */ + { 9, 10 }, /* s 5 2 */ + { 14, 10 }, /* s 14 1 */ + { 13, 10 }, /* s 15 1 */ + { 8, 10 }, /* s 16 1 */ + { 29, 12 }, /* s 0 8 */ + { 24, 12 }, /* s 0 9 */ + { 19, 12 }, /* s 0 10 */ + { 16, 12 }, /* s 0 11 */ + { 27, 12 }, /* s 1 5 */ + { 20, 12 }, /* s 2 4 */ + { 28, 12 }, /* s 3 3 */ + { 18, 12 }, /* s 4 3 */ + { 30, 12 }, /* s 6 2 */ + { 21, 12 }, /* s 7 2 */ + { 17, 12 }, /* s 8 2 */ + { 31, 12 }, /* s 17 1 */ + { 26, 12 }, /* s 18 1 */ + { 25, 12 }, /* s 19 1 */ + { 23, 12 }, /* s 20 1 */ + { 22, 12 }, /* s 21 1 */ + { 26, 13 }, /* s 0 12 */ + { 25, 13 }, /* s 0 13 */ + { 24, 13 }, /* s 0 14 */ + { 23, 13 }, /* s 0 15 */ + { 22, 13 }, /* s 1 6 */ + { 21, 13 }, /* s 1 
7 */ + { 20, 13 }, /* s 2 5 */ + { 19, 13 }, /* s 3 4 */ + { 18, 13 }, /* s 5 3 */ + { 17, 13 }, /* s 9 2 */ + { 16, 13 }, /* s 10 2 */ + { 31, 13 }, /* s 22 1 */ + { 30, 13 }, /* s 23 1 */ + { 29, 13 }, /* s 24 1 */ + { 28, 13 }, /* s 25 1 */ + { 27, 13 }, /* s 26 1 */ + { 31, 14 }, /* s 0 16 */ + { 30, 14 }, /* s 0 17 */ + { 29, 14 }, /* s 0 18 */ + { 28, 14 }, /* s 0 19 */ + { 27, 14 }, /* s 0 20 */ + { 26, 14 }, /* s 0 21 */ + { 25, 14 }, /* s 0 22 */ + { 24, 14 }, /* s 0 23 */ + { 23, 14 }, /* s 0 24 */ + { 22, 14 }, /* s 0 25 */ + { 21, 14 }, /* s 0 26 */ + { 20, 14 }, /* s 0 27 */ + { 19, 14 }, /* s 0 28 */ + { 18, 14 }, /* s 0 29 */ + { 17, 14 }, /* s 0 30 */ + { 16, 14 }, /* s 0 31 */ + { 24, 15 }, /* s 0 32 */ + { 23, 15 }, /* s 0 33 */ + { 22, 15 }, /* s 0 34 */ + { 21, 15 }, /* s 0 35 */ + { 20, 15 }, /* s 0 36 */ + { 19, 15 }, /* s 0 37 */ + { 18, 15 }, /* s 0 38 */ + { 17, 15 }, /* s 0 39 */ + { 16, 15 }, /* s 0 40 */ + { 31, 15 }, /* s 1 8 */ + { 30, 15 }, /* s 1 9 */ + { 29, 15 }, /* s 1 10 */ + { 28, 15 }, /* s 1 11 */ + { 27, 15 }, /* s 1 12 */ + { 26, 15 }, /* s 1 13 */ + { 25, 15 }, /* s 1 14 */ + { 19, 16 }, /* s 1 15 */ + { 18, 16 }, /* s 1 16 */ + { 17, 16 }, /* s 1 17 */ + { 16, 16 }, /* s 1 18 */ + { 20, 16 }, /* s 6 3 */ + { 26, 16 }, /* s 11 2 */ + { 25, 16 }, /* s 12 2 */ + { 24, 16 }, /* s 13 2 */ + { 23, 16 }, /* s 14 2 */ + { 22, 16 }, /* s 15 2 */ + { 21, 16 }, /* s 16 2 */ + { 31, 16 }, /* s 27 1 */ + { 30, 16 }, /* s 28 1 */ + { 29, 16 }, /* s 29 1 */ + { 28, 16 }, /* s 30 1 */ + { 27, 16 }, /* s 31 1 */ + { VLC_SENTINEL, VLC_SENTINEL } +}; + +y262_run_level_t rgs_y262_dct_coefficients_lookup_table_zero[] = { + { RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK }, + { 0, 1 }, /**/ + { 1, 1 }, /**/ + { 0, 2 }, /**/ + { 2, 1 }, /**/ + { 0, 3 }, /**/ + { 3, 1 }, /**/ + { 4, 1 }, /**/ + { 1, 2 }, /**/ + { 5, 1 }, /**/ + { 6, 1 }, /**/ + { 7, 1 }, /**/ + { RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE }, + { 0, 4 }, /**/ + { 2, 2 }, /**/ + { 8, 1 }, 
/**/ + { 9, 1 }, /**/ + { 0, 5 }, /**/ + { 0, 6 }, /**/ + { 1, 3 }, /**/ + { 3, 2 }, /**/ + { 10, 1 }, /**/ + { 11, 1 }, /**/ + { 12, 1 }, /**/ + { 13, 1 }, /**/ + { 0, 7 }, /**/ + { 1, 4 }, /**/ + { 2, 3 }, /**/ + { 4, 2 }, /**/ + { 5, 2 }, /**/ + { 14, 1 }, /**/ + { 15, 1 }, /**/ + { 16, 1 }, /**/ + { 0, 8 }, /**/ + { 0, 9 }, /**/ + { 0, 10 }, /**/ + { 0, 11 }, /**/ + { 1, 5 }, /**/ + { 2, 4 }, /**/ + { 3, 3 }, /**/ + { 4, 3 }, /**/ + { 6, 2 }, /**/ + { 7, 2 }, /**/ + { 8, 2 }, /**/ + { 17, 1 }, /**/ + { 18, 1 }, /**/ + { 19, 1 }, /**/ + { 20, 1 }, /**/ + { 21, 1 }, /**/ + { 0, 12 }, /**/ + { 0, 13 }, /**/ + { 0, 14 }, /**/ + { 0, 15 }, /**/ + { 1, 6 }, /**/ + { 1, 7 }, /**/ + { 2, 5 }, /**/ + { 3, 4 }, /**/ + { 5, 3 }, /**/ + { 9, 2 }, /**/ + { 10, 2 }, /**/ + { 22, 1 }, /**/ + { 23, 1 }, /**/ + { 24, 1 }, /**/ + { 25, 1 }, /**/ + { 26, 1 }, /**/ + { 0, 16 }, /**/ + { 0, 17 }, /**/ + { 0, 18 }, /**/ + { 0, 19 }, /**/ + { 0, 20 }, /**/ + { 0, 21 }, /**/ + { 0, 22 }, /**/ + { 0, 23 }, /**/ + { 0, 24 }, /**/ + { 0, 25 }, /**/ + { 0, 26 }, /**/ + { 0, 27 }, /**/ + { 0, 28 }, /**/ + { 0, 29 }, /**/ + { 0, 30 }, /**/ + { 0, 31 }, /**/ + { 0, 32 }, /**/ + { 0, 33 }, /**/ + { 0, 34 }, /**/ + { 0, 35 }, /**/ + { 0, 36 }, /**/ + { 0, 37 }, /**/ + { 0, 38 }, /**/ + { 0, 39 }, /**/ + { 0, 40 }, /**/ + { 1, 8 }, /**/ + { 1, 9 }, /**/ + { 1, 10 }, /**/ + { 1, 11 }, /**/ + { 1, 12 }, /**/ + { 1, 13 }, /**/ + { 1, 14 }, /**/ + { 1, 15 }, /**/ + { 1, 16 }, /**/ + { 1, 17 }, /**/ + { 1, 18 }, /**/ + { 6, 3 }, /**/ + { 11, 2 }, /**/ + { 12, 2 }, /**/ + { 13, 2 }, /**/ + { 14, 2 }, /**/ + { 15, 2 }, /**/ + { 16, 2 }, /**/ + { 27, 1 }, /**/ + { 28, 1 }, /**/ + { 29, 1 }, /**/ + { 30, 1 }, /**/ + { 31, 1 } /**/ +}; + +int8_t rgi_y262_run_level_bits_zero[ 32 ][ 41 ] = { + { 24, 3, 5, 6, 8, 9, 9, 11, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16 }, + { 24, 4, 7, 9, 11, 13, 14, 14, 16, 16, 16, 16, 16, 
16, 16, 17, 17, 17, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 5, 8, 11, 13, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 6, 9, 13, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 6, 11, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 7, 11, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 7, 13, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 7, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 8, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 8, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 9, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 9, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 9, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 9, 17, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 11, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 11, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 11, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24 }, + { 24, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 14, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }, + { 24, 17, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 } +}; + + +y262_vlc_t rgs_y262_dct_coefficients_table_one[] = { + { 2, 2 }, /* s 0 1 */ + { 2, 3 }, /* s 1 1 */ + { 6, 3 }, /* s 0 2 */ + { 7, 4 }, /* s 0 3 */ + { 6, 4 }, /* End of Block */ + { 5, 5 }, /* s 2 1 */ + { 7, 5 }, /* s 3 1 */ + { 6, 5 }, /* s 1 2 */ + { 28, 5 }, /* s 0 4 */ + { 29, 5 }, /* s 0 5 */ + { 4, 6 }, /* s 0 7 */ + { 5, 6 }, /* s 0 6 */ + { 1, 6 }, /* Escape */ + { 7, 6 }, /* s 5 1 */ + { 6, 6 }, /* s 4 1 */ + { 6, 7 }, /* s 6 1 */ + { 4, 7 }, /* s 7 1 */ + { 121, 7 }, /* s 1 3 */ + { 122, 7 }, /* s 10 1 */ + { 123, 7 }, /* s 0 8 */ + { 124, 7 }, /* s 0 9 */ + { 7, 7 }, /* s 2 2 */ + { 5, 7 }, /* s 8 1 */ + { 120, 7 }, /* s 9 1 */ + { 38, 8 }, /* s 3 2 */ + { 33, 8 }, /* s 11 1 */ + { 37, 8 }, /* s 12 1 */ + { 36, 8 }, /* s 13 1 */ + { 39, 8 }, /* s 1 4 */ + { 252, 
8 }, /* s 2 3 */ + { 253, 8 }, /* s 4 2 */ + { 35, 8 }, /* s 0 10 */ + { 34, 8 }, /* s 0 11 */ + { 32, 8 }, /* s 1 5 */ + { 250, 8 }, /* s 0 12 */ + { 251, 8 }, /* s 0 13 */ + { 254, 8 }, /* s 0 14 */ + { 255, 8 }, /* s 0 15 */ + { 4, 9 }, /* s 5 2 */ + { 5, 9 }, /* s 14 1 */ + { 7, 9 }, /* s 15 1 */ + { 13, 10 }, /* s 16 1 */ + { 12, 10 }, /* s 2 4 */ + { 28, 12 }, /* s 3 3 */ + { 18, 12 }, /* s 4 3 */ + { 30, 12 }, /* s 6 2 */ + { 21, 12 }, /* s 7 2 */ + { 17, 12 }, /* s 8 2 */ + { 31, 12 }, /* s 17 1 */ + { 26, 12 }, /* s 18 1 */ + { 25, 12 }, /* s 19 1 */ + { 23, 12 }, /* s 20 1 */ + { 22, 12 }, /* s 21 1 */ + { 22, 13 }, /* s 1 6 */ + { 21, 13 }, /* s 1 7 */ + { 20, 13 }, /* s 2 5 */ + { 19, 13 }, /* s 3 4 */ + { 18, 13 }, /* s 5 3 */ + { 17, 13 }, /* s 9 2 */ + { 16, 13 }, /* s 10 2 */ + { 31, 13 }, /* s 22 1 */ + { 30, 13 }, /* s 23 1 */ + { 29, 13 }, /* s 24 1 */ + { 28, 13 }, /* s 25 1 */ + { 27, 13 }, /* s 26 1 */ + { 31, 14 }, /* s 0 16 */ + { 30, 14 }, /* s 0 17 */ + { 29, 14 }, /* s 0 18 */ + { 28, 14 }, /* s 0 19 */ + { 27, 14 }, /* s 0 20 */ + { 26, 14 }, /* s 0 21 */ + { 25, 14 }, /* s 0 22 */ + { 24, 14 }, /* s 0 23 */ + { 23, 14 }, /* s 0 24 */ + { 22, 14 }, /* s 0 25 */ + { 21, 14 }, /* s 0 26 */ + { 20, 14 }, /* s 0 27 */ + { 19, 14 }, /* s 0 28 */ + { 18, 14 }, /* s 0 29 */ + { 17, 14 }, /* s 0 30 */ + { 16, 14 }, /* s 0 31 */ + { 24, 15 }, /* s 0 32 */ + { 23, 15 }, /* s 0 33 */ + { 22, 15 }, /* s 0 34 */ + { 21, 15 }, /* s 0 35 */ + { 20, 15 }, /* s 0 36 */ + { 19, 15 }, /* s 0 37 */ + { 18, 15 }, /* s 0 38 */ + { 17, 15 }, /* s 0 39 */ + { 16, 15 }, /* s 0 40 */ + { 31, 15 }, /* s 1 8 */ + { 30, 15 }, /* s 1 9 */ + { 29, 15 }, /* s 1 10 */ + { 28, 15 }, /* s 1 11 */ + { 27, 15 }, /* s 1 12 */ + { 26, 15 }, /* s 1 13 */ + { 25, 15 }, /* s 1 14 */ + { 19, 16 }, /* s 1 15 */ + { 18, 16 }, /* s 1 16 */ + { 17, 16 }, /* s 1 17 */ + { 16, 16 }, /* s 1 18 */ + { 20, 16 }, /* s 6 3 */ + { 26, 16 }, /* s 11 2 */ + { 25, 16 }, /* s 12 2 */ + { 24, 16 
}, /* s 13 2 */ + { 23, 16 }, /* s 14 2 */ + { 22, 16 }, /* s 15 2 */ + { 21, 16 }, /* s 16 2 */ + { 31, 16 }, /* s 27 1 */ + { 30, 16 }, /* s 28 1 */ + { 29, 16 }, /* s 29 1 */ + { 28, 16 }, /* s 30 1 */ + { 27, 16 }, /* s 31 1 */ + { VLC_SENTINEL, VLC_SENTINEL } +}; + +y262_run_level_t rgs_y262_dct_coefficients_lookup_table_one[] = { + { 0, 1 }, /**/ + { 1, 1 }, /**/ + { 0, 2 }, /**/ + { 0, 3 }, /**/ + { RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK }, /* End of Block */ + { 2, 1 }, /**/ + { 3, 1 }, /**/ + { 1, 2 }, /**/ + { 0, 4 }, /**/ + { 0, 5 }, /**/ + { 0, 7 }, /**/ + { 0, 6 }, /**/ + { RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE }, /* Escape */ + { 5, 1 }, /**/ + { 4, 1 }, /**/ + { 6, 1 }, /**/ + { 7, 1 }, /**/ + { 1, 3 }, /**/ + { 10, 1 }, /**/ + { 0, 8 }, /**/ + { 0, 9 }, /**/ + { 2, 2 }, /**/ + { 8, 1 }, /**/ + { 9, 1 }, /**/ + { 3, 2 }, /**/ + { 11, 1 }, /**/ + { 12, 1 }, /**/ + { 13, 1 }, /**/ + { 1, 4 }, /**/ + { 2, 3 }, /**/ + { 4, 2 }, /**/ + { 0, 10 }, /**/ + { 0, 11 }, /**/ + { 1, 5 }, /**/ + { 0, 12 }, /**/ + { 0, 13 }, /**/ + { 0, 14 }, /**/ + { 0, 15 }, /**/ + { 5, 2 }, /**/ + { 14, 1 }, /**/ + { 15, 1 }, /**/ + { 16, 1 }, /**/ + { 2, 4 }, /**/ + { 3, 3 }, /**/ + { 4, 3 }, /**/ + { 6, 2 }, /**/ + { 7, 2 }, /**/ + { 8, 2 }, /**/ + { 17, 1 }, /**/ + { 18, 1 }, /**/ + { 19, 1 }, /**/ + { 20, 1 }, /**/ + { 21, 1 }, /**/ + { 1, 6 }, /**/ + { 1, 7 }, /**/ + { 2, 5 }, /**/ + { 3, 4 }, /**/ + { 5, 3 }, /**/ + { 9, 2 }, /**/ + { 10, 2 }, /**/ + { 22, 1 }, /**/ + { 23, 1 }, /**/ + { 24, 1 }, /**/ + { 25, 1 }, /**/ + { 26, 1 }, /**/ + { 0, 16 }, /**/ + { 0, 17 }, /**/ + { 0, 18 }, /**/ + { 0, 19 }, /**/ + { 0, 20 }, /**/ + { 0, 21 }, /**/ + { 0, 22 }, /**/ + { 0, 23 }, /**/ + { 0, 24 }, /**/ + { 0, 25 }, /**/ + { 0, 26 }, /**/ + { 0, 27 }, /**/ + { 0, 28 }, /**/ + { 0, 29 }, /**/ + { 0, 30 }, /**/ + { 0, 31 }, /**/ + { 0, 32 }, /**/ + { 0, 33 }, /**/ + { 0, 34 }, /**/ + { 0, 35 }, /**/ + { 0, 36 }, /**/ + { 0, 37 }, /**/ + { 0, 38 }, /**/ + { 0, 39 }, /**/ + { 0, 
40 }, /**/ + { 1, 8 }, /**/ + { 1, 9 }, /**/ + { 1, 10 }, /**/ + { 1, 11 }, /**/ + { 1, 12 }, /**/ + { 1, 13 }, /**/ + { 1, 14 }, /**/ + { 1, 15 }, /**/ + { 1, 16 }, /**/ + { 1, 17 }, /**/ + { 1, 18 }, /**/ + { 6, 3 }, /**/ + { 11, 2 }, /**/ + { 12, 2 }, /**/ + { 13, 2 }, /**/ + { 14, 2 }, /**/ + { 15, 2 }, /**/ + { 16, 2 }, /**/ + { 27, 1 }, /**/ + { 28, 1 }, /**/ + { 29, 1 }, /**/ + { 30, 1 }, /**/ + { 31, 1 } /**/ +}; + + +int8_t rgi8_y262_quantiser_scale_table[ 2 ][ 32 ] = { + { + -1, + 2, + 4, + 6, + 8, + 10, + 12, + 14, + 16, + 18, + 20, + 22, + 24, + 26, + 28, + 30, + 32, + 34, + 36, + 38, + 40, + 42, + 44, + 46, + 48, + 50, + 52, + 54, + 56, + 58, + 60, + 62 + }, + { + -1, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 10, + 12, + 14, + 16, + 18, + 20, + 22, + 24, + 28, + 32, + 36, + 40, + 44, + 48, + 52, + 56, + 64, + 72, + 80, + 88, + 96, + 104, + 112 + } +}; + + +y262_vlc_t rgs_y262_motion_code_table[] = { + { 1, 1 }, /* 0 */ + { 3, 3 }, /* –1 */ + { 2, 3 }, /* 1 */ + { 3, 4 }, /* –2 */ + { 2, 4 }, /* 2 */ + { 3, 5 }, /* –3 */ + { 2, 5 }, /* 3 */ + { 7, 7 }, /* –4 */ + { 6, 7 }, /* 4 */ + { 7, 8 }, /* –7 */ + { 9, 8 }, /* –6 */ + { 11, 8 }, /* –5 */ + { 10, 8 }, /* 5 */ + { 8, 8 }, /* 6 */ + { 6, 8 }, /* 7 */ + { 19, 10 }, /* –10 */ + { 21, 10 }, /* –9 */ + { 23, 10 }, /* –8 */ + { 22, 10 }, /* 8 */ + { 20, 10 }, /* 9 */ + { 18, 10 }, /* 10 */ + { 25, 11 }, /* –16 */ + { 27, 11 }, /* –15 */ + { 29, 11 }, /* –14 */ + { 31, 11 }, /* –13 */ + { 33, 11 }, /* –12 */ + { 35, 11 }, /* –11 */ + { 34, 11 }, /* 11 */ + { 32, 11 }, /* 12 */ + { 30, 11 }, /* 13 */ + { 28, 11 }, /* 14 */ + { 26, 11 }, /* 15 */ + { 24, 11 }, /* 16 */ + { VLC_SENTINEL, VLC_SENTINEL } +}; + +int32_t rgi_y262_motion_bits[ 17 ] = { 1, 3, 4, 5, 7, 8, 8, 8, 10, 10, 10, 11, 11, 11, 11, 11, 11 }; + +int32_t rgi_y262_motion_delta_lookup_table[] = { + 0, + -1, + 1, + -2, + 2, + -3, + 3, + -4, + 4, + -7, + -6, + -5, + 5, + 6, + 7, + -10, + -9, + -8, + 8, + 9, + 10, + -16, + -15, + -14, + -13, + -12, 
+ -11, + 11, + 12, + 13, + 14, + 15, + 16 +}; + + +y262_vlc_t rgs_y262_coded_block_pattern_table[] = { + { 7, 3 }, /* 60 */ + { 13, 4 }, /* 4 */ + { 12, 4 }, /* 8 */ + { 11, 4 }, /* 16 */ + { 10, 4 }, /* 32 */ + { 19, 5 }, /* 12 */ + { 18, 5 }, /* 48 */ + { 17, 5 }, /* 20 */ + { 16, 5 }, /* 40 */ + { 15, 5 }, /* 28 */ + { 14, 5 }, /* 44 */ + { 13, 5 }, /* 52 */ + { 12, 5 }, /* 56 */ + { 11, 5 }, /* 1 */ + { 10, 5 }, /* 61 */ + { 9, 5 }, /* 2 */ + { 8, 5 }, /* 62 */ + { 15, 6 }, /* 24 */ + { 14, 6 }, /* 36 */ + { 13, 6 }, /* 3 */ + { 12, 6 }, /* 63 */ + { 23, 7 }, /* 5 */ + { 22, 7 }, /* 9 */ + { 21, 7 }, /* 17 */ + { 20, 7 }, /* 33 */ + { 19, 7 }, /* 6 */ + { 18, 7 }, /* 10 */ + { 17, 7 }, /* 18 */ + { 16, 7 }, /* 34 */ + { 31, 8 }, /* 7 */ + { 30, 8 }, /* 11 */ + { 29, 8 }, /* 19 */ + { 28, 8 }, /* 35 */ + { 27, 8 }, /* 13 */ + { 26, 8 }, /* 49 */ + { 25, 8 }, /* 21 */ + { 24, 8 }, /* 41 */ + { 23, 8 }, /* 14 */ + { 22, 8 }, /* 50 */ + { 21, 8 }, /* 22 */ + { 20, 8 }, /* 42 */ + { 19, 8 }, /* 15 */ + { 18, 8 }, /* 51 */ + { 17, 8 }, /* 23 */ + { 16, 8 }, /* 43 */ + { 15, 8 }, /* 25 */ + { 14, 8 }, /* 37 */ + { 13, 8 }, /* 26 */ + { 12, 8 }, /* 38 */ + { 11, 8 }, /* 29 */ + { 10, 8 }, /* 45 */ + { 9, 8 }, /* 53 */ + { 8, 8 }, /* 57 */ + { 7, 8 }, /* 30 */ + { 6, 8 }, /* 46 */ + { 5, 8 }, /* 54 */ + { 4, 8 }, /* 58 */ + { 7, 9 }, /* 31 */ + { 6, 9 }, /* 47 */ + { 5, 9 }, /* 55 */ + { 4, 9 }, /* 59 */ + { 3, 9 }, /* 27 */ + { 2, 9 }, /* 39 */ + { 1, 9 }, /* 0 (Note */ + + { VLC_SENTINEL, VLC_SENTINEL } +}; + +int32_t rgi_y262_coded_block_pattern_lookup_table[] = { + 60, + 4, + 8, + 16, + 32, + 12, + 48, + 20, + 40, + 28, + 44, + 52, + 56, + 1, + 61, + 2, + 62, + 24, + 36, + 3, + 63, + 5, + 9, + 17, + 33, + 6, + 10, + 18, + 34, + 7, + 11, + 19, + 35, + 13, + 49, + 21, + 41, + 14, + 50, + 22, + 42, + 15, + 51, + 23, + 43, + 25, + 37, + 26, + 38, + 29, + 45, + 53, + 57, + 30, + 46, + 54, + 58, + 31, + 47, + 55, + 59, + 27, + 39, + 0 +}; + diff --git a/src/y262/tables.h 
b/src/y262/tables.h new file mode 100644 index 0000000..94d4528 --- /dev/null +++ b/src/y262/tables.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/
+/* 16-entry duration / timescale tables; presumably indexed by frame rate code -- TODO confirm against tables.c */
+extern int32_t rgi_y262_framerate_code_duration[ 16 ];
+extern int32_t rgi_y262_framerate_code_timescale[ 16 ];
+/* Default 64-entry intra and non-intra quantisation matrices. */
+extern uint8_t rgui8_y262_default_intra_matrix[ 64 ];
+extern uint8_t rgui8_y262_default_non_intra_matrix[ 64 ];
+/* Luma / chroma block offset and count tables; the "il" variants presumably serve the interlaced block layout -- TODO confirm */
+extern int32_t rgui_y262_luma_blk_offsets[ 4 ][ 2 ];
+extern int32_t rgui_y262_luma_il_blk_offsets[ 4 ][ 2 ];
+extern int32_t rgui_num_chroma_blk[ 4 ];
+extern int32_t rgui_y262_chroma_blk_offsets[ 4 ][ 4 ][ 2 ];
+extern int32_t rgui_y262_chroma_il_blk_offsets[ 4 ][ 4 ][ 2 ];
+extern int32_t rgi_y262_block_type_dims[ ][ 2 ];
+/* Macroblock address increment VLC, then one macroblock type VLC plus a flags table per picture type ( I, P, B ). */
+extern y262_vlc_t rgs_y262_macroblock_address_increment_table[ ];
+extern y262_vlc_t rgs_y262_macroblock_type_i_picture_table[ ];
+extern int32_t rgui_y262_macroblock_type_i_picture_flags_table[ ];
+extern y262_vlc_t rgs_y262_macroblock_type_p_picture_table[ ];
+extern int32_t rgui_y262_macroblock_type_p_picture_flags_table[ ];
+extern y262_vlc_t rgs_y262_macroblock_type_b_picture_table[ ];
+extern int32_t rgui_y262_macroblock_type_b_picture_flags_table[ ];
+/* DCT DC size VLCs for luminance and chrominance, each with a lookup table. */
+extern y262_vlc_t rgs_y262_dct_dc_size_luminance_table[ ];
+extern int32_t rgi_y262_dct_dc_size_luminance_lookup_table[ ];
+extern y262_vlc_t rgs_y262_dct_dc_size_chrominance_table[ ];
+extern int32_t rgi_y262_dct_dc_size_chrominance_lookup_table[ ];
+/* DCT coefficient table zero: { code, length } VLC entries ending in a VLC_SENTINEL pair, a { run, level } lookup table in the same entry order, and a [ run ][ level ] bit count table ( code length plus sign bit; 24 where no dedicated code exists, presumably the escape cost -- TODO confirm ). */
+extern y262_vlc_t rgs_y262_dct_coefficients_table_zero[ ];
+extern y262_run_level_t rgs_y262_dct_coefficients_lookup_table_zero[ ];
+extern int8_t rgi_y262_run_level_bits_zero[ 32 ][ 41 ];
+/* DCT coefficient table one: VLC entries and the { run, level } lookup table in the same entry order. */
+extern y262_vlc_t rgs_y262_dct_coefficients_table_one[ ];
+extern y262_run_level_t rgs_y262_dct_coefficients_lookup_table_one[ ];
+/* Two 32-entry quantiser scale rows: row 0 is 2 * index, row 1 is a non-linear ramp up to 112; index 0 holds -1 in both rows. */
+extern int8_t rgi8_y262_quantiser_scale_table[ 2 ][ 32 ];
+/* Coefficient scan tables ( definitions not visible from this header ). */
+extern uint8_t rgui8_y262_scan_0_table[ ];
+extern uint8_t rgui8_y262_scan_1_table[ ];
+/* Motion code VLC, the code length for each magnitude 0..16, and the signed delta of each VLC entry ( same entry order as the VLC ). */
+extern y262_vlc_t rgs_y262_motion_code_table[ ];
+extern int32_t rgi_y262_motion_bits[ 17 ];
+extern int32_t rgi_y262_motion_delta_lookup_table[ ];
+/* Coded block pattern VLC and the pattern value of each VLC entry ( same entry order as the VLC ). */
+extern y262_vlc_t rgs_y262_coded_block_pattern_table[ ];
+extern int32_t rgi_y262_coded_block_pattern_lookup_table[ ];
+
diff --git a/src/y262/threads.c b/src/y262/threads.c
new file mode 100644
index 0000000..3c958fa
--- /dev/null
+++ b/src/y262/threads.c
@@ -0,0 +1,400 @@
+/*
+Copyright (c) 2013,2016, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/ + + + +#include "y262.h" + +#if defined( WIN32 ) || defined( WIN64 ) +#include +#include +#elif defined( HAVE_LIBPTHREAD ) +#include + +typedef struct +{ + pthread_t s_thread; + y262_thread_f *pf_func; + void *p_arg; +} y262_pthread_thread_t; + +void *y262_pthread_func( void *p_arg ) +{ + y262_pthread_thread_t *ps_thread = ( y262_pthread_thread_t * ) p_arg; + ps_thread->pf_func( ps_thread->p_arg ); + return NULL; +} + +#endif + +bool_t y262_can_do_threads( ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + return TRUE; +#elif defined( HAVE_LIBPTHREAD ) + return TRUE; +#else + return FALSE; +#endif +} + + +void *y262_create_thread( y262_t *ps_y262, y262_thread_f *pf_func, void *p_arg ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + if( _beginthread( pf_func, 0, p_arg ) == -1 ) + { + return 0; + } + return (void *) 0x1; +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_thread_t *ps_thread; + ps_thread = y262_alloc( sizeof( y262_pthread_thread_t ) ); + ps_thread->pf_func = pf_func; + ps_thread->p_arg = p_arg; + if( pthread_create( &ps_thread->s_thread, 0x0, y262_pthread_func, ps_thread ) ) + { + y262_dealloc( ps_thread ); + return NULL; + } + return ps_thread; +#else + + return NULL; +#endif +} + + +void y262_join_thread( y262_t *ps_y262, void *p_thread ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + /* windows cannot join threads */ +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_thread_t *ps_thread = ( y262_pthread_thread_t *)p_thread; + pthread_join( ps_thread->s_thread, NULL ); + y262_dealloc( ps_thread ); +#endif +} + + +void *y262_create_mutex( y262_t *ps_y262 ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + CRITICAL_SECTION *ps_cs; + ps_cs = ( CRITICAL_SECTION * ) y262_alloc( sizeof( CRITICAL_SECTION ) ); + if( ps_cs != NULL ) + { + InitializeCriticalSection( ps_cs ); + } + else + { + return NULL; + } + return ( void * )ps_cs; +#elif defined( HAVE_LIBPTHREAD ) + pthread_mutex_t *ps_mutex; + ps_mutex = ( pthread_mutex_t * )y262_alloc( sizeof( pthread_mutex_t 
) ); + if( ps_mutex == NULL ) + { + return NULL; + } + if( pthread_mutex_init( ps_mutex, NULL ) ) + { + y262_dealloc( ps_mutex ); + return NULL; + } + return ps_mutex; +#else + return NULL; +#endif +} + + +void y262_destroy_mutex( y262_t *ps_y262, void *p_cs ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + CRITICAL_SECTION *ps_cs = ( CRITICAL_SECTION * ) p_cs; + if( p_cs != NULL ) + { + DeleteCriticalSection( ps_cs ); + y262_dealloc( p_cs ); + } +#elif defined( HAVE_LIBPTHREAD ) + pthread_mutex_t *ps_mutex = ( pthread_mutex_t * ) p_cs; + if( ps_mutex ) + { + pthread_mutex_destroy( ps_mutex ); + y262_dealloc( ps_mutex ); + } +#else +#endif +} + + +void y262_mutex_lock( y262_t *ps_y262, void *p_cs ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + CRITICAL_SECTION *ps_cs = ( CRITICAL_SECTION * ) p_cs; + if( ps_cs != NULL ) + { + EnterCriticalSection( ps_cs ); + } +#elif defined( HAVE_LIBPTHREAD ) + pthread_mutex_t *ps_mutex = ( pthread_mutex_t * ) p_cs; + if( ps_mutex ) + { + pthread_mutex_lock( ps_mutex ); + } +#else +#endif +} + + +void y262_mutex_unlock( y262_t *ps_y262, void *p_cs ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + CRITICAL_SECTION *ps_cs = ( CRITICAL_SECTION * ) p_cs; + if( ps_cs != NULL ) + { + LeaveCriticalSection( ps_cs ); + } +#elif defined( HAVE_LIBPTHREAD ) + pthread_mutex_t *ps_mutex = ( pthread_mutex_t * ) p_cs; + if( ps_mutex ) + { + pthread_mutex_unlock( ps_mutex ); + } +#else +#endif +} + + +#if defined( HAVE_LIBPTHREAD ) +typedef struct { + volatile int i_triggered; + pthread_cond_t s_cond; + pthread_mutex_t s_mutex; +} y262_pthread_event_t; +#endif + + +/* pthread impl, if any, will suffer */ +void *y262_create_event( y262_t *ps_y262 ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + HANDLE ps_event; + ps_event = CreateEvent( NULL, FALSE, FALSE, NULL ); + return ps_event; +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event; + ps_event = y262_alloc( sizeof( y262_pthread_event_t ) ); + if( ps_event == NULL ) + { + return NULL; 
+ } + ps_event->i_triggered = 0; + if( pthread_cond_init( &ps_event->s_cond, NULL ) ) + { + free( ps_event ); + return NULL; + } + if( pthread_mutex_init( &ps_event->s_mutex, NULL ) ) + { + pthread_cond_destroy( &ps_event->s_cond ); + free( ps_event ); + return NULL; + } + return ps_event; +#else + return NULL; +#endif +} + +void y262_destroy_event( y262_t *ps_y262, void *p_event ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + CloseHandle( ( HANDLE )p_event ); +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event = ( y262_pthread_event_t * )p_event;; + pthread_mutex_destroy( &ps_event->s_mutex ); + pthread_cond_destroy( &ps_event->s_cond ); + y262_dealloc( ps_event ); +#else +#endif +} + +void y262_event_wait_( y262_t *ps_y262, void *p_event ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + WaitForSingleObject( ( HANDLE )p_event, INFINITE ); +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event = ( y262_pthread_event_t * )p_event;; + pthread_mutex_lock( &ps_event->s_mutex ); + if( ps_event->i_triggered == 0 ) + { + pthread_cond_wait( &ps_event->s_cond, &ps_event->s_mutex ); + } + ps_event->i_triggered = 0; + pthread_mutex_unlock( &ps_event->s_mutex ); +#else +#endif +} + +void y262_event_set_( y262_t *ps_y262, void *p_event ) +{ +#if defined( _WIN32 ) || defined( _WIN64 ) + SetEvent( ( HANDLE )p_event ); +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event = ( y262_pthread_event_t * )p_event;; + pthread_mutex_lock( &ps_event->s_mutex ); + ps_event->i_triggered = 1; + pthread_cond_broadcast( &ps_event->s_cond ); + pthread_mutex_unlock( &ps_event->s_mutex ); +#else +#endif +} + +void y262_event_wait_g( y262_t *ps_y262, void *p_event ) +{ +#if defined( WIN32 ) || defined( WIN64 ) + WaitForSingleObject( ( HANDLE ) p_event, INFINITE ); +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event = ( y262_pthread_event_t * ) p_event;; + pthread_mutex_lock( ps_y262->p_resource_mutex ); + if( ps_event->i_triggered == 0 ) + { + 
pthread_cond_wait( &ps_event->s_cond, ps_y262->p_resource_mutex ); + } + ps_event->i_triggered = 0; + pthread_mutex_unlock( ps_y262->p_resource_mutex ); +#else +#endif +} + +void y262_event_set_g( y262_t *ps_y262, void *p_event ) +{ +#if defined( _WIN32 ) || defined( _WIN64 ) + SetEvent( ( HANDLE ) p_event ); +#elif defined( HAVE_LIBPTHREAD ) + y262_pthread_event_t *ps_event = ( y262_pthread_event_t * ) p_event;; + pthread_mutex_lock( ps_y262->p_resource_mutex ); + ps_event->i_triggered = 1; + pthread_cond_broadcast( &ps_event->s_cond ); + pthread_mutex_unlock( ps_y262->p_resource_mutex ); +#else +#endif +} + + + +void y262_slice_thread( void *p_arg ) +{ + int32_t i_idx; + y262_slice_thread_t *ps_slice_thread; + y262_t *ps_slice_encoder; + + ps_slice_thread = ( y262_slice_thread_t * ) p_arg; + ps_slice_encoder = ps_slice_thread->ps_y262->rgps_slice_encoders[ ps_slice_thread->i_slice_encoder_idx ]; + + while( 1 ) + { + y262_event_wait_g( ps_slice_thread->ps_y262, ps_slice_thread->p_start_event ); + if( ps_slice_thread->i_command == Y262_SLICE_THREAD_CMD_LOOKAHEAD ) + { + y262_lookahead_analyze_slice( ps_slice_thread->ps_y262, ps_slice_thread->ps_pic, ps_slice_thread->ps_fw_ref, + ps_slice_thread->ps_bw_ref, ps_slice_thread->i_first_slice_row, ps_slice_thread->i_last_slice_row ); + y262_event_set_g( ps_slice_thread->ps_y262, ps_slice_thread->p_finished_event ); + } + else if( ps_slice_thread->i_command == Y262_SLICE_THREAD_CMD_ENCODE ) + { + y262_bitstream_reset( &ps_slice_encoder->s_bitstream ); + for( i_idx = ps_slice_thread->i_first_slice_row; i_idx <= ps_slice_thread->i_last_slice_row; i_idx++ ) + { + y262_encode_unit_slice( ps_slice_encoder, ps_slice_thread->i_picture_type, i_idx ); + y262_bitstream_bytealign( &ps_slice_encoder->s_bitstream ); + } + y262_event_set_g( ps_slice_thread->ps_y262, ps_slice_thread->p_finished_event ); + } + else if( ps_slice_thread->i_command == Y262_SLICE_THREAD_CMD_EXIT ) + { + break; + } + } + y262_event_set_g( 
ps_slice_thread->ps_y262, ps_slice_thread->p_finished_event ); + return; +} + + +void y262_lookahead_thread( void *p_arg ) +{ + y262_slice_thread_t *ps_slice_thread; + + ps_slice_thread = ( y262_slice_thread_t * ) p_arg; + + while( 1 ) + { + y262_event_wait_g( ps_slice_thread->ps_y262, ps_slice_thread->p_start_event ); + if( ps_slice_thread->i_command == Y262_SLICE_THREAD_CMD_LOOKAHEAD ) + { + y262_lookahead_analyze_slice( ps_slice_thread->ps_y262, ps_slice_thread->ps_pic, ps_slice_thread->ps_fw_ref, + ps_slice_thread->ps_bw_ref, ps_slice_thread->i_first_slice_row, ps_slice_thread->i_last_slice_row ); + y262_event_set_g( ps_slice_thread->ps_y262, ps_slice_thread->p_finished_event ); + } + else if( ps_slice_thread->i_command == Y262_SLICE_THREAD_CMD_EXIT ) + { + break; + } + } + y262_event_set_g( ps_slice_thread->ps_y262, ps_slice_thread->p_finished_event ); + return; +} + +void y262_main_lookahead_thread( void *p_arg ) +{ + y262_t *ps_y262 = ( y262_t *)p_arg; + + while( 1 ) + { + y262_event_wait_g( ps_y262, ps_y262->s_lookahead_thread.p_start_event ); + if( ps_y262->s_lookahead_thread.i_command == Y262_LOOKAHEAD_THREAD_CMD_LOOKAHEAD ) + { + y262_process_lookahead_internal( ps_y262 ); + y262_event_set_g( ps_y262, ps_y262->s_lookahead_thread.p_finished_event ); + } + else if( ps_y262->s_lookahead_thread.i_command == Y262_SLICE_THREAD_CMD_EXIT ) + { + break; + } + } + y262_event_set_g( ps_y262, ps_y262->s_lookahead_thread.p_finished_event ); + return; + +} + diff --git a/src/y262/threads.h b/src/y262/threads.h new file mode 100644 index 0000000..47f4d7f --- /dev/null +++ b/src/y262/threads.h @@ -0,0 +1,53 @@ +/* +Copyright (c) 2013,2016, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + + + +bool_t y262_can_do_threads( ); + +void *y262_create_thread( y262_t *ps_y262, y262_thread_f *pf_func, void *p_arg ); +void y262_join_thread( y262_t *ps_y262, void *p_thread ); + +void *y262_create_mutex( y262_t *ps_y262 ); +void y262_destroy_mutex( y262_t *ps_y262, void *p_cs ); +void y262_mutex_lock( y262_t *ps_y262, void *p_cs ); +void y262_mutex_unlock( y262_t *ps_y262, void *p_cs ); + +void *y262_create_event( y262_t *ps_y262 ); +void y262_destroy_event( y262_t *ps_y262, void *p_event ); +void y262_event_wait_( y262_t *ps_y262, void *p_event ); +void y262_event_set_( y262_t *ps_y262, void *p_event ); +void y262_event_wait_g( y262_t *ps_y262, void *p_event ); +void y262_event_set_g( y262_t *ps_y262, void *p_event ); + +void y262_slice_thread( void *p_arg ); +void y262_lookahead_thread( void *p_arg ); +void y262_main_lookahead_thread( void *p_arg ); +void y262_encode_unit_slice( y262_t *ps_y262, int32_t i_picture_type, int32_t i_slice_row ); /* not in threads.c */ \ No newline at end of file diff 
--git a/src/y262/transform.c b/src/y262/transform.c new file mode 100644 index 0000000..4f72834 --- /dev/null +++ b/src/y262/transform.c @@ -0,0 +1,812 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
#define RND1BITS ( 11 )
#define RND2BITS ( 31 - RND1BITS )

/* Cosine coefficients for the first (column) pass of the forward DCT. */
static const int16_t rgi16_y262_fdct_cs1[ 8 ][ 8 ] = {
	{ 16383,  16383,  16383,  16383,  16383,  16383,  16383,  16383 },
	{ 22724,  19265,  12872,   4520,  -4520, -12872, -19265, -22724 },
	{ 21406,   8867,  -8867, -21406, -21406,  -8867,   8867,  21406 },
	{ 19265,  -4520, -22724, -12872,  12872,  22724,   4520, -19265 },
	{ 16383, -16383, -16383,  16383,  16383, -16383, -16383,  16383 },
	{ 12872, -22724,   4520,  19265, -19265,  -4520,  22724, -12872 },
	{  8867, -21406,  21406,  -8867,  -8867,  21406, -21406,   8867 },
	{  4520, -12872,  19265, -22724,  22724, -19265,  12872,  -4520 },
};
/* Cosine coefficients for the second (row) pass of the forward DCT. */
static const int16_t rgi16_y262_fdct_cs2[ 8 ][ 8 ] = {
	{ 16385,  16385,  16385,  16385,  16385,  16385,  16385,  16385 },
	{ 22726,  19266,  12873,   4521,  -4521, -12873, -19266, -22726 },
	{ 21408,   8867,  -8867, -21408, -21408,  -8867,   8867,  21408 },
	{ 19266,  -4521, -22726, -12873,  12873,  22726,   4521, -19266 },
	{ 16385, -16385, -16385,  16385,  16385, -16385, -16385,  16385 },
	{ 12873, -22726,   4521,  19266, -19266,  -4521,  22726, -12873 },
	{  8867, -21408,  21408,  -8867,  -8867,  21408, -21408,   8867 },
	{  4521, -12873,  19266, -22726,  22726, -19266,  12873,  -4521 },
};


/* Adds half of 2^i_bits to i_value and shifts right by i_bits ( no bias when i_bits is 0 ). */
static int32_t y262_fdct_round_shift( int32_t i_value, int32_t i_bits )
{
	int32_t i_half = i_bits ? ( 1 << ( i_bits - 1 ) ) : 0;

	return ( i_value + i_half ) >> i_bits;
}

/*
 * One 1-D pass of the 8x8 forward transform using even/odd butterflies.
 * Line i_line of the input is the set of samples pi16_src[ 8 * k + i_line ], k = 0..7.
 * Its 8 transformed values are written contiguously to pi16_out[ 8 * i_line + 0..7 ],
 * i.e. the output is transposed relative to the input.
 * pi16_src and pi16_out must not overlap.
 */
static void y262_fdct_pass( const int16_t *pi16_src, int16_t *pi16_out, const int16_t rgi16_cs[ 8 ][ 8 ], int32_t i_bits )
{
	int i_line, i_freq, i_tap;

	for( i_line = 0; i_line < 8; i_line++ )
	{
		int32_t rgi_diff[ 4 ], rgi_sum[ 4 ];
		int32_t rgi_diff2[ 2 ], rgi_sum2[ 2 ];
		int32_t i_acc;

		/* first butterfly stage: mirror pairs ( k, 7 - k ) */
		for( i_tap = 0; i_tap < 4; i_tap++ )
		{
			int32_t i_head = pi16_src[ 8 * i_tap + i_line ];
			int32_t i_tail = pi16_src[ 8 * ( 7 - i_tap ) + i_line ];

			rgi_diff[ i_tap ] = i_head - i_tail;
			rgi_sum[ i_tap ] = i_head + i_tail;
		}
		/* second butterfly stage on the sums: pairs ( k, 3 - k ) */
		for( i_tap = 0; i_tap < 2; i_tap++ )
		{
			rgi_diff2[ i_tap ] = rgi_sum[ i_tap ] - rgi_sum[ 3 - i_tap ];
			rgi_sum2[ i_tap ] = rgi_sum[ i_tap ] + rgi_sum[ 3 - i_tap ];
		}

		/* frequencies 1, 3, 5, 7 come from the first stage differences */
		for( i_freq = 1; i_freq < 8; i_freq += 2 )
		{
			i_acc = 0;
			for( i_tap = 0; i_tap < 4; i_tap++ )
			{
				i_acc += rgi16_cs[ i_freq ][ i_tap ] * rgi_diff[ i_tap ];
			}
			pi16_out[ 8 * i_line + i_freq ] = y262_fdct_round_shift( i_acc, i_bits );
		}
		/* frequencies 2, 6 come from the second stage differences */
		for( i_freq = 2; i_freq < 8; i_freq += 4 )
		{
			i_acc = 0;
			for( i_tap = 0; i_tap < 2; i_tap++ )
			{
				i_acc += rgi16_cs[ i_freq ][ i_tap ] * rgi_diff2[ i_tap ];
			}
			pi16_out[ 8 * i_line + i_freq ] = y262_fdct_round_shift( i_acc, i_bits );
		}
		/* frequencies 0, 4 come from the second stage sums */
		for( i_freq = 0; i_freq < 8; i_freq += 4 )
		{
			i_acc = 0;
			for( i_tap = 0; i_tap < 2; i_tap++ )
			{
				i_acc += rgi16_cs[ i_freq ][ i_tap ] * rgi_sum2[ i_tap ];
			}
			pi16_out[ 8 * i_line + i_freq ] = y262_fdct_round_shift( i_acc, i_bits );
		}
	}
}

/*
 * 8x8 forward DCT, plain C version.
 * pi16_block: 64 input samples in raster order ( 8 per row ).
 * pi16_dst:   receives the 64 transform coefficients in raster order.
 * The first pass rounds by RND1BITS into a scratch block, the second pass
 * rounds by RND2BITS into pi16_dst. pi16_dst is only written after all of
 * pi16_block has been read, so both pointers may refer to the same block.
 */
void y262_fdct_c( int16_t *pi16_block, int16_t *pi16_dst )
{
	int16_t rgi16_stage[ 64 ];

	y262_fdct_pass( pi16_block, rgi16_stage, rgi16_y262_fdct_cs1, RND1BITS );
	y262_fdct_pass( rgi16_stage, pi16_dst, rgi16_y262_fdct_cs2, RND2BITS );
}
-12873, -19266, -22726 }, + { 21408, 8867, -8867, -21408, -21408, -8867, 8867, 21408 }, + { 19266, -4521, -22726, -12873, 12873, 22726, 4521, -19266 }, + { 16385, -16385, -16385, 16385, 16385, -16385, -16385, 16385 }, + { 12873, -22726, 4521, 19266, -19266, -4521, 22726, -12873 }, + { 8867, -21408, 21408, -8867, -8867, 21408, -21408, 8867 }, + { 4521, -12873, 19266, -22726, 22726, -19266, 12873, -4521 }, +}; + + +void y262_idct_c( int16_t *pi16_block, int16_t *pi16_dst ) +{ + int i_j, i_k; + int16_t rgi16_tmp[ 64 ]; + int32_t rgi_e[ 4 ], rgi_o[ 4 ]; + int32_t rgi_ee[ 2 ], rgi_eo[ 2 ]; + + +#define RND( x, y ) ( ( ( x ) + ( ( y ) ? ( 1 << ( y - 1 ) ) : 0 ) ) >> ( y ) ) +#define MUL( x, m ) ( ( x ) * ( m ) ) + + for( i_j = 0; i_j < 8; i_j++ ) + { + rgi_o[ 0 ] = rgi16_y262_idct_cs1[ 1 ][ 0 ] * pi16_block[ i_j + 8 * 1 ] + rgi16_y262_idct_cs1[ 3 ][ 0 ] * pi16_block[ i_j + 8 * 3 ] + + rgi16_y262_idct_cs1[ 5 ][ 0 ] * pi16_block[ i_j + 8 * 5 ] + rgi16_y262_idct_cs1[ 7 ][ 0 ] * pi16_block[ i_j + 8 * 7 ]; + + rgi_o[ 1 ] = rgi16_y262_idct_cs1[ 1 ][ 1 ] * pi16_block[ i_j + 8 * 1 ] + rgi16_y262_idct_cs1[ 3 ][ 1 ] * pi16_block[ i_j + 8 * 3 ] + + rgi16_y262_idct_cs1[ 5 ][ 1 ] * pi16_block[ i_j + 8 * 5 ] + rgi16_y262_idct_cs1[ 7 ][ 1 ] * pi16_block[ i_j + 8 * 7 ]; + + rgi_o[ 2 ] = rgi16_y262_idct_cs1[ 1 ][ 2 ] * pi16_block[ i_j + 8 * 1 ] + rgi16_y262_idct_cs1[ 3 ][ 2 ] * pi16_block[ i_j + 8 * 3 ] + + rgi16_y262_idct_cs1[ 5 ][ 2 ] * pi16_block[ i_j + 8 * 5 ] + rgi16_y262_idct_cs1[ 7 ][ 2 ] * pi16_block[ i_j + 8 * 7 ]; + + rgi_o[ 3 ] = rgi16_y262_idct_cs1[ 1 ][ 3 ] * pi16_block[ i_j + 8 * 1 ] + rgi16_y262_idct_cs1[ 3 ][ 3 ] * pi16_block[ i_j + 8 * 3 ] + + rgi16_y262_idct_cs1[ 5 ][ 3 ] * pi16_block[ i_j + 8 * 5 ] + rgi16_y262_idct_cs1[ 7 ][ 3 ] * pi16_block[ i_j + 8 * 7 ]; + + rgi_eo[ 0 ] = rgi16_y262_idct_cs1[ 2 ][ 0 ] * pi16_block[ i_j + 8 * 2 ] + rgi16_y262_idct_cs1[ 6 ][ 0 ] * pi16_block[ i_j + 8 * 6 ]; + rgi_eo[ 1 ] = rgi16_y262_idct_cs1[ 2 ][ 1 ] * pi16_block[ i_j + 8 * 2 ] + 
rgi16_y262_idct_cs1[ 6 ][ 1 ] * pi16_block[ i_j + 8 * 6 ]; + rgi_ee[ 0 ] = rgi16_y262_idct_cs1[ 0 ][ 0 ] * pi16_block[ i_j + 8 * 0 ] + rgi16_y262_idct_cs1[ 4 ][ 0 ] * pi16_block[ i_j + 8 * 4 ]; + rgi_ee[ 1 ] = rgi16_y262_idct_cs1[ 0 ][ 1 ] * pi16_block[ i_j + 8 * 0 ] + rgi16_y262_idct_cs1[ 4 ][ 1 ] * pi16_block[ i_j + 8 * 4 ]; + + rgi_e[ 0 ] = rgi_ee[ 0 ] + rgi_eo[ 0 ]; + rgi_e[ 1 ] = rgi_ee[ 1 ] + rgi_eo[ 1 ]; + rgi_e[ 2 ] = rgi_ee[ 1 ] - rgi_eo[ 1 ]; + rgi_e[ 3 ] = rgi_ee[ 0 ] - rgi_eo[ 0 ]; + + for( i_k = 0; i_k < 4; i_k++ ) + { + rgi16_tmp[ i_j + 8 * i_k ] = RND( rgi_e[ i_k ] + rgi_o[ i_k ], RND1BITS ); + rgi16_tmp[ i_j + 8 * ( i_k + 4 ) ] = RND( rgi_e[ 3 - i_k ] - rgi_o[ 3 - i_k ], RND1BITS ); + } + } + + for( i_j = 0; i_j < 8; i_j++ ) + { + rgi_e[ 0 ] = rgi16_y262_idct_cs2[ 0 ][ 0 ] * rgi16_tmp[ i_j * 8 + 0 ] + rgi16_y262_idct_cs2[ 2 ][ 0 ] * rgi16_tmp[ i_j * 8 + 2 ] + + rgi16_y262_idct_cs2[ 4 ][ 0 ] * rgi16_tmp[ i_j * 8 + 4 ] + rgi16_y262_idct_cs2[ 6 ][ 0 ] * rgi16_tmp[ i_j * 8 + 6 ]; + rgi_e[ 1 ] = rgi16_y262_idct_cs2[ 0 ][ 1 ] * rgi16_tmp[ i_j * 8 + 0 ] + rgi16_y262_idct_cs2[ 2 ][ 1 ] * rgi16_tmp[ i_j * 8 + 2 ] + + rgi16_y262_idct_cs2[ 4 ][ 1 ] * rgi16_tmp[ i_j * 8 + 4 ] + rgi16_y262_idct_cs2[ 6 ][ 1 ] * rgi16_tmp[ i_j * 8 + 6 ]; + rgi_e[ 2 ] = rgi16_y262_idct_cs2[ 0 ][ 1 ] * rgi16_tmp[ i_j * 8 + 0 ] + -rgi16_y262_idct_cs2[ 2 ][ 1 ] * rgi16_tmp[ i_j * 8 + 2 ] + + rgi16_y262_idct_cs2[ 4 ][ 1 ] * rgi16_tmp[ i_j * 8 + 4 ] + -rgi16_y262_idct_cs2[ 6 ][ 1 ] * rgi16_tmp[ i_j * 8 + 6 ]; + rgi_e[ 3 ] = rgi16_y262_idct_cs2[ 0 ][ 0 ] * rgi16_tmp[ i_j * 8 + 0 ] + -rgi16_y262_idct_cs2[ 2 ][ 0 ] * rgi16_tmp[ i_j * 8 + 2 ] + + rgi16_y262_idct_cs2[ 4 ][ 0 ] * rgi16_tmp[ i_j * 8 + 4 ] + -rgi16_y262_idct_cs2[ 6 ][ 0 ] * rgi16_tmp[ i_j * 8 + 6 ]; + + rgi_o[ 0 ] = rgi16_y262_idct_cs2[ 1 ][ 0 ] * rgi16_tmp[ i_j * 8 + 1 ] + rgi16_y262_idct_cs2[ 3 ][ 0 ] * rgi16_tmp[ i_j * 8 + 3 ] + + rgi16_y262_idct_cs2[ 5 ][ 0 ] * rgi16_tmp[ i_j * 8 + 5 ] + rgi16_y262_idct_cs2[ 7 ][ 0 ] * 
rgi16_tmp[ i_j * 8 + 7 ]; + rgi_o[ 1 ] = rgi16_y262_idct_cs2[ 1 ][ 1 ] * rgi16_tmp[ i_j * 8 + 1 ] + rgi16_y262_idct_cs2[ 3 ][ 1 ] * rgi16_tmp[ i_j * 8 + 3 ] + + rgi16_y262_idct_cs2[ 5 ][ 1 ] * rgi16_tmp[ i_j * 8 + 5 ] + rgi16_y262_idct_cs2[ 7 ][ 1 ] * rgi16_tmp[ i_j * 8 + 7 ]; + rgi_o[ 2 ] = rgi16_y262_idct_cs2[ 1 ][ 2 ] * rgi16_tmp[ i_j * 8 + 1 ] + rgi16_y262_idct_cs2[ 3 ][ 2 ] * rgi16_tmp[ i_j * 8 + 3 ] + + rgi16_y262_idct_cs2[ 5 ][ 2 ] * rgi16_tmp[ i_j * 8 + 5 ] + rgi16_y262_idct_cs2[ 7 ][ 2 ] * rgi16_tmp[ i_j * 8 + 7 ]; + rgi_o[ 3 ] = rgi16_y262_idct_cs2[ 1 ][ 3 ] * rgi16_tmp[ i_j * 8 + 1 ] + rgi16_y262_idct_cs2[ 3 ][ 3 ] * rgi16_tmp[ i_j * 8 + 3 ] + + rgi16_y262_idct_cs2[ 5 ][ 3 ] * rgi16_tmp[ i_j * 8 + 5 ] + rgi16_y262_idct_cs2[ 7 ][ 3 ] * rgi16_tmp[ i_j * 8 + 7 ]; + + for( i_k = 0; i_k < 4; i_k++ ) + { + pi16_dst[ i_j * 8 + i_k ] = RND( rgi_e[ i_k ] + rgi_o[ i_k ], RND2BITS ); + pi16_dst[ i_j * 8 + ( i_k + 4 ) ] = RND( rgi_e[ 3 - i_k ] - rgi_o[ 3 - i_k ], RND2BITS ); + } + } +} + + + +int32_t y262_quant8x8_intra_fw_mpeg2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ) +{ + int32_t i_y, i_x, i_qm, i_nz; + + i_nz = 0; + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = ( i_y == 0 ) ? 1 : 0; i_x < 8; i_x++ ) + { + int32_t i_level; + i_qm = pui16_qmat[ i_y * 8 + i_x ]; + i_level = pi_coeffs[ i_y * i_stride + i_x ]; + if( i_level < 0 ) + { + i_level = -( ( ( ( -i_level + pui16_bias[ i_y * 8 + i_x ] ) * i_qm ) ) >> 16 ); + } + else + { + i_level = ( ( ( i_level + pui16_bias[ i_y * 8 + i_x ] ) * i_qm ) ) >> 16; + } + pi_coeffs[ i_y * i_stride + i_x ] = i_level; + i_nz |= i_level; + } + } + return i_nz; +} + +#define CLAMP256( x ) ( ( x ) < -256 ? -256 : ( ( x ) > 255 ? 255 : ( x ) ) ) + +int32_t y262_quant8x8_intra_fw_mpeg1( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ) +{ + int32_t i_y, i_x, i_qm, i_nz; + + i_nz = 0; + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = ( i_y == 0 ) ? 
#define CLAMP_2047( x ) ( ( x ) < -2048 ? -2048 : ( ( x ) > 2047 ? 2047 : ( x ) ) )

/*
 * MPEG-1 inverse quantisation of the AC coefficients of an intra 8x8 block, in place.
 * pi_coeffs:   quantised levels, 8 rows of 8 values, rows i_stride elements apart.
 *              Element [ 0 ] ( the DC coefficient ) is not touched.
 * i_quantizer: quantiser scale that is multiplied into every matrix entry.
 * pui8_qmat:   64 entry intra quantiser matrix in raster order ( always 8 per row ).
 *
 * Each non zero level is scaled by ( 2 * matrix * quantizer ) / 32, forced to an
 * odd value by moving even results one step towards zero, and saturated to
 * [ -2048, 2047 ].
 * A result that truncates to zero stays zero: the MPEG-1 rule subtracts
 * Sign( x ) from even values and Sign( 0 ) is 0. The former code applied
 * ( x - 1 ) | 1 unconditionally and turned such a zero into -1.
 */
void y262_quant8x8_intra_bw_mpeg1( int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, uint8_t *pui8_qmat )
{
	int32_t i_y, i_x, i_qm, i_qt;

	for( i_y = 0; i_y < 8; i_y++ )
	{
		for( i_x = ( ( i_y == 0 ) ? 1 : 0 ); i_x < 8; i_x++ )
		{
			int32_t i_level;
			i_level = pi_coeffs[ i_y * i_stride + i_x ];

			if( i_level != 0 )
			{
				i_qm = pui8_qmat[ i_y * 8 + i_x ];
				i_qt = i_qm * i_quantizer * 2;

				i_level = ( i_level * i_qt ) / 32;
				if( i_level < 0 )
				{
					i_level = -i_level;
					i_level = ( i_level - 1 ) | 1;
					i_level = -i_level;
				}
				else if( i_level > 0 )
				{
					i_level = ( i_level - 1 ) | 1;
				}
				/* i_level == 0: the level was quantised away completely, keep it zero */

				pi_coeffs[ i_y * i_stride + i_x ] = CLAMP_2047( i_level );
			}
		}
	}
}
#define CLAMP_2047( x ) ( ( x ) < -2048 ? -2048 : ( ( x ) > 2047 ? 2047 : ( x ) ) )

/*
 * MPEG-1 inverse quantisation of a non intra 8x8 block, in place.
 * pi_coeffs:   quantised levels, 8 rows of 8 values, rows i_stride elements apart.
 * i_quantizer: quantiser scale that is multiplied into every matrix entry.
 * pui8_qmat:   64 entry non intra quantiser matrix in raster order ( always 8 per row ).
 *
 * The magnitude of each non zero level is reconstructed as
 * ( ( 2 * |level| + 1 ) * matrix * quantizer ) / 32, forced to an odd value by
 * moving even results one step towards zero, given the sign of the level and
 * saturated to [ -2048, 2047 ].
 * A magnitude that truncates to zero stays zero: the MPEG-1 rule subtracts
 * Sign( x ) from even values and Sign( 0 ) is 0. The former code applied
 * ( x - 1 ) | 1 unconditionally, so such a coefficient was reconstructed
 * as -1 for a positive level and +1 for a negative one.
 */
void y262_quant8x8_inter_bw_mpeg1( int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, uint8_t *pui8_qmat )
{
	int32_t i_y, i_x, i_qm, i_qt;

	for( i_y = 0; i_y < 8; i_y++ )
	{
		for( i_x = 0; i_x < 8; i_x++ )
		{
			int32_t i_level;
			i_level = pi_coeffs[ i_y * i_stride + i_x ];

			if( i_level != 0 )
			{
				int32_t i_magnitude;

				i_qm = pui8_qmat[ i_y * 8 + i_x ];
				i_qt = i_qm * i_quantizer;

				i_magnitude = i_level < 0 ? -i_level : i_level;
				i_magnitude = ( ( i_magnitude * 2 + 1 ) * i_qt ) / 32;
				if( i_magnitude != 0 )
				{
					/* oddification: even magnitudes move one step towards zero */
					i_magnitude = ( i_magnitude - 1 ) | 1;
				}
				i_level = i_level < 0 ? -i_magnitude : i_magnitude;

				pi_coeffs[ i_y * i_stride + i_x ] = CLAMP_2047( i_level );
			}
		}
	}
}
uint8_t *pui8_qmat ) +{ + if( !ps_y262->b_sequence_mpeg1 ) + { + y262_quant8x8_inter_bw_mpeg2( pi_coeffs, i_stride, i_quantizer, pui8_qmat ); + } + else + { + y262_quant8x8_inter_bw_mpeg1( pi_coeffs, i_stride, i_quantizer, pui8_qmat ); + } +} + + + +int32_t y262_size_run_level( int32_t i_run, int32_t i_level ) +{ + int32_t i_ulevel; + i_ulevel = i_level < 0 ? -i_level : i_level; + + if( i_run < 32 && i_ulevel < 41 ) + { + return rgi_y262_run_level_bits_zero[ i_run ][ i_ulevel ]; + } +#if 0 + for( i_idx = 0; rgs_y262_dct_coefficients_table_zero[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgs_y262_dct_coefficients_lookup_table_zero[ i_idx ].i_run == i_run && + rgs_y262_dct_coefficients_lookup_table_zero[ i_idx ].i_level == i_ulevel ) + { + return rgs_y262_dct_coefficients_table_zero[ i_idx ].i_length + 1; /* +1 sign */ + } + } +#endif + return 24; /* 6 escape 6 run 12 level */ +} + + +void y262_quant8x8_trellis_copy_int8( int8_t *pi8_dst, const int8_t *pi8_src, int32_t i_cnt ) +{ + int32_t i_idx; + + for( i_idx = 0; i_idx < i_cnt; i_idx++ ) + { + pi8_dst[ i_idx ] = pi8_src[ i_idx ]; + } +} + +void y262_quant8x8_trellis_copy_int16( int16_t *pi16_dst, const int16_t *pi16_src, int32_t i_cnt ) +{ + int32_t i_idx; + + for( i_idx = 0; i_idx < i_cnt; i_idx++ ) + { + pi16_dst[ i_idx ] = pi16_src[ i_idx ]; + } +} + + +int32_t y262_quant8x8_trellis_fw( y262_t *ps_y262, y262_slice_t *ps_slice, int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, bool_t b_intra ) +{ + int32_t i_dc, i_nz; + int32_t i_coeff, i_start, i_num_coeff, i_last_coeff, i_idx, i_idx2, i_run, i_level; + int16_t rgi16_levels[ 64 ]; + int16_t rgi16_coeffs[ 64 ]; + int8_t rgi8_idx[ 64 ]; + int16_t rgi16_level[ 64 ]; + uint8_t *pui8_qmat; + y262_macroblock_t *ps_mb; + + int32_t i_active_toggle; + int32_t i_candidate_level, i_dir, i_end, i_ssd, i_bits, i_cost, i_lambda; + + ps_mb = &ps_slice->s_macroblock; + + i_lambda = ps_mb->i_lambda; + + i_run = 0; + i_last_coeff = -1; + i_num_coeff = 0; + + 
for( i_idx = 0; i_idx < 8; i_idx++ ) + { + memcpy( &rgi16_levels[ i_idx * 8 ], pi_coeffs + ( i_idx * i_stride ), sizeof( int16_t ) * 8 ); + } + + if( b_intra ) + { + i_dc = rgi16_levels[ 0 ]; + i_start = 1; + pui8_qmat = ps_y262->rgui8_intra_quantiser_matrix; + y262_quant8x8_intra_fw( ps_y262, rgi16_levels, 8, ps_y262->rgui16_intra_quantizer_matrices[ i_quantizer ], ps_y262->rgui16_intra_quantizer_matrices_trellis_bias[ i_quantizer ] ); + i_nz = 1; + } + else + { + i_dc = 0; + i_start = 0; + pui8_qmat = ps_y262->rgui8_non_intra_quantiser_matrix; + y262_quant8x8_inter_fw( ps_y262, rgi16_levels, 8, ps_y262->rgui16_non_intra_quantizer_matrices[ i_quantizer ] ); + } + + if( i_nz ) + { + for( i_idx = i_start; i_idx < 64; i_idx++ ) + { + i_level = rgi16_levels[ rgui8_y262_scan_0_table[ i_idx ] ]; + + if( i_level != 0 ) + { + rgi16_coeffs[ i_num_coeff ] = pi_coeffs[ rgui8_y262_scan_0_table[ i_idx ] ]; + rgi8_idx[ i_num_coeff ] = i_idx; + rgi16_level[ i_num_coeff ] = i_level; + i_num_coeff++; + i_last_coeff = i_idx; + } + } + } + + for( i_idx = 0; i_idx < 8; i_idx++ ) + { + memset( pi_coeffs + ( i_idx * i_stride ), 0, sizeof( int16_t ) * 8 ); + } + pi_coeffs[ 0 ] = i_dc; + + if( !i_nz ) + { + return i_dc; + } + if( i_last_coeff < 0 ) + { + return i_dc; + } + + memset( &ps_y262->trellis.rgi8_path_active, 0, sizeof( ps_y262->trellis.rgi8_path_active ) ); + memset( &ps_y262->trellis.rgi_path_cost, 0, sizeof( ps_y262->trellis.rgi_path_cost ) ); + + i_active_toggle = 0; + ps_y262->trellis.rgi8_path_active[ i_active_toggle ][ 0 ] = 1; + + for( i_idx = 0; i_idx < i_num_coeff; i_idx++ ) + { + i_level = rgi16_level[ i_idx ]; + + if( i_level > 0 ) + { + i_dir = -1; + } + else + { + i_dir = 1; + } + i_end = i_level + i_dir * 2; + for( i_candidate_level = i_level; i_candidate_level != i_end; i_candidate_level += i_dir ) + { + int32_t i_qm, i_qt, i_x, i_y; + + i_x = rgui8_y262_scan_0_table[ rgi8_idx[ i_idx ] ] % 8; + i_y = rgui8_y262_scan_0_table[ rgi8_idx[ i_idx ] ] / 8; + i_qm = 
pui8_qmat[ i_y * 8 + i_x ]; + + if( b_intra ) + { + i_qt = i_qm * i_quantizer * 2; + i_coeff = ( i_candidate_level * i_qt ) / 32; + } + else + { + i_qt = i_qm * i_quantizer; + + if( i_candidate_level > 0 ) + { + i_coeff = ( ( i_candidate_level * 2 + 1 ) * i_qt ) / 32; + } + else if( i_candidate_level < 0 ) + { + i_coeff = ( ( i_candidate_level * 2 - 1 ) * i_qt ) / 32; + } + else + { + i_coeff = 0; + } + } + + i_coeff = CLAMP_2047( i_coeff ); + + i_ssd = ( i_coeff - rgi16_coeffs[ i_idx ] ) * ( i_coeff - rgi16_coeffs[ i_idx ] ); + + for( i_idx2 = i_idx; i_idx2 >= 0; i_idx2-- ) + { + if( ps_y262->trellis.rgi8_path_active[ i_active_toggle ][ i_idx2 ] ) + { + if( i_candidate_level != 0 ) + { + int32_t i_run; + if( i_idx2 > 0 ) + { + i_run = rgi8_idx[ i_idx ] - ps_y262->trellis.rgi8_path_idx[ i_idx2 ][ i_idx2 ] - 1; + } + else + { + i_run = rgi8_idx[ i_idx ]; + } + i_bits = y262_size_run_level( i_run, i_candidate_level ); + i_cost = ps_y262->trellis.rgi_path_cost[ i_idx2 ] + ( ( i_bits * i_lambda ) >> Y262_LAMBDA_BITS ) + i_ssd; + + if( !ps_y262->trellis.rgi8_path_active[ !i_active_toggle ][ i_idx2 + 1 ] || i_cost < ps_y262->trellis.rgi_path_cost[ i_idx2 + 1 ] ) + { + y262_quant8x8_trellis_copy_int8( &ps_y262->trellis.rgi8_path_idx[ i_idx2 + 1 ][ 0 ], &ps_y262->trellis.rgi8_path_idx[ i_idx2 ][ 0 ], ( i_idx2 + 1 ) ); + y262_quant8x8_trellis_copy_int16( &ps_y262->trellis.rgi16_path_level[ i_idx2 + 1 ][ 0 ], &ps_y262->trellis.rgi16_path_level[ i_idx2 ][ 0 ], ( i_idx2 + 1 ) ); + ps_y262->trellis.rgi8_path_idx[ i_idx2 + 1 ][ i_idx2 + 1 ] = rgi8_idx[ i_idx ]; + ps_y262->trellis.rgi16_path_level[ i_idx2 + 1 ][ i_idx2 + 1 ] = i_candidate_level; + ps_y262->trellis.rgi8_path_active[ !i_active_toggle ][ i_idx2 + 1 ] = 1; + ps_y262->trellis.rgi_path_cost[ i_idx2 + 1 ] = i_cost; + } + } + else + { + /* last coeff candidate iter, we can overwrite/activate current path */ + i_cost = ps_y262->trellis.rgi_path_cost[ i_idx2 ] + i_ssd; + if( !ps_y262->trellis.rgi8_path_active[ 
!i_active_toggle ][ i_idx2 ] || i_cost < ps_y262->trellis.rgi_path_cost[ i_idx2 ] ) + { + ps_y262->trellis.rgi8_path_active[ !i_active_toggle ][ i_idx2 ] = 1; + ps_y262->trellis.rgi_path_cost[ i_idx2 ] = i_cost; + } + } + } + } + } + memset( &ps_y262->trellis.rgi8_path_active[ i_active_toggle ][ 0 ], 0, sizeof( int8_t ) * 65 ); + i_active_toggle = i_active_toggle^1; + } + + i_cost = MAX_COST; + i_idx2 = 0; + for( i_idx = 0; i_idx <= i_num_coeff; i_idx++ ) + { + if( ps_y262->trellis.rgi8_path_active[ i_active_toggle ][ i_idx ] && ps_y262->trellis.rgi_path_cost[ i_idx ] < i_cost ) + { + i_idx2 = i_idx; + i_cost = ps_y262->trellis.rgi_path_cost[ i_idx ]; + } + } + for( i_idx = 1; i_idx <= i_idx2; i_idx++ ) + { + int32_t i_x, i_y; + if( ps_y262->trellis.rgi16_path_level[ i_idx2 ][ i_idx ] != rgi16_level[ i_idx - 1 ] ) + { + i_idx = i_idx; + } + i_x = rgui8_y262_scan_0_table[ ps_y262->trellis.rgi8_path_idx[ i_idx2 ][ i_idx ] ] % 8; + i_y = rgui8_y262_scan_0_table[ ps_y262->trellis.rgi8_path_idx[ i_idx2 ][ i_idx ] ] / 8; + pi_coeffs[ i_x + i_y * i_stride ] = ps_y262->trellis.rgi16_path_level[ i_idx2 ][ i_idx ]; + } + return i_idx2 != 0; +} + diff --git a/src/y262/transform.h b/src/y262/transform.h new file mode 100644 index 0000000..bfe9bb6 --- /dev/null +++ b/src/y262/transform.h @@ -0,0 +1,45 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +int32_t y262_quant8x8_intra_fw( y262_t *ps_y262, int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ); +void y262_quant8x8_intra_bw( y262_t *ps_y262, int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, uint8_t *pui8_qmat ); + + +int32_t y262_quant8x8_inter_fw( y262_t *ps_y262, int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat ); +void y262_quant8x8_inter_bw( y262_t *ps_y262, int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, uint8_t *pui8_qmat ); + +int32_t y262_quant8x8_intra_fw_mpeg2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ); +int32_t y262_quant8x8_inter_fw_mpeg2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat ); + +int32_t y262_quant8x8_trellis_fw( y262_t *ps_y262, y262_slice_t *ps_slice, int16_t *pi_coeffs, int32_t i_stride, int32_t i_quantizer, bool_t b_intra ); + +void y262_fdct_c( int16_t *pi16_block, int16_t *pi16_dst ); +void y262_idct_c( int16_t *pi16_block, int16_t *pi16_dst ); + diff --git a/src/y262/transform_x86.asm b/src/y262/transform_x86.asm new file mode 100644 index 0000000..357e169 --- /dev/null +++ b/src/y262/transform_x86.asm @@ -0,0 +1,575 @@ +%if 0 +Copyright (c) 2016, Ralf 
Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
; fdct
; Coefficient pairs for the column pass of y262_fdct_sse2. Every row repeats one
; word pair four times so that a single pmaddwd handles four columns at once.
ALIGN 16
y262_fdct_tab1:
dw 22724, 19265, 22724, 19265, 22724, 19265, 22724, 19265
dw 12872, 4520, 12872, 4520, 12872, 4520, 12872, 4520
dw 19265, -4520, 19265, -4520, 19265, -4520, 19265, -4520
dw -22724, -12872, -22724, -12872, -22724, -12872, -22724, -12872
dw 12872, -22724, 12872, -22724, 12872, -22724, 12872, -22724
dw 4520, 19265, 4520, 19265, 4520, 19265, 4520, 19265
dw 4520, -12872, 4520, -12872, 4520, -12872, 4520, -12872
dw 19265, -22724, 19265, -22724, 19265, -22724, 19265, -22724
dw 21406, 8867, 21406, 8867, 21406, 8867, 21406, 8867
dw -8867, -21406, -8867, -21406, -8867, -21406, -8867, -21406
dw 8867, -21406, 8867, -21406, 8867, -21406, 8867, -21406
dw 21406, -8867, 21406, -8867, 21406, -8867, 21406, -8867
dw 16383, 16383, 16383, 16383, 16383, 16383, 16383, 16383
dw 16383, 16383, 16383, 16383, 16383, 16383, 16383, 16383
dw 16383, -16383, 16383, -16383, 16383, -16383, 16383, -16383
dw -16383, 16383, -16383, 16383, -16383, 16383, -16383, 16383

; Rounding bias of the column pass: 1 << ( 11 - 1 ), added before the shift by 11.
ALIGN 16
y262_fdct_rnd1:
dd 1024, 1024, 1024, 1024

; Coefficients for the row pass of y262_fdct_sse2.
ALIGN 16
y262_fdct_tab2:
dw 16385, 16385, 22726, 19266, -8867, -21408, -22726, -12873
dw 16385, 16385, 12873, 4521, 21408, 8867, 19266, -4521
dw 16385, -16385, 12873, -22726, 21408, -8867, 19266, -22726
dw -16385, 16385, 4521, 19266, 8867, -21408, 4521, -12873
dw 16385, 22726, 21408, 19266, 16385, 12873, 8867, 4521
dw 16385, 19266, 8867, -4521, -16385, -22726, -21408, -12873
dw 16385, 12873, -8867, -22726, -16385, 4521, 21408, 19266
dw 16385, 4521, -21408, -12873, 16385, 19266, -8867, -22726

; Rounding bias of the row pass: 1 << ( 20 - 1 ), added before the shift by 20.
; This constant is used as a memory operand of paddd, which needs 16 byte
; alignment. The directive used to be spelled "ALIGN16", which is not an
; alignment request, so the constant was only aligned because the table in
; front of it happens to be a multiple of 16 bytes long.
ALIGN 16
y262_fdct_rnd2:
dd 524288, 524288, 524288, 524288
-22724, 12872, -22724 +dw 4520, 19265, 4520, 19265, 4520, 19265, 4520, 19265 +dw 4520, -12872, 4520, -12872, 4520, -12872, 4520, -12872 +dw 19265, -22724, 19265, -22724, 19265, -22724, 19265, -22724 +dw 21406, 8867, 21406, 8867, 21406, 8867, 21406, 8867 +dw 16383, 16383, 16383, 16383, 16383, 16383, 16383, 16383 +dw 8867, -21406, 8867, -21406, 8867, -21406, 8867, -21406 +dw 16383, -16383, 16383, -16383, 16383, -16383, 16383, -16383 + +ALIGN 16 +y262_idct_rnd1: +dd 1024, 1024, 1024, 1024 + +ALIGN 16 +y262_idct_tab2: +dw 16385, 21408, 16385, 8867, 16385, -8867, 16385, -21408 +dw 16385, 8867, -16385, -21408, -16385, 21408, 16385, -8867 +dw 22726, 19266, 19266, -4521, 12873, -22726, 4521, -12873 +dw 12873, 4521, -22726, -12873, 4521, 19266, 19266, -22726 + +ALIGN 16 +y262_idct_rnd2: +dd 524288, 524288, 524288, 524288 + + +; quant + +ALIGN 16 +minus_1 : dd 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff +g_127 : dw 127, 127, 127, 127, 127, 127, 127, 127 +g_n127 : dw -127, -127, -127, -127, -127, -127, -127, -127 +g_2047 : dw 2047, 2047, 2047, 2047, 2047, 2047, 2047, 2047 +g_m2048 : dw -2048, -2048, -2048, -2048, -2048, -2048, -2048, -2048 + +SECTION .text + + +INIT_XMM +;void y262_fdct_sse2( short *block, short *dst ) +cglobal y262_fdct_sse2, 2, 5, 8 + lea r2, [ y262_fdct_tab1 ] + movdqu m1, [ r0 ] + movdqu m7, [ r0 + 112 ] + movdqa m6, m1 + psubsw m1, m7 + paddsw m6, m7 + movdqu [ r1 ], m6 + movdqu m2, [ r0 + 16 ] + movdqu m7, [ r0 + 96 ] + movdqa m6, m2 + psubsw m2, m7 + paddsw m6, m7 + movdqu m3, [ r0 + 32 ] + movdqu [ r1 + 32 ], m6 + movdqu m7, [ r0 + 80 ] + movdqa m6, m3 + psubsw m3, m7 + paddsw m6, m7 + movdqu m4, [ r0 + 48 ] + movdqu m7, [ r0 + 64 ] + movdqu [ r1 + 64 ], m6 + movdqa m6, m4 + psubsw m4, m7 + paddsw m6, m7 + movdqu [ r1 + 96 ], m6 + + movdqa m0, m1 + punpcklwd m0, m2 + punpckhwd m1, m2 + movdqa m2, m3 + punpcklwd m2, m4 + punpckhwd m3, m4 + + movdqa m6, m0 + movdqa m7, m1 + pmaddwd m6, [ r2 ] + pmaddwd m7, [ r2 ] + movdqa m4, m2 + movdqa m5, m3 + 
pmaddwd m4, [ r2 + 16 ] + pmaddwd m5, [ r2 + 16 ] + paddd m6, m4 + paddd m7, m5 + paddd m6, [ y262_fdct_rnd1 ] + paddd m7, [ y262_fdct_rnd1 ] + psrad m6, 11 + psrad m7, 11 + packssdw m6, m7 + movdqu [ r1 + 16 ], m6 + add r2, 32 + movdqa m6, m0 + movdqa m7, m1 + pmaddwd m6, [ r2 ] + pmaddwd m7, [ r2 ] + movdqa m4, m2 + movdqa m5, m3 + pmaddwd m4, [ r2 + 16 ] + pmaddwd m5, [ r2 + 16 ] + paddd m6, m4 + paddd m7, m5 + paddd m6, [ y262_fdct_rnd1 ] + paddd m7, [ y262_fdct_rnd1 ] + psrad m6, 11 + psrad m7, 11 + packssdw m6, m7 + movdqu [ r1 + 48 ], m6 + add r2, 32 + movdqa m6, m0 + movdqa m7, m1 + pmaddwd m6, [ r2 ] + pmaddwd m7, [ r2 ] + movdqa m4, m2 + movdqa m5, m3 + pmaddwd m4, [ r2 + 16 ] + pmaddwd m5, [ r2 + 16 ] + paddd m6, m4 + paddd m7, m5 + paddd m6, [ y262_fdct_rnd1 ] + paddd m7, [ y262_fdct_rnd1 ] + psrad m6, 11 + psrad m7, 11 + packssdw m6, m7 + movdqu [ r1 + 80 ], m6 + add r2, 32 + pmaddwd m0, [ r2 ] + pmaddwd m1, [ r2 ] + pmaddwd m2, [ r2 + 16 ] + pmaddwd m3, [ r2 + 16 ] + paddd m0, m2 + paddd m1, m3 + paddd m0, [ y262_fdct_rnd1 ] + paddd m1, [ y262_fdct_rnd1 ] + psrad m0, 11 + psrad m1, 11 + packssdw m0, m1 + movdqu [ r1 + 112 ], m0 + add r2, 32 + + + movdqu m1, [ r1 ] + movdqu m7, [ r1 + 96 ] + movdqa m6, m1 + psubsw m1, m7 + paddsw m6, m7 + movdqu [ r1 ], m6 + movdqu m2, [ r1 + 32 ] + movdqu m7, [ r1 + 64 ] + movdqa m6, m2 + psubsw m2, m7 + paddsw m6, m7 + movdqu [ r1 + 64 ], m6 + + movdqa m0, m1 + punpcklwd m0, m2 + punpckhwd m1, m2 + + movdqa m6, m0 + movdqa m7, m1 + pmaddwd m6, [ r2 ] + pmaddwd m7, [ r2 ] + paddd m6, [ y262_fdct_rnd1 ] + paddd m7, [ y262_fdct_rnd1 ] + psrad m6, 11 + psrad m7, 11 + packssdw m6, m7 + movdqu [ r1 + 32 ], m6 + add r2, 32 + movdqa m6, m0 + movdqa m7, m1 + pmaddwd m6, [ r2 ] + pmaddwd m7, [ r2 ] + paddd m6, [ y262_fdct_rnd1 ] + paddd m7, [ y262_fdct_rnd1 ] + psrad m6, 11 + psrad m7, 11 + packssdw m6, m7 + movdqu [ r1 + 96 ], m6 + add r2, 32 + + + movdqu m0, [ r1 ] + movdqu m2, [ r1 + 64 ] + movdqa m1, m0 + punpcklwd m0, m2 
+    punpckhwd m1, m2
+
+    movdqa m6, m0
+    movdqa m7, m1
+    pmaddwd m6, [ r2 ]
+    pmaddwd m7, [ r2 ]
+    paddd m6, [ y262_fdct_rnd1 ]
+    paddd m7, [ y262_fdct_rnd1 ]
+    psrad m6, 11
+    psrad m7, 11
+    packssdw m6, m7
+    movdqu [ r1 + 0 ], m6
+    add r2, 32
+    movdqa m6, m0
+    movdqa m7, m1
+    pmaddwd m6, [ r2 ]
+    pmaddwd m7, [ r2 ]
+    paddd m6, [ y262_fdct_rnd1 ]
+    paddd m7, [ y262_fdct_rnd1 ]
+    psrad m6, 11
+    psrad m7, 11
+    packssdw m6, m7
+    movdqu [ r1 + 64 ], m6
+
+    mov r3, 8
+    lea r2, [ y262_fdct_tab2 ]
+.y262_fdct_sse2_rowloop:
+    movq m0, [ r1 ]
+    movq m2, [ r1 + 8 ]
+    movdqa m1, m0
+    pshuflw m2, m2, 0x1b
+    psubsw m1, m2
+    paddsw m0, m2
+    punpckldq m0, m1
+    pshufd m1, m0, 0x4e
+    movdqa m2, [ r2 ]
+    movdqa m3, [ r2 + 16 ]
+    movdqa m4, [ r2 + 32 ]
+    movdqa m5, [ r2 + 48 ]
+    pmaddwd m2, m0
+    pmaddwd m3, m1
+    pmaddwd m4, m0
+    pmaddwd m5, m1
+    paddd m2, m3
+    paddd m4, m5
+    paddd m2, [ y262_fdct_rnd2 ]
+    paddd m4, [ y262_fdct_rnd2 ]
+    psrad m2, 20
+    psrad m4, 20
+    packssdw m2, m4
+    movdqu [ r1 ], m2
+    add r1, 16
+
+    sub r3, 1
+    jnz .y262_fdct_sse2_rowloop
+
+    RET
+; y262_idct_sse2: 8x8 IDCT on int16 data, done as a column pass (src -> dst) followed by a row pass (dst in place).
+; Both buffers hold 8 rows of 8 int16, 16 bytes per row. Column pass rounds with y262_idct_rnd1 and shifts by 11,
+; row pass rounds with y262_idct_rnd2 and shifts by 20. Tables and rounding constants are defined outside this view.
+INIT_XMM
+; void y262_idct_sse2( int16_t *pi16_src, int16_t *pi_dst )
+cglobal y262_idct_sse2, 2, 5, 8
+    lea r2, [ y262_idct_tab1 ]  ; r2 = multiplier table for the column pass
+    mov r3, 2  ; two iterations, four columns (8 bytes of each row) per iteration
+y262_idct_sse2_loop_v:  ; NOTE(review): not a '.'-local label, unlike .y262_idct_sse2_loop_h below
+    movq m0, [ r0 + 16 ]  ; row 1
+    movq m2, [ r0 + 48 ]  ; row 3
+    movq m1, [ r0 + 80 ]  ; row 5
+    movq m3, [ r0 + 112 ]  ; row 7
+    punpcklwd m0, m2  ; m0 = rows 1/3 interleaved per column
+    punpcklwd m1, m3  ; m1 = rows 5/7 interleaved per column
+  ; m2..m5 = four odd-row sums, one per 32-byte table pair
+    movdqa m2, [ r2 ]
+    movdqa m7, [ r2 + 16 ]
+    pmaddwd m2, m0
+    pmaddwd m7, m1
+    paddd m2, m7  ; m2 pairs with output rows 0 and 7
+
+    movdqa m3, [ r2 + 32 ]
+    movdqa m7, [ r2 + 48 ]
+    pmaddwd m3, m0
+    pmaddwd m7, m1
+    paddd m3, m7  ; m3 pairs with output rows 1 and 6
+
+    movdqa m4, [ r2 + 64 ]
+    movdqa m7, [ r2 + 80 ]
+    pmaddwd m4, m0
+    pmaddwd m7, m1
+    paddd m4, m7  ; m4 pairs with output rows 2 and 5
+
+    movdqa m5, [ r2 + 96 ]
+    movdqa m7, [ r2 + 112 ]
+    pmaddwd m5, m0
+    pmaddwd m7, m1
+    paddd m5, m7  ; m5 pairs with output rows 3 and 4
+
+    movq m6, [ r0 + 32 ]  ; row 2
+    movq m0, [ r0 + 96 ]  ; row 6
+    punpcklwd m6, m0
+    pmaddwd m6, [ r2 + 128 ]
+
+    movq m7, [ r0 + 0 ]  ; row 0
+    movq m0, [ r0 + 64 ]  ; row 4
+    punpcklwd m7, m0
+    pmaddwd m7, [ r2 + 144 ]
+
+    movdqa m0, m6
+    paddd m0, m7  ; m0 = rows0/4 term + rows2/6 term
+    psubd m7, m6  ; m7 = rows0/4 term - rows2/6 term
+
+    movdqa m1, m2
+    paddd m1, m0  ; m1 = even + odd -> output row 0 (stored further down)
+    psubd m0, m2  ; m0 = even - odd -> output row 7
+    paddd m0, [ y262_idct_rnd1 ]
+    paddd m1, [ y262_idct_rnd1 ]
+    psrad m1, 11
+    psrad m0, 11
+    packssdw m1, m1
+    packssdw m0, m0
+    movq [ r1 + 112 ], m0  ; row 7
+
+    movdqa m2, m5
+    paddd m2, m7  ; -> output row 3
+    psubd m7, m5  ; -> output row 4 (stored further down)
+    paddd m2, [ y262_idct_rnd1 ]
+    paddd m7, [ y262_idct_rnd1 ]
+    psrad m2, 11
+    psrad m7, 11
+    packssdw m2, m2
+    movq [ r1 + 48 ], m2  ; row 3
+    packssdw m7, m7
+
+    movq m6, [ r0 + 32 ]  ; rows 2/6 again, second table pair
+    movq m0, [ r0 + 96 ]
+    punpcklwd m6, m0
+    pmaddwd m6, [ r2 + 160 ]
+
+    movq m2, [ r0 + 0 ]  ; rows 0/4 again
+    movq m0, [ r0 + 64 ]
+    movq [ r1 ], m1  ; row 0, written only after rows 0/4 were re-read (presumably so src == dst works; TODO confirm)
+    movq [ r1 + 64 ], m7  ; row 4
+    punpcklwd m2, m0
+    pmaddwd m2, [ r2 + 176 ]
+
+    movdqa m0, m6
+    paddd m0, m2  ; m0 = rows0/4 term + rows2/6 term
+    psubd m2, m6  ; m2 = rows0/4 term - rows2/6 term
+
+    movdqa m7, m3
+    paddd m7, m0  ; -> output row 1
+    psubd m0, m3  ; -> output row 6
+    paddd m7, [ y262_idct_rnd1 ]
+    paddd m0, [ y262_idct_rnd1 ]
+    psrad m7, 11
+    psrad m0, 11
+    packssdw m7, m7
+    packssdw m0, m0
+    movq [ r1 + 16 ], m7  ; row 1
+    movq [ r1 + 96 ], m0  ; row 6
+
+    movdqa m1, m4
+    paddd m1, m2  ; -> output row 2
+    psubd m2, m4  ; -> output row 5
+    paddd m1, [ y262_idct_rnd1 ]
+    paddd m2, [ y262_idct_rnd1 ]
+    psrad m1, 11
+    psrad m2, 11
+    packssdw m1, m1
+    movq [ r1 + 32 ], m1  ; row 2
+    packssdw m2, m2
+    movq [ r1 + 80 ], m2  ; row 5
+
+    add r0, 8  ; next four columns
+    add r1, 8
+    sub r3, 1
+    jnz y262_idct_sse2_loop_v
+
+    sub r1, 16  ; undo the two 8-byte advances: r1 = start of dst again
+    lea r2, [ y262_idct_tab2 ]  ; r2 = multiplier table for the row pass
+    mov r3, 8  ; eight rows, processed in place in dst
+.y262_idct_sse2_loop_h:
+    movdqu m7, [ r1 ]  ; one row: words w0..w7
+    pshuflw m0, m7, 0x88
+    punpckldq m0, m0  ; every dword of m0 = ( w0, w2 )
+    pshufhw m1, m7, 0x88
+    punpckhdq m1, m1  ; every dword of m1 = ( w4, w6 )
+
+    pshuflw m2, m7, 0xdd
+    punpckldq m2, m2  ; every dword of m2 = ( w1, w3 )
+    pshufhw m3, m7, 0xdd
+    punpckhdq m3, m3  ; every dword of m3 = ( w5, w7 )
+
+    pmaddwd m0, [ r2 ]
+    pmaddwd m1, [ r2 + 16 ]
+    pmaddwd m2, [ r2 + 32 ]
+    pmaddwd m3, [ r2 + 48 ]
+
+    paddd m0, m1  ; even-word part
+    paddd m2, m3  ; odd-word part
+    movdqa m1, m0
+    paddd m0, m2  ; even + odd
+    psubd m1, m2  ; even - odd
+    pshufd m1, m1, 0x1b  ; reverse dword order of the differences (presumably outputs 7..4 -> 4..7)
+
+    paddd m0, [ y262_idct_rnd2 ]
+    paddd m1, [ y262_idct_rnd2 ]
+    psrad m0, 20
+    psrad m1, 20
+    packssdw m0, m1  ; saturate back to 8 x int16
+
+    movdqu [ r1 ], m0
+    add r1, 16  ; next row
+    sub r3, 1
+    jnz .y262_idct_sse2_loop_h
+
+    RET
+
+
+
+
+
+
+
+
+; int32_t y262_quant8x8_intra_fw_sse2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias )
+INIT_XMM
+cglobal y262_quant8x8_intra_fw_sse2, 4, 6, 5  ; in-place forward quantization of 8 rows of 8 int16 with per-coefficient bias
+%ifdef ARCH_X86_64
+    movsxd r1, r1d  ; sign-extend the 32-bit stride argument
+%endif
+    pxor xmm3, xmm3  ; xmm3 = running OR of every quantized row
+    shl r1, 1  ; stride: int16 elements -> bytes
+    mov r5, r0  ; remember where coefficient 0 lives
+    mov r4w, [ r5 ]  ; save the original coefficient 0, it is written back after the loop
+%rep 8
+    movdqu xmm0, [ r0 ]  ; 8 coefficients
+    movdqu xmm2, [ r2 ]  ; 8 qmat multipliers
+    movdqu xmm4, [ r3 ]  ; 8 bias values
+
+    pxor xmm1, xmm1
+    pcmpgtw xmm1, xmm0  ; xmm1 = 0xffff where coefficient < 0
+    pxor xmm0, xmm1
+    psubw xmm0, xmm1  ; xmm0 = |coefficient|
+    paddusw xmm0, xmm4  ; add bias, unsigned saturation
+    pmulhuw xmm0, xmm2  ; high 16 bits of the unsigned product with qmat
+    pxor xmm0, xmm1
+    psubw xmm0, xmm1  ; re-apply the sign
+
+    pminsw xmm0, [ g_2047 ]  ; clamp; constants are defined outside this view (names suggest +2047 / -2048)
+    pmaxsw xmm0, [ g_m2048 ]
+
+    por xmm3, xmm0
+    movdqu [ r0 ], xmm0
+
+    add r0, r1  ; next row
+    add r2, 16
+    add r3, 16
+%endrep
+
+    movdqa xmm0, [ minus_1 ]  ; presumably all bits set - TODO confirm against the data section
+    pxor xmm1, xmm1
+    pcmpeqb xmm3, xmm1  ; 0xff for every zero byte of the OR
+    pxor xmm3, xmm0  ; flip: bytes that were non-zero (given minus_1 is all ones)
+
+    mov [ r5 ], r4w  ; coefficient 0 is returned unquantized; NOTE(review): the mask above was built from its quantized value
+    pmovmskb eax, xmm3  ; return value: one bit per byte of the OR of all rows
+
+    RET
+; Same as the intra version without the bias add and without preserving coefficient 0.
+; int32_t y262_quant8x8_inter_fw_sse2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat )
+INIT_XMM
+cglobal y262_quant8x8_inter_fw_sse2, 3, 3, 4
+%ifdef ARCH_X86_64
+    movsxd r1, r1d  ; sign-extend the 32-bit stride argument
+%endif
+    pxor xmm3, xmm3  ; xmm3 = running OR of every quantized row
+    shl r1, 1  ; stride: int16 elements -> bytes
+%rep 8
+    movdqu xmm0, [ r0 ]  ; 8 coefficients
+    movdqu xmm2, [ r2 ]  ; 8 qmat multipliers
+
+    pxor xmm1, xmm1
+    pcmpgtw xmm1, xmm0  ; sign mask
+    pxor xmm0, xmm1
+    psubw xmm0, xmm1  ; |coefficient|
+    pmulhuw xmm0, xmm2  ; high 16 bits of the unsigned product with qmat
+    pxor xmm0, xmm1
+    psubw xmm0, xmm1  ; re-apply the sign
+
+    pminsw xmm0, [ g_2047 ]  ; clamp, constants defined outside this view
+    pmaxsw xmm0, [ g_m2048 ]
+
+    por xmm3, xmm0
+    movdqu [ r0 ], xmm0
+
+    add r0, r1  ; next row
+    add r2, 16
+%endrep
+
+    movdqa xmm0, [ minus_1 ]  ; presumably all bits set - TODO confirm
+    pxor xmm1, xmm1
+    pcmpeqb xmm3, xmm1
+    pxor xmm3, xmm0
+
+    pmovmskb eax, xmm3  ; return value: one bit per byte of the OR of all rows
+
+    RET
+
diff --git a/src/y262/transform_x86.h b/src/y262/transform_x86.h
new file mode 100644
index 0000000..49009ea
--- /dev/null
+++ b/src/y262/transform_x86.h
@@ -0,0 +1,35 @@
+/*
+Copyright (c) 2013,2016, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2.
Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Prototypes of the SSE2 routines implemented in assembly; all of them work on 8x8 blocks of 16-bit values. */
+void y262_fdct_sse2( int16_t *pi16_block, int16_t *pi16_dst ); /* 8x8 forward DCT; the row pass runs in place on pi16_dst, 16 bytes per row */
+void y262_idct_sse2( int16_t *pi16_src, int16_t *pi_dst ); /* 8x8 inverse DCT; both buffers are 8 rows of 8 int16, 16 bytes per row */
+/* Quantizers: rewrite pi_coeffs in place, i_stride is in int16 elements; the return value is a byte mask of the OR of all quantized rows (0 means every written coefficient was 0). */
+int32_t y262_quant8x8_intra_fw_sse2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ); /* adds pui16_bias before the multiply; pi_coeffs[ 0 ] is restored to its input value */
+int32_t y262_quant8x8_inter_fw_sse2( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat ); /* no bias; all 64 coefficients are quantized */
diff --git a/src/y262/types.h b/src/y262/types.h
new file mode 100644
index 0000000..61721f1
--- /dev/null
+++ b/src/y262/types.h
@@ -0,0 +1,778 @@
+/*
+Copyright (c) 2013, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2.
Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+typedef uint32_t bool_t; /* boolean type of the b_* fields below; holds TRUE or FALSE */
+
+#define TRUE 1
+#define FALSE 0
+
+/* main@high max vbv size is 9781248 (bits, hence the / 8 below), +3 byte next startcode */
+#define MAX_ELEMENTARY_STREAM_BUFFER ( ( 9781248 / 8 ) + 3 ) /* in bytes */
+
+/* 1920 / 16 */
+#define MAX_MACROBLOCKS_PER_ROW 120
+/* 1152 / 16 */
+#define MAX_MACROBLOCK_COLUMN_HEIGHT 72
+
+#define MAX_MACROBLOCKS_PER_PICTURE ( MAX_MACROBLOCKS_PER_ROW * MAX_MACROBLOCK_COLUMN_HEIGHT ) /* 120 * 72 = 8640 */
+#define MAX_SLICES_PER_PICTURE ( MAX_MACROBLOCKS_PER_PICTURE ) /* upper bound: one slice per macroblock */
+
+#define MACROBLOCK_SIZE 16 /* the 16 used as divisor in the two comments above */
+
+/* startcodes: values are below 0x100 except the STUFFING marker; presumably the byte that follows the start code prefix, cf. ISO/IEC 13818-2 - TODO confirm */
+#define STARTCODE_PICTURE 0x00
+#define STARTCODE_SLICE_START 0x01 /* first value of the slice range */
+#define STARTCODE_SLICE_END 0xAF /* last value of the slice range */
+#define STARTCODE_USER_DATA 0xB2
+#define STARTCODE_SEQUENCE_HEADER 0xB3
+#define STARTCODE_SEQUENCE_ERROR 0xB4
+#define STARTCODE_EXTENSION 0xB5
+#define STARTCODE_SEQUENCE_END 0xB7
+#define STARTCODE_GROUP 0xB8
+#define STARTCODE_STUFFING 0x100 /* not a valid start code */
+
+/* y262 bitstream layer types */
+
+#define H262_STARTCODE_PREFIX 1 /* numeric value of the prefix */
+#define H262_STARTCODE_PREFIX_LENGTH 24 /* presumably the prefix length in bits, i.e. 0x000001 */
+#define H262_STARTCODE_COMPLETE_BYTE_LENGTH 4 /* presumably 3 prefix bytes + 1 start code byte - TODO confirm */
+
+#define H262_CHROMA_FORMAT_420 1 /* presumably the values carried by i_chroma_format in the sequence extension */
+#define H262_CHROMA_FORMAT_422 2
+#define H262_CHROMA_FORMAT_444 3
+
+#define MC_BLOCK_16x16 0 /* five block sizes; presumably the first index of rgf_motcomp_copy / rgf_motcomp_avg[ 5 ][ 4 ] */
+#define MC_BLOCK_16x8 1
+#define MC_BLOCK_8x16 2
+#define MC_BLOCK_8x8 3
+#define MC_BLOCK_8x4 4
+
+#define MC_BLOCK_00 0 /* four variants; presumably the second index of the same tables - TODO confirm */
+#define MC_BLOCK_01 1
+#define MC_BLOCK_10 2
+#define MC_BLOCK_11 3
+
+#define MAX_COST ( 0x7fffffff ) /* largest positive int32_t */
+
+typedef struct { /* bitstream buffer plus bit-level cursor state; the field roles are not visible in this header */
+    uint8_t *pui8_bitstream;
+    int32_t i_length;
+
+    int32_t i_byte_count;
+    int32_t i_next_bit;
+
+    uint8_t *pui8_codeword_ptr;
+    uint32_t ui_codeword;
+    int32_t i_codeword_fill;
+
+} y262_bitstream_t;
+
+/* y262 video sequence layer types: one field per syntax element, the comment gives its width in the bitstream */
+
+typedef struct {
+    int32_t i_horizontal_size; /* 12 bit, in pixels */
+    int32_t i_vertical_size; /* 12 bit, in pixels */
+
+    int32_t i_aspect_ratio_information; /* 4 bit */
+    int32_t i_frame_rate_code; /* 4 bit */
+    int32_t i_bit_rate_value; /* 18 bit */
+    bool_t b_marker_bit; /* 1 bit */
+    int32_t i_vbv_buffer_size_value; /* 10 bit */
+    bool_t b_constrained_parameters_flag; /* 1 bit */
+
+    bool_t b_load_intra_quantiser_matrix; /* 1 bit */
+    uint8_t rgui8_intra_quantiser_matrix[64]; /* 64 x 8 bit, only present if load_intra_quantiser_matrix is 1 */
+
+    bool_t b_load_non_intra_quantiser_matrix; /* 1 bit */
+    uint8_t rgui8_non_intra_quantiser_matrix[64]; /* 64 x 8 bit, only present if load_non_intra_quantiser_matrix is 1 */
+
+} y262_sequence_header_t;
+
+
+#define H262_SEQUENCE_EXTENSION_ID 1 /* identifier value for the sequence extension */
+typedef struct {
+    int32_t i_profile_and_level_indication; /* 8 bit */
+    bool_t b_progressive_sequence; /* 1 bit */
+
+    int32_t i_chroma_format; /* 2 bit */
+    int32_t i_horizontal_size_extension; /* 2 bit */
+    int32_t i_vertical_size_extension; /* 2 bit */
+    int32_t i_bit_rate_extension; /* 12 bit */
+    bool_t b_marker_bit; /* 1 bit */
+    int32_t i_vbv_buffer_size_extension; /* 8 bit */
+    bool_t b_low_delay; /* 1 bit */
+    int32_t i_frame_rate_extension_n; /* 2 bit */
+    int32_t i_frame_rate_extension_d; /* 5 bit */
+} y262_sequence_extension_t;
+
+
+#define H262_SEQUENCE_DISPLAY_EXTENSION_ID 2 +typedef struct { + int32_t i_video_format; /* 3 bit */ + bool_t b_colour_description; /* 1 bit */ + + struct { + int32_t i_colour_primaries; /* 8 bit */ + int32_t i_transfer_characteristics; /* 8 bit */ + int32_t i_matrix_coefficients; /* 8 bit */ + } s_colour_description; /* only present when colour_description is 1 */ + + int32_t i_display_horizontal_size; /* 14 bit */ + bool_t b_marker_bit; /* 1 bit */ + int32_t i_display_vertical_size; /* 14 bit */ + +} y262_sequence_display_extension_t; + + +typedef struct +{ + int32_t i_time_code; /* 25 bit */ + bool_t b_closed_gop; /* 1 bit */ + bool_t b_broken_link; /* 1 bit */ +} y262_group_of_pictures_header_t; + + +#define PICTURE_CODING_TYPE_I 1 +#define PICTURE_CODING_TYPE_P 2 +#define PICTURE_CODING_TYPE_B 3 + +typedef struct +{ + int32_t i_temporal_reference; /* 10 bit */ + int32_t i_picture_coding_type; /* 3 bit */ + int32_t i_vbv_delay; /* 16 bit */ + bool_t b_full_pel_forward_vector; /* 1 bit, picture coding type == 2 || 3 */ + int32_t i_forward_f_code; /* 3 bit, picture coding type == 2 || 3 */ + bool_t b_full_pel_backward_vector; /* 1 bit, picture coding type == 3 */ + int32_t i_backward_f_code; /* 3 bit, picture coding type == 3 */ + bool_t b_extra_bit_picture; /* 1 bit */ +#define MAX_EXTRA_INFORMATION_PICTURE 0x20 + uint8_t rgui8_extra_information_picture[ MAX_EXTRA_INFORMATION_PICTURE ]; + +} y262_picture_header_t; + +#define H262_PICTURE_CODING_EXTENSION_ID 8 + +#define PICTURE_CODING_FORWARD 0 +#define PICTURE_CODING_BACKWARD 1 +#define PICTURE_CODING_HORIZONTAL 0 +#define PICTURE_CODING_VERTICAL 1 + +#define PICTURE_CODING_STRUCTURE_TOP 1 +#define PICTURE_CODING_STRUCTURE_BOTTOM 2 +#define PICTURE_CODING_STRUCTURE_FRAME 3 + + +typedef struct { + int32_t rgi_f_code[2][2]; /* 4 bits each */ + int32_t i_intra_dc_precision; /* 2 bit */ + int32_t i_picture_structure; /* 2 bit */ + bool_t b_top_field_first; /* 1 bit */ + bool_t b_frame_pred_frame_dct; /* 1 bit */ + 
bool_t b_concealment_motion_vectors; /* 1 bit */ + bool_t b_q_scale_type; /* 1 bit */ + bool_t b_intra_vlc_format; /* 1 bit */ + bool_t b_alternate_scan; /* 1 bit */ + bool_t b_repeat_first_field; /* 1 bit */ + bool_t b_chroma_420_type; /* 1 bit */ + bool_t b_progressive_frame; /* 1 bit */ + + bool_t b_composite_display_flag; /* 1 bit */ + + struct { + bool_t b_v_axis; /* 1 bit */ + int32_t i_field_sequence; /* 3 bit */ + bool_t b_sub_carrier; /* 1 bit */ + int32_t i_burst_amplitude; /* 7 bit */ + int32_t i_sub_carrier_phase; /* 8 bit */ + } s_composite_display; /* only present when composite_display_flag is 1 */ + +} y262_picture_coding_extension_t; + + +#define H262_QUANT_MATRIX_EXTENSION_ID 3 +#if 0 +typedef struct { + bool_t b_load_intra_quantiser_matrix; /* 1 bit */ + uint8_t rgui8_intra_quantiser_matrix[64]; /* 64 x 8 bit */ + + bool_t b_load_non_intra_quantiser_matrix; /* 1 bit */ + uint8_t rgui8_non_intra_quantiser_matrix[64]; /* 64 x 8 bit */ + + bool_t b_load_chroma_intra_quantiser_matrix; /* 1 bit */ + uint8_t rgui8_chroma_intra_quantiser_matrix[64]; /* 64 x 8 bit */ + + bool_t b_load_chroma_non_intra_quantiser_matrix; /* 1 bit */ + uint8_t rgui8_chroma_non_intra_quantiser_matrix[64]; /* 64 x 8 bit */ + +} y262_quant_matrix_extension_t; +#endif + +#define H262_PICTURE_DISPLAY_EXTENSION_ID 7 +#define PICTURE_DISPLAY_EXTENSION_MAX_FRAME_CENTRE_OFFSETS 3 + +typedef struct { + int32_t i_num_frame_centre_offsets; + struct { + int32_t i_frame_centre_horizontal_offset; /* 16 bit */ + bool_t b_marker_bit_1; /* 1 bit */ + int32_t i_frame_centre_vertical_offset; /* 16 bit */ + bool_t b_marker_bit_2; /* 1 bit */ + } rgs_frame_centre_offsets[3]; +} y262_picture_display_extension_t; + + +#define H262_COPYRIGHT_EXTENSION_ID 4 +typedef struct { + bool_t b_copyright_flag; /* 1 bit */ + int32_t i_copyright_identifier; /* 8 bit */ + bool_t b_original_or_copy; /* 1 bit */ + int32_t i_reserved; /* 7 bit */ + bool_t b_marker_bit_1; /* 1 bit start code emulation prevention */ 
+ int32_t i_copyright_number_1; /* 20 bit */ + bool_t b_marker_bit_2; /* 1 bit start code emulation prevention */ + int32_t i_copyright_number_2; /* 22 bit */ + bool_t b_marker_bit_3; /* 1 bit */ + int32_t i_copyright_number_3; /* 22 bit */ +} y262_copyright_extension_t; + + +typedef struct { + int32_t rgi_mvs[ 2 ][ 2 ]; + int32_t rgi_mv_costs[ 2 ]; + int32_t i_intra_cost; + int32_t i_best_cost; + int32_t i_quantizer_scale; + int32_t i_quantizer_aq_scale; +#define LOOKAHEAD_MODE_INTRA 0 +#define LOOKAHEAD_MODE_INTER_FW 1 +#define LOOKAHEAD_MODE_INTER_BW 2 + int32_t i_best_mode; +} y262_lookahead_mb_t; + + +typedef struct +{ + int32_t i_where; + int32_t i_len; + uint8_t rgui8_user_data[ Y262_MAX_USER_DATA_SIZE ]; +} y262_user_data_t; + +typedef struct { + bool_t b_used; + uint8_t *pui8_luma; + uint8_t *pui8_cb; + uint8_t *pui8_cr; + int32_t i_pon; + int32_t i_temporal_reference; + int32_t i_vbv_delay; + int32_t i_don; + int32_t i_frame_type; + int32_t b_force_new_gop; + int32_t b_progressive_frame; + int32_t b_top_field_first; + int32_t b_repeat_first_field; + int32_t b_backward_pred_only; + + y262_lookahead_mb_t *ps_lookahead; + int32_t i_forward_pon; + int32_t i_backward_pon; + int32_t i_frame_cost; + int32_t i_frame_intra_cost; +#define MAX_BITRATE_CONTROL_LOOKAHEAD_PICTURES 40 + int32_t i_num_lookahead_pictures; + int32_t rgi_lookahead_picture_types[ MAX_BITRATE_CONTROL_LOOKAHEAD_PICTURES ]; + int32_t rgi_lookahead_picture_costs[ MAX_BITRATE_CONTROL_LOOKAHEAD_PICTURES ]; + + int32_t i_num_user_data; +#define MAX_NUM_USER_DATA 4 + y262_user_data_t *rgps_user_data[ MAX_NUM_USER_DATA ]; +} y262_picture_t; + +typedef struct { + uint8_t *pui8_luma; + int32_t i_stride_luma; + uint8_t *pui8_cb; + uint8_t *pui8_cr; + int32_t i_stride_chroma; +} y262_reference_picture_t; + +typedef struct { +#define MECALL_LOOKAHEAD 0 +#define MECALL_MAIN 1 + int32_t i_me_call; + int32_t i_num_candidates_fp; + int32_t rgi_candidates_fp[ 10 ][ 2 ]; + int32_t i_lambda; + int32_t 
i_pred_mv_x; + int32_t i_pred_mv_y; + + uint8_t *pui8_blk; + int32_t i_blk_stride; +#define BLOCK_TYPE_16x16 0 +#define BLOCK_TYPE_16x8 1 + int32_t i_blk_type; + + int32_t i_x_offset; + int32_t i_y_offset; + int32_t i_min_mv_x, i_min_mv_y; + int32_t i_max_mv_x, i_max_mv_y; + + int32_t i_ref_width; + int32_t i_ref_height; + uint8_t *pui8_ref; + int32_t i_ref_stride; + + int32_t i_best_mv_x, i_best_mv_y; + int32_t i_best_mv_sad; + int32_t i_best_mv_cost; +} y262_me_context_t; + +#define VLC_SENTINEL -1 + +typedef struct { + int32_t i_code; + int32_t i_length; +} y262_vlc_t; + +#define RUN_LEVEL_END_OF_BLOCK -1 +#define RUN_LEVEL_ESCAPE -2 +#define RUN_LEVEL_INVALID -3 + +typedef struct { + int32_t i_run; + int32_t i_level; +} y262_run_level_t; + +#define Y262_MBMODE_SKIP 0 +#define Y262_MBMODE_FW 1 +#define Y262_MBMODE_BW 2 +#define Y262_MBMODE_BI 3 +#define Y262_MBMODE_INTRA 4 +#define Y262_MBMODE_FW_IL 5 +#define Y262_MBMODE_BW_IL 6 +#define Y262_MBMODE_BI_IL 7 +#define Y262_MBMODE_INTRA_IL 8 + +typedef struct { + int32_t i_x; + int32_t i_y; +#define Y262_MV_FRAME_FIELD 0 +#define Y262_MV_TOP_FIELD 0 +#define Y262_MV_BOTTOM_FIELD 1 + int32_t i_field; + int32_t i_cost; +} y262_mv_t; + +typedef struct { + int32_t i_skip_cost; + + int32_t i_fw_cost; + y262_mv_t s_fw_mv; + + int32_t i_bw_cost; + y262_mv_t s_bw_mv; + + int32_t i_bi_cost; + y262_mv_t s_bi_mv[ 2 ]; + + int32_t i_fw_il_cost; + y262_mv_t s_fw_il_mv[ 2 ]; + + int32_t i_bw_il_cost; + y262_mv_t s_bw_il_mv[ 2 ]; + + int32_t i_bi_il_cost; + y262_mv_t s_bi_il_mv[ 2 ][ 2 ]; + + int32_t i_intra_cost; + int32_t i_intra_il_cost; +} y262_mode_decision_t; + + +typedef struct { + int32_t i_mb_x, i_mb_y; + uint8_t *pui8_src_luma; + int32_t i_src_luma_stride; + uint8_t *pui8_src_cb; + uint8_t *pui8_src_cr; + int32_t i_src_chroma_stride; + + uint8_t *pui8_dst_luma; + int32_t i_dst_luma_stride; + uint8_t *pui8_dst_cb; + uint8_t *pui8_dst_cr; + int32_t i_dst_chroma_stride; + + int32_t i_mb_addr; + int32_t i_quantizer; + 
int32_t i_scaled_quantizer; + +#define MACROBLOCK_QUANT 1 +#define MACROBLOCK_MOTION_FORWARD 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_PATTERN 8 +#define MACROBLOCK_INTRA 16 +#define MACROBLOCK_INTERLACED 32 +#define MACROBLOCK_MOTION_TYPE 64 +#define FRAME_MOTION_TYPE_FIELD 1 +#define FRAME_MOTION_TYPE_FRAME 2 +#define FRAME_MOTION_TYPE_DUAL_PRIME 3 +#define FIELD_MOTION_TYPE_FIELD 1 +#define FIELD_MOTION_TYPE_16x8 2 +#define FIELD_MOTION_TYPE_DUAL_PRIME 3 + int32_t i_macroblock_type; + + ALIGNED( 16 ) uint8_t rgui8_prediction[ 3 ][ 4 ][ 8 * 8 ]; + ALIGNED( 16 ) int16_t rgi16_residual[ 3 ][ 4 ][ 8 * 8 ]; + + bool_t rgb_cbp[ 3 ][ 4 ]; + int32_t i_cbp; + ALIGNED( 16 ) int16_t rgi16_coeffs[ 3 ][ 4 ][ 8 * 8 ]; + y262_mv_t rgs_motion[ 2 ][ 2 ]; + +#define Y262_LAMBDA_BITS 6 + int32_t i_lambda; + int32_t i_lambda_sqr; +} y262_macroblock_t; + +typedef struct { + + struct { + int32_t i_slice_vertical_position; + int32_t i_mb_row; + int32_t i_quantizer_scale_code; + bool_t b_intra_slice_flag; + bool_t b_intra_slice; + } s_slice_header; + + int32_t i_quantizer_f8; + /* coding */ + y262_macroblock_t s_macroblock; + y262_mode_decision_t s_mode_decision; + int32_t i_quantizer; + int32_t i_picture_type; + int32_t i_start_mb_addr; + int32_t i_end_mb_addr; + int32_t i_skip_run; + int32_t i_mb_addr; + int32_t rgi_dc_dct_pred[ 3 ]; + int32_t rgi_pmv[ 2 ][ 2 ][ 2 ]; + bool_t b_allow_skip; + int32_t i_last_mb_motion_flags; + +} y262_slice_t; + + +typedef int32_t ( *y262_costfunction_f ) ( uint8_t *pui8_blk1, int32_t i_stride1, uint8_t *pui8_blk2, int32_t i_stride2 ); +typedef void ( *y262_motcomp_f ) ( uint8_t *pui8_src, int32_t i_stride, uint8_t *pui8_dst, int32_t i_dst_stride ); +typedef void ( y262_thread_f ) ( void *p_arg ); + + +typedef struct { + y262_costfunction_f rgf_sad[ 2 ]; + y262_costfunction_f rgf_satd[ 2 ]; + y262_costfunction_f f_ssd_16x16; + y262_costfunction_f f_ssd_8x8; + + int32_t ( *f_variance_16x16 ) ( uint8_t *pui8_blk, int32_t i_blk_stride ); 
+ int32_t ( *f_variance_8x8 ) ( uint8_t *pui8_blk, int32_t i_blk_stride ); + + void ( *f_sub_8x8 ) ( int16_t *pi16_diff, uint8_t *pui8_src1, int32_t i_stride_src1, uint8_t *pui8_src2, int32_t i_stride_src2 ); + void ( *f_add_8x8 ) ( uint8_t *pui8_destination, int32_t i_destination_stride, uint8_t *pui8_base, int32_t i_base_stride, int16_t *pi_difference ); + + int32_t ( *f_quant8x8_inter_fw ) ( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat ); + int32_t ( *f_quant8x8_intra_fw ) ( int16_t *pi_coeffs, int32_t i_stride, uint16_t *pui16_qmat, uint16_t *pui16_bias ); + + void ( *f_fdct_8x8 ) ( int16_t *pi16_source, int16_t *pi16_destination ); + void ( *f_idct_8x8 ) ( int16_t *pi16_source, int16_t *pi16_destination ); + + + pf_error_callback_t pf_error_callback; + pf_result_callback_t pf_result_callback; + pf_rcsample_callback_t pf_rcsample_callback; + + y262_motcomp_f rgf_motcomp_copy[ 5 ][ 4 ]; + y262_motcomp_f rgf_motcomp_avg[ 5 ][ 4 ]; + +} y262_function_toolbox_t; + +typedef struct { + int32_t i_bits; + double d_quantizer; + double d_cplx; + int32_t i_estimated_bits; + uint8_t ui8_frame_type; + int32_t i_satd_cost; +} y262_ratectrl_isample_t; + +typedef struct { + int32_t i_quantizer_f8; + int32_t i_bits; + int32_t i_cplx_f8; + int32_t i_estimated_bits; + uint8_t ui8_frame_type; + int32_t i_satd_cost; +} y262_ratectrl_sample_t; + +typedef struct +{ + int32_t i_satd; + int32_t i_scaled_satd; + int32_t i_quantizer; + int32_t i_coded_bits; + int32_t i_predicted_bits; +} y262_ratectrl_mb_sample_t; + +typedef struct { + int32_t i_mode; + int32_t i_bitrate; + int32_t i_vbvrate; + int32_t i_vbv_size; + int32_t i_vbv_occupancy; + int64_t i64_vbv_occupancy_fractional; + int32_t i_vbv_incoming; + int32_t i_vbv_outgoing; + int32_t i_vbv_occupancy_overflow; + int64_t i64_vbv_occupancy_overflow_fractional; + int32_t i_timescale; + int32_t i_picture_duration; + int32_t i_pulldown_timescale; + int32_t i_pulldown_picture_duration; + int32_t i_quantizer; + +#define 
MAX_LOOKAHEAD_SAMPLES 50 + int32_t i_num_lookahead_samples; + struct { + int32_t i_frame_type; + int32_t i_frame_cost; + } rgs_lookahead_samples[ MAX_LOOKAHEAD_SAMPLES ]; + + int64_t i64_output_ticks; + int64_t i64_output_frames; + int64_t i64_output_seconds; + + double d_target_bits; + double d_output_bits; + double d_qb_qplx; + + double d_target_bits_2p; + double d_estimated_bits; + double d_qb_qplx_2p; + + double rgd_satd_predictors[ 4 ]; + double rgd_satd_predictors_weight[ 4 ]; + + double d_confidence_predict_behind; + double d_confidence_predict_ahead; + + int32_t i_i_picture_baseline_bits; + int32_t i_min_satd_for_satd_prediction; + int32_t i_min_bits_for_satd_prediction; + + /* picture coding state */ + int32_t i_picture_bit_budget; + int32_t i_predicted_frame_size; + int32_t i_picture_adjusted_bit_budget; + int32_t i_picture_coded_scaled_satd; + int32_t i_picture_coded_size; + int32_t i_picture_scaled_satd; + int32_t i_picture_accumulated_quantizer; + int32_t i_picture_num_accumulated_quantizer; + double d_picture_accumulated_quantizer_bits; + double d_picture_accumulated_bits_quantizer_over_satd; + int32_t i_num_picture_accumulated_bits_quantizer_over_satd; + bool_t b_picture_bad_encountered; + int32_t i_picture_uncoded_size; + int32_t i_predicted_frame_size_behind; + int32_t i_predicted_frame_size_ahead; + + + int32_t i_num_samples; + int32_t i_current_sample; + y262_ratectrl_isample_t *ps_samples; + + double rgd_last_ref_quantizers[ 2 ]; + int rgi_last_ref_quantizers_pons[ 2 ]; + + bool_t b_picture_reencode_pass; + y262_ratectrl_mb_sample_t *ps_mb_samples; +} y262_bitrate_control_t; + +typedef struct { + int32_t i_slice_bit_budget; + int32_t i_slice_bit_budget_extra; + int32_t i_slice_coded_scaled_satd; + int32_t i_slice_coded_size; + int32_t i_slice_scaled_satd; + int32_t i_slice_accumulated_quantizer; + int32_t i_slice_num_accumulated_quantizer; + double d_slice_accumulated_quantizer_bits; + int32_t i_mb_queued_quantizer_f8; + double 
d_slice_accumulated_bits_quantizer_over_satd; + int32_t i_num_slice_accumulated_bits_quantizer_over_satd; + bool_t b_slice_bad_encountered; + bool_t b_reencode_pass; + int32_t i_slice_accumulated_predicted_size; +} y262_slice_encoder_bitrate_control_t; + +typedef struct { + int32_t i_slice_encoder_idx; +#define Y262_SLICE_THREAD_CMD_LOOKAHEAD 0 +#define Y262_SLICE_THREAD_CMD_ENCODE 1 +#define Y262_SLICE_THREAD_CMD_EXIT 2 + int32_t i_command; + void *p_thread; + void *p_start_event; + void *p_finished_event; + struct y262_s *ps_y262; + int32_t i_picture_type; + int32_t i_first_slice_row; + int32_t i_last_slice_row; + /* lookahead */ + y262_picture_t *ps_pic; + y262_picture_t *ps_fw_ref; + y262_picture_t *ps_bw_ref; +} y262_slice_thread_t; + + +typedef struct +{ +#define Y262_LOOKAHEAD_THREAD_CMD_LOOKAHEAD 0 +#define Y262_LOOKAHEAD_THREAD_CMD_EXIT 1 + int32_t i_command; + void *p_thread; + void *p_start_event; + void *p_finished_event; +} y262_lookahead_thread_t; + + +typedef struct y262_s { + void *p_cb_handle; + y262_function_toolbox_t s_funcs; + y262_bitstream_t s_bitstream; + y262_bitrate_control_t s_ratectrl; + y262_slice_encoder_bitrate_control_t s_slice_encoder_ratectrl; + y262_picture_t *ps_input_picture; + + bool_t b_multithreading; + void *p_resource_mutex; +#define MAX_NUM_SLICE_ENCODERS 8 + int32_t i_num_slice_encoders; + struct y262_s *rgps_slice_encoders[ MAX_NUM_SLICE_ENCODERS ]; + y262_slice_thread_t rgs_slice_threads[ MAX_NUM_SLICE_ENCODERS ]; + + bool_t b_lookahead_running; + int32_t i_num_lookahead_encoders; + y262_lookahead_thread_t s_lookahead_thread; + y262_slice_thread_t rgs_lookahead_threads[ MAX_NUM_SLICE_ENCODERS ]; + +#define MAX_BUFFERED_INPUT_PICTURES 128 + int32_t i_num_lookahead_pictures; + int32_t i_max_buffered_input_pictures; + int32_t i_current_input_pon; + int32_t i_current_input_field; + int32_t i_lookahead_next_ref; + int32_t i_lookahead_next_pon; + int32_t i_leading_lookahead_don; + int32_t i_current_lookahead_don; + int32_t 
i_current_encoder_don; + int32_t i_keyframe_countdown; + int32_t i_last_keyframe_temporal_reference; + int32_t i_current_eof_pon; + int32_t i_current_eof_don; + y262_picture_t rgs_buffered_input_pictures[ MAX_BUFFERED_INPUT_PICTURES ]; + bool_t b_flushing; + + bool_t b_next_reference_picture_toggle; + y262_reference_picture_t rgs_frame_buffer[ 3 ]; + + int32_t *rgpi_mbtree_references[ 4 ]; + + y262_reference_picture_t *ps_refpic_forward; + y262_reference_picture_t *ps_refpic_backward; + y262_reference_picture_t *ps_refpic_dst; + + bool_t b_sequence_mpeg1; + int32_t i_sequence_display_width; + int32_t i_sequence_display_height; + int32_t i_sequence_width; + int32_t i_sequence_height; + int32_t i_sequence_chroma_width; + int32_t i_sequence_chroma_height; + int32_t i_sequence_chroma_format; + int32_t i_sequence_video_format; + int32_t i_sequence_frame_rate_code; + int32_t i_sequence_pulldown_frame_rate_code; + int32_t i_sequence_frame_rate_extension_n; + int32_t i_sequence_frame_rate_extension_d; + int32_t i_sequence_aspect_ratio_information; + int32_t i_sequence_derived_picture_duration; + int32_t i_sequence_derived_timescale; + int32_t i_sequence_derived_pulldown_picture_duration; + int32_t i_sequence_derived_pulldown_timescale; + int32_t i_sequence_num_bframes; + int32_t i_sequence_keyframe_distance; + + int32_t i_derived_profile; + int32_t i_derived_level; + + int32_t rgi_fcode[ 2 ][ 2 ]; + int32_t i_intra_dc_precision; + bool_t b_progressive_sequence; + bool_t b_frame_pred_frame_dct; + bool_t b_qscale_type; + bool_t b_intra_vlc_format; + bool_t b_closed_gop; + bool_t b_sequence_cbr; + int32_t rgi_y262_motion_bits_x[ 2048 + 1 + 2048 ]; + int32_t rgi_y262_motion_bits_y[ 2048 + 1 + 2048 ]; + + uint8_t rgui8_intra_quantiser_matrix[64]; + uint8_t rgui8_non_intra_quantiser_matrix[64]; + ALIGNED( 16 ) uint16_t rgui16_intra_quantizer_matrices[ 122 ][ 64 ]; + ALIGNED( 16 ) uint16_t rgui16_intra_quantizer_matrices_bias[ 122 ][ 64 ]; + ALIGNED( 16 ) uint16_t 
rgui16_intra_quantizer_matrices_trellis_bias[ 122 ][ 64 ]; + ALIGNED( 16 ) uint16_t rgui16_non_intra_quantizer_matrices[ 122 ][ 64 ]; + + int32_t i_quantizer; + + bool_t b_variance_aq; + int32_t i_psyrd_strength; + int32_t i_quality_for_speed; + + struct { + int8_t rgi8_path_active[ 2 ][ 65 ]; + int8_t rgi8_path_idx[ 65 ][ 65 ]; + int16_t rgi16_path_level[ 65 ][ 65 ]; + int32_t rgi_path_cost[ 65 ]; + } trellis; + +} y262_t; diff --git a/src/y262/x86inc.asm b/src/y262/x86inc.asm new file mode 100644 index 0000000..ee3eca9 --- /dev/null +++ b/src/y262/x86inc.asm @@ -0,0 +1,622 @@ +;***************************************************************************** +;* x86inc.asm +;***************************************************************************** +;* Copyright (C) 2005-2008 x264 project +;* +;* Authors: Loren Merritt +;* Anton Mitrofanov +;* +;* Permission to use, copy, modify, and/or distribute this software for any +;* purpose with or without fee is hereby granted, provided that the above +;* copyright notice and this permission notice appear in all copies. +;* +;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +;***************************************************************************** + +; This is a header file for the x264ASM assembly language, which uses +; NASM/YASM syntax combined with a large number of macros to provide easy +; abstraction between different calling conventions (x86_32, win64, linux64). 
+; It also has various other useful features to simplify writing the kind of +; DSP functions that are most often used in x264. + +; Unlike the rest of x264, this file is available under an ISC license, as it +; has significant usefulness outside of x264 and we want it to be available +; to the largest audience possible. Of course, if you modify it for your own +; purposes to add a new feature, we strongly encourage contributing a patch +; as this feature might be useful for others as well. Send patches or ideas +; to x264-devel@videolan.org . + +%ifdef ARCH_X86_64 + %ifidn __OUTPUT_FORMAT__,win32 + %define WIN64 + %else + %define UNIX64 + %endif +%endif + +%ifdef PREFIX + %define mangle(x) _ %+ x +%else + %define mangle(x) x +%endif + +; FIXME: All of the 64bit asm functions that take a stride as an argument +; via register, assume that the high dword of that register is filled with 0. +; This is true in practice (since we never do any 64bit arithmetic on strides, +; and x264's strides are all positive), but is not guaranteed by the ABI. + +; Name of the .rodata section. +; Kludge: Something on OS X fails to align .rodata even given an align attribute, +; so use a different read-only section. +%macro SECTION_RODATA 0-1 16 + %ifidn __OUTPUT_FORMAT__,macho64 + SECTION .text align=%1 + %elifidn __OUTPUT_FORMAT__,macho + SECTION .text align=%1 + fakegot: + %else + SECTION .rodata align=%1 + %endif +%endmacro + +%ifdef WIN64 + %define PIC +%elifndef ARCH_X86_64 +; x86_32 doesn't require PIC. +; Some distros prefer shared objects to be PIC, but nothing breaks if +; the code contains a few textrels, so we'll skip that complexity. + %undef PIC +%endif +%ifdef PIC + default rel +%endif + +; Macros to eliminate most code duplication between x86_32 and x86_64: +; Currently this works only for leaf functions which load all their arguments +; into registers at the start, and make no other use of the stack. Luckily that +; covers most of x264's asm. 
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used. pushes callee-saved regs if needed.
+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
+; %4 = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,0, dst, src, tmp
+; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE
+
+; REP_RET:
+; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
+; which are slow when a normal ret follows a branch.
+
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+; DECLARE_REG n, qreg, dreg, wreg, breg, location
+; Defines r<n>q/r<n>d/r<n>w/r<n>b as the given register names, r<n>m as the
+; argument's original location and r<n>mp as its native-size form (the
+; register itself, or a qword/dword memory operand). r<n> aliases r<n>q.
+%macro DECLARE_REG 6
+    %define r%1q %2
+    %define r%1d %3
+    %define r%1w %4
+    %define r%1b %5
+    %define r%1m %6
+    %ifid %6 ; i.e. it's a register
+        %define r%1mp %2
+    %elifdef ARCH_X86_64 ; memory
+        %define r%1mp qword %6
+    %else
+        %define r%1mp dword %6
+    %endif
+    %define r%1 %2
+%endmacro
+
+; DECLARE_REG_SIZE name, bytereg
+; Defines size-suffixed aliases (q/d/w/b) for both the r<name> and e<name>
+; spellings of a legacy register; on non-64-bit builds r<name> maps to e<name>.
+%macro DECLARE_REG_SIZE 2
+    %define r%1q r%1
+    %define e%1q r%1
+    %define r%1d e%1
+    %define e%1d e%1
+    %define r%1w %1
+    %define e%1w %1
+    %define r%1b %2
+    %define e%1b %2
+%ifndef ARCH_X86_64
+    %define r%1 e%1
+%endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al
+DECLARE_REG_SIZE bx, bl
+DECLARE_REG_SIZE cx, cl
+DECLARE_REG_SIZE dx, dl
+DECLARE_REG_SIZE si, sil
+DECLARE_REG_SIZE di, dil
+DECLARE_REG_SIZE bp, bpl
+
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+; DECLARE_REG_TMP a, b, ... defines t0 as r<a>, t1 as r<b>, and so on.
+%macro DECLARE_REG_TMP 1-*
+    %assign %%i 0
+    %rep %0
+        CAT_XDEFINE t, %%i, r%1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+%endmacro
+
+; For every listed index N, defines tNq/tNd/tNw/tNb as tN with the size
+; suffix appended.
+%macro DECLARE_REG_TMP_SIZE 0-*
+    %rep %0
+        %define t%1q t%1 %+ q
+        %define t%1d t%1 %+ d
+        %define t%1w t%1 %+ w
+        %define t%1b t%1 %+ b
+        %rotate 1
+    %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
+
+; Size in bytes of one general purpose register / one push or pop.
+%ifdef ARCH_X86_64
+    %define gprsize 8
+%else
+    %define gprsize 4
+%endif
+
+; PUSH/POP/SUB/ADD wrap the plain instructions and keep stack_offset in step
+; with the stack pointer so that the rNm argument locations stay valid.
+; SUB/ADD only adjust stack_offset when the first operand is literally rsp.
+%macro PUSH 1
+    push %1
+    %assign stack_offset stack_offset+gprsize
+%endmacro
+
+%macro POP 1
+    pop %1
+    %assign stack_offset stack_offset-gprsize
+%endmacro
+
+%macro SUB 2
+    sub %1, %2
+    %ifidn %1, rsp
+        %assign stack_offset stack_offset+(%2)
+    %endif
+%endmacro
+
+%macro ADD 2
+    add %1, %2
+    %ifidn %1, rsp
+        %assign stack_offset stack_offset-(%2)
+    %endif
+%endmacro
+
+; Emit the move only when source and destination are textually different.
+%macro movifnidn 2
+    %ifnidn %1, %2
+        mov %1, %2
+    %endif
+%endmacro
+
+%macro movsxdifnidn 2
+    %ifnidn %1, %2
+        movsxd %1, %2
+    %endif
+%endmacro
+
+; Assembly-time assertion: raises an assembler error when the expression is 0.
+%macro ASSERT 1
+    %if (%1) == 0
+        %error assert failed
+    %endif
+%endmacro
+
+; DEFINE_ARGS name0, name1, ...
+; First undefines the aliases created by the previous invocation (tracked in
+; n_arg_names / arg_name<i>), then defines <name>q/d/w/b/m for each listed
+; name as the matching r<i> register alias, in argument order.
+%macro DEFINE_ARGS 0-*
+    %ifdef n_arg_names
+        %assign %%i 0
+        %rep n_arg_names
+            CAT_UNDEF arg_name %+ %%i, q
+            CAT_UNDEF arg_name %+ %%i, d
+            CAT_UNDEF arg_name %+ %%i, w
+            CAT_UNDEF arg_name %+ %%i, b
+            CAT_UNDEF arg_name %+ %%i, m
+            CAT_UNDEF arg_name, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+
+    %assign %%i 0
+    %rep %0
+        %xdefine %1q r %+ %%i %+ q
+        %xdefine %1d r %+ %%i %+ d
+        %xdefine %1w r %+ %%i %+ w
+        %xdefine %1b r %+ %%i %+ b
+        %xdefine %1m r %+ %%i %+ m
+        CAT_XDEFINE arg_name, %%i, %1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+    %assign n_arg_names %%i
+%endmacro
+
+%ifdef WIN64 ; Windows x64 ;=================================================
+
+DECLARE_REG 0, rcx, ecx, cx, cl, ecx
+DECLARE_REG 1, rdx, edx, dx, dl, edx
+DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
+DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
+%define r7m [rsp + stack_offset + 64]
+%define r8m [rsp + stack_offset + 72]
+
+; Loads argument reg_id from its stack slot when the function declares more
+; than reg_id arguments.
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+    %if %1 < %2
+        mov r%1, [rsp + stack_offset + 8 + %1*8]
+    %endif
+%endmacro
+
+; WIN64 prologue: pushes r4/r5 when more than 4 registers are used, spills
+; xmm6 and above into a freshly reserved stack area when more than 6 xmm
+; registers are used, then loads arguments 4..6 and names the arguments.
+%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
+    ASSERT %2 >= %1
+    %assign regs_used %2
+    ASSERT regs_used <= 7
+    %assign xmm_regs_used %3
+    ASSERT xmm_regs_used <= 16
+    %if regs_used > 4
+        push r4
+        push r5
+        %assign stack_offset stack_offset+16
+    %endif
+    %if xmm_regs_used > 6
+        sub rsp, (xmm_regs_used-6)*16+16
+        %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
+        %assign %%i xmm_regs_used
+        %rep (xmm_regs_used-6)
+            %assign %%i %%i-1
+            movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
+        %endrep
+    %endif
+    LOAD_IF_USED 4, %1
+    LOAD_IF_USED 5, %1
+    LOAD_IF_USED 6, %1
+    DEFINE_ARGS %4
+%endmacro
+
+; Reloads the xmm registers spilled by PROLOGUE from the area addressed by %1
+; and releases that area. Does not touch stack_offset.
+%macro RESTORE_XMM_INTERNAL 1
+    %if xmm_regs_used > 6
+        %assign %%i xmm_regs_used
+        %rep (xmm_regs_used-6)
+            %assign %%i %%i-1
+            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
+        %endrep
+        add %1, (xmm_regs_used-6)*16+16
+    %endif
+%endmacro
+
+; NOTE(review): PROLOGUE adds (xmm_regs_used-6)*16+16 to stack_offset, but the
+; expression below evaluates to stack_offset - (xmm_regs_used-6)*16 + 16, so
+; the two adjustments do not cancel. The %assign is also outside any
+; xmm_regs_used > 6 guard. Confirm against upstream before relying on
+; stack_offset after RESTORE_XMM.
+%macro RESTORE_XMM 1
+    RESTORE_XMM_INTERNAL %1
+    %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
+    %assign xmm_regs_used 0
+%endmacro
+
+%macro RET 0
+    RESTORE_XMM_INTERNAL rsp
+    %if regs_used > 4
+        pop r5
+        pop r4
+    %endif
+    ret
+%endmacro
+
+; Falls back to the full RET whenever PROLOGUE pushed or spilled anything.
+%macro REP_RET 0
+    %if regs_used > 4 || xmm_regs_used > 6
+        RET
+    %else
+        rep ret
+    %endif
+%endmacro
+
+%elifdef ARCH_X86_64 ; *nix x64 ;=============================================
+
+DECLARE_REG 0, rdi, edi, di, dil, edi
+DECLARE_REG 1, rsi, esi, si, sil, esi
+DECLARE_REG 2, rdx, edx, dx, dl, edx
+DECLARE_REG 3, rcx, ecx, cx, cl, ecx
+DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
+%define r7m [rsp + stack_offset + 16]
+%define r8m [rsp + stack_offset + 24]
+
+; Only argument 6 is ever loaded through this macro in this branch; for
+; reg_id 6 the address evaluates to [rsp + 8].
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+    %if %1 < %2
+        mov r%1, [rsp - 40 + %1*8]
+    %endif
+%endmacro
+
+; Unix x64 prologue: nothing is pushed; only argument 6 may need loading.
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+    ASSERT %2 >= %1
+    ASSERT %2 <= 7
+    LOAD_IF_USED 6, %1
+    DEFINE_ARGS %4
+%endmacro
+
+%macro RET 0
+    ret
+%endmacro
+
+%macro REP_RET 0
+    rep ret
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
+DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
+DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
+DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
+DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
+DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
+DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
+%define r7m [esp + stack_offset + 32]
+%define r8m [esp + stack_offset + 36]
+%define rsp esp
+
+; Pushes register reg_id (and accounts for it in stack_offset) when the
+; function uses more than reg_id registers.
+%macro PUSH_IF_USED 1 ; reg_id
+    %if %1 < regs_used
+        push r%1
+        %assign stack_offset stack_offset+4
+    %endif
+%endmacro
+
+%macro POP_IF_USED 1 ; reg_id
+    %if %1 < regs_used
+        pop r%1
+    %endif
+%endmacro
+
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+    %if %1 < %2
+        mov r%1, [esp + stack_offset + 4 + %1*4]
+    %endif
+%endmacro
+
+; x86_32 prologue: saves r3..r6 as needed, then loads every declared argument
+; from the stack into its register.
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+    ASSERT %2 >= %1
+    %assign regs_used %2
+    ASSERT regs_used <= 7
+    PUSH_IF_USED 3
+    PUSH_IF_USED 4
+    PUSH_IF_USED 5
+    PUSH_IF_USED 6
+    LOAD_IF_USED 0, %1
+    LOAD_IF_USED 1, %1
+    LOAD_IF_USED 2, %1
+    LOAD_IF_USED 3, %1
+    LOAD_IF_USED 4, %1
+    LOAD_IF_USED 5, %1
+    LOAD_IF_USED 6, %1
+    DEFINE_ARGS %4
+%endmacro
+
+; Pops in the reverse order of the pushes done by PROLOGUE.
+%macro RET 0
+    POP_IF_USED 6
+    POP_IF_USED 5
+    POP_IF_USED 4
+    POP_IF_USED 3
+    ret
+%endmacro
+
+%macro REP_RET 0
+    %if regs_used > 3
+        RET
+    %else
+        rep ret
+    %endif
+%endmacro
+
+%endif ;======================================================================
+
+
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Symbol prefix for C linkage
+; cglobal name [, PROLOGUE arguments...]
+; Exports the mangled symbol (hidden visibility on elf), aligns and emits the
+; label, resets stack_offset to 0 and runs PROLOGUE when extra arguments were
+; supplied.
+%macro cglobal 1-2+
+    %xdefine %1 mangle(%1)
+    %xdefine %1.skip_prologue %1 %+ .skip_prologue
+    %ifidn __OUTPUT_FORMAT__,elf
+        global %1:function hidden
+    %else
+        global %1
+    %endif
+    align function_align
+    %1:
+    RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
+    %assign stack_offset 0
+    %if %0 > 1
+        PROLOGUE %2
+    %endif
+%endmacro
+
+; Declares an external symbol under its mangled name.
+%macro cextern 1
+    %xdefine %1 mangle(%1)
+    extern %1
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is
+; executable by default. 
+%ifidn __OUTPUT_FORMAT__,elf
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+
+; merge mmx and sse*
+
+; CAT_XDEFINE a, b, value: %xdefine the symbol formed by concatenating a and b.
+%macro CAT_XDEFINE 3
+    %xdefine %1%2 %3
+%endmacro
+
+; CAT_UNDEF a, b: undefine the symbol formed by concatenating a and b.
+%macro CAT_UNDEF 2
+    %undef %1%2
+%endmacro
+
+; Selects the MMX register set: m0..m7 map to mm0..mm7, nmm0..nmm7 map to
+; their own index, and the mova/movu/movh/movnt aliases pick the MMX moves.
+; The second %rep continues with %%i = 8..15 and removes any m8..m15 /
+; nmm8..nmm15 definitions.
+%macro INIT_MMX 0
+    %define RESET_MM_PERMUTATION INIT_MMX
+    %define mmsize 8
+    %define num_mmregs 8
+    %define mova movq
+    %define movu movq
+    %define movh movd
+    %define movnt movntq
+    %assign %%i 0
+    %rep 8
+        CAT_XDEFINE m, %%i, mm %+ %%i
+        CAT_XDEFINE nmm, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %rep 8
+        CAT_UNDEF m, %%i
+        CAT_UNDEF nmm, %%i
+        %assign %%i %%i+1
+    %endrep
+%endmacro
+
+; Selects the SSE register set: m<i> maps to xmm<i> for 8 registers, or 16
+; when ARCH_X86_64 is defined, and the move aliases pick the 128-bit forms.
+%macro INIT_XMM 0
+    %define RESET_MM_PERMUTATION INIT_XMM
+    %define mmsize 16
+    %define num_mmregs 8
+    %ifdef ARCH_X86_64
+        %define num_mmregs 16
+    %endif
+    %define mova movdqa
+    %define movu movdqu
+    %define movh movq
+    %define movnt movntdq
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, xmm %+ %%i
+        CAT_XDEFINE nxmm, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+%endmacro
+
+; MMX is the register set in effect until a file invokes INIT_XMM.
+INIT_MMX
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles. 
+
+; PERMUTE a0, b0, a1, b1, ...
+; Works in two passes so that all sources are read before any destination is
+; written: the first loop snapshots m<b>/nm<b> into tmp<b>/ntmp<b>, the second
+; assigns those snapshots to m<a>/nm<a> and drops the temporaries.
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+%rep %0/2
+    %xdefine tmp%2 m%2
+    %xdefine ntmp%2 nm%2
+    %rotate 2
+%endrep
+%rep %0/2
+    %xdefine m%1 tmp%2
+    %xdefine nm%1 ntmp%2
+    %undef tmp%2
+    %undef ntmp%2
+    %rotate 2
+%endrep
+%endmacro
+
+; SWAP a, b, c, ... exchanges the register names of each adjacent pair in
+; turn (a<->b, then b<->c, ...), keeping the reverse n<reg> lookup in sync.
+%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
+%rep %0-1
+%ifdef m%1
+    %xdefine tmp m%1
+    %xdefine m%1 m%2
+    %xdefine m%2 tmp
+    CAT_XDEFINE n, m%1, %1
+    CAT_XDEFINE n, m%2, %2
+%else
+    ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
+    ; Be careful using this mode in nested macros though, as in some cases there may be
+    ; other copies of m# that have already been dereferenced and don't get updated correctly.
+    %xdefine %%n1 n %+ %1
+    %xdefine %%n2 n %+ %2
+    %xdefine tmp m %+ %%n1
+    CAT_XDEFINE m, %%n1, m %+ %%n2
+    CAT_XDEFINE m, %%n2, tmp
+    CAT_XDEFINE n, m %+ %%n1, %%n1
+    CAT_XDEFINE n, m %+ %%n2, %%n2
+%endif
+    %undef tmp
+    %rotate 1
+%endrep
+%endmacro
+
+; If SAVE_MM_PERMUTATION is placed at the end of a function and given the
+; function name, then any later calls to that function will automatically
+; load the permutation, so values can be returned in mmregs. 
+; Records the current m0..m<num_mmregs-1> mapping as <name>_m<i>.
+%macro SAVE_MM_PERMUTATION 1 ; name to save as
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE %1_m, %%i, m %+ %%i
+        %assign %%i %%i+1
+    %endrep
+%endmacro
+
+; Restores the mapping stored under <name>_m<i> and rebuilds the reverse
+; n<reg> lookup for every register.
+%macro LOAD_MM_PERMUTATION 1 ; name to load from
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, %1_m %+ %%i
+        CAT_XDEFINE n, m %+ %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+%endmacro
+
+; Wraps the call instruction: after calling a function for which a
+; permutation was saved (<name>_m0 is defined), that permutation is loaded.
+%macro call 1
+    call %1
+    %ifdef %1_m0
+        LOAD_MM_PERMUTATION %1
+    %endif
+%endmacro
+
+; Substitutions that reduce instruction size but are functionally equivalent
+; A numeric operand of exactly 128 is rewritten as the opposite operation
+; with -128; every other operand is passed through unchanged.
+%macro add 2
+    %ifnum %2
+        %if %2==128
+            sub %1, -128
+        %else
+            add %1, %2
+        %endif
+    %else
+        add %1, %2
+    %endif
+%endmacro
+
+%macro sub 2
+    %ifnum %2
+        %if %2==128
+            add %1, -128
+        %else
+            sub %1, %2
+        %endif
+    %else
+        sub %1, %2
+    %endif
+%endmacro
diff --git a/src/y262/y262.c b/src/y262/y262.c
new file mode 100644
index 0000000..764ed9d
--- /dev/null
+++ b/src/y262/y262.c
@@ -0,0 +1,3430 @@
+/*
+Copyright (c) 2013, Ralf Willenbacher
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "y262.h"
+
+
+
+int32_t y262_get_mbmode_cost( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode );
+
+
+/*
+ * Formats a printf style message and hands it to the error callback.
+ *
+ * ps_y262      encoder context; nothing happens when its
+ *              s_funcs.pf_error_callback is not set.
+ * i_error_code passed through to the callback unchanged.
+ * pi8_format   printf style format string, followed by its arguments.
+ *
+ * The message is built in a 0x2000 byte stack buffer; vsnprintf truncates
+ * anything longer.
+ */
+void y262_error( y262_t *ps_y262, int32_t i_error_code, int8_t* pi8_format, ... )
+{
+    if( ps_y262->s_funcs.pf_error_callback )
+    {
+        va_list p_va;
+        int8_t rgi8_tmpbuffer[ 0x2000 ];
+
+        va_start( p_va, pi8_format );
+
+        vsnprintf( rgi8_tmpbuffer, 0x2000, pi8_format, p_va );
+
+        /* every va_start needs a matching va_end before the function returns */
+        va_end( p_va );
+
+        ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, i_error_code, rgi8_tmpbuffer );
+    }
+}
+
+
+
+/*
+ * Resets the three intra DC predictors of the slice to
+ * 1 << ( 7 + i_intra_dc_precision ).
+ */
+void y262_slice_reset_predictors_intra( y262_t *ps_y262, y262_slice_t *ps_slice )
+{
+    ps_slice->rgi_dc_dct_pred[ 0 ] = 1 << ( 7 + ps_y262->i_intra_dc_precision );
+    ps_slice->rgi_dc_dct_pred[ 1 ] = 1 << ( 7 + ps_y262->i_intra_dc_precision );
+    ps_slice->rgi_dc_dct_pred[ 2 ] = 1 << ( 7 + ps_y262->i_intra_dc_precision );
+}
+
+/*
+ * Resets all eight entries of the slice's motion vector predictor array
+ * rgi_pmv[ 2 ][ 2 ][ 2 ] to zero. ps_y262 is not used.
+ */
+void y262_slice_reset_predictors_inter( y262_t *ps_y262, y262_slice_t *ps_slice )
+{
+    ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ] = 0;
+    ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ] = 0;
+    ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ] = 0;
+    ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ] = 0;
+    ps_slice->rgi_pmv[ 1 ][ 0 ][ 0 ] = 0;
+    ps_slice->rgi_pmv[ 1 ][ 0 ][ 1 ] = 0;
+    ps_slice->rgi_pmv[ 1 ][ 1 ][ 0 ] = 0;
+    ps_slice->rgi_pmv[ 1 ][ 1 ][ 1 ] = 0;
+}
+
+
+/*
+ * Prepares ps_slice->s_macroblock for the macroblock with index i_mb_idx:
+ * stores its address and pel position and sets up the source and
+ * reconstruction plane pointers and strides.
+ */
+void y262_init_macroblock( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mb_idx )
+{
+    int32_t i_pel_x, i_pel_y, i_chroma_pel_x, i_chroma_pel_y, i_stride_chroma;
+    y262_macroblock_t *ps_mb;
+
+    ps_mb = 
&ps_slice->s_macroblock;
+    ps_mb->i_mb_addr = i_mb_idx;
+
+    /* luma pel position of the macroblock; i_sequence_width >> 4 is the number of macroblocks per row */
+    i_pel_x = ( i_mb_idx % ( ps_y262->i_sequence_width >> 4 ) ) << 4;
+    i_pel_y = ( i_mb_idx / ( ps_y262->i_sequence_width >> 4 ) ) << 4;
+
+    /* the input picture is addressed with the sequence width as stride, the reconstruction with its own stride */
+    ps_mb->pui8_src_luma = ps_y262->ps_input_picture->pui8_luma + i_pel_x + ( i_pel_y * ps_y262->i_sequence_width );
+    ps_mb->i_src_luma_stride = ps_y262->i_sequence_width;
+
+    ps_mb->pui8_dst_luma = ps_y262->ps_refpic_dst->pui8_luma + i_pel_x + ( i_pel_y * ps_y262->ps_refpic_dst->i_stride_luma );
+    ps_mb->i_dst_luma_stride = ps_y262->ps_refpic_dst->i_stride_luma;
+
+    /* note: i_mb_x / i_mb_y hold pel coordinates, not macroblock counts */
+    ps_mb->i_mb_x = i_pel_x;
+    ps_mb->i_mb_y = i_pel_y;
+
+    /* chroma position: halved in x for 420 and 422, halved in y for 420 only */
+    /* NOTE(review): no default case - the chroma position stays unset for any other format value */
+    switch( ps_y262->i_sequence_chroma_format )
+    {
+        case Y262_CHROMA_FORMAT_420:
+            i_chroma_pel_x = i_pel_x >> 1;
+            i_chroma_pel_y = i_pel_y >> 1;
+            break;
+        case Y262_CHROMA_FORMAT_422:
+            i_chroma_pel_x = i_pel_x >> 1;
+            i_chroma_pel_y = i_pel_y;
+            break;
+        case Y262_CHROMA_FORMAT_444:
+            i_chroma_pel_x = i_pel_x;
+            i_chroma_pel_y = i_pel_y;
+            break;
+    }
+    i_stride_chroma = ps_y262->i_sequence_chroma_width;
+
+    ps_mb->pui8_src_cb = ps_y262->ps_input_picture->pui8_cb + i_chroma_pel_x + ( i_chroma_pel_y * i_stride_chroma );
+    ps_mb->pui8_src_cr = ps_y262->ps_input_picture->pui8_cr + i_chroma_pel_x + ( i_chroma_pel_y * i_stride_chroma );
+    ps_mb->i_src_chroma_stride = i_stride_chroma;
+
+    ps_mb->pui8_dst_cb = ps_y262->ps_refpic_dst->pui8_cb + i_chroma_pel_x + ( i_chroma_pel_y * ps_y262->ps_refpic_dst->i_stride_chroma );
+    ps_mb->pui8_dst_cr = ps_y262->ps_refpic_dst->pui8_cr + i_chroma_pel_x + ( i_chroma_pel_y * ps_y262->ps_refpic_dst->i_stride_chroma );
+    ps_mb->i_dst_chroma_stride = ps_y262->ps_refpic_dst->i_stride_chroma;
+
+
+}
+
+
+/*
+ * Returns the address of 8x8 block i_blk_idx inside a macroblock plane.
+ *
+ * pui8_ptr / i_stride  top left pel of the macroblock in the plane and the plane stride.
+ * i_plane_idx          0 selects the luma offset tables, anything else the chroma tables.
+ * b_interlaced         selects the "_il_" offset tables.
+ * pi_adjusted_stride   receives the stride to use when walking the rows of the block.
+ *
+ * With b_interlaced set the returned stride is doubled for luma and for
+ * every chroma format except 420.
+ */
+uint8_t *y262_blk_pointer_adjust( y262_t *ps_y262, uint8_t *pui8_ptr, int32_t i_stride, int32_t i_plane_idx, int32_t i_blk_idx, bool_t b_interlaced, int32_t *pi_adjusted_stride )
+{
+    int32_t i_blk_stride;
+    uint8_t *pui8_blk;
+
+    if( !b_interlaced )
+    {
+        if( i_plane_idx == 0 )
+        {
+            i_blk_stride = i_stride;
+            pui8_blk = pui8_ptr + rgui_y262_luma_blk_offsets[ i_blk_idx ][ 0 ] + ( rgui_y262_luma_blk_offsets[ i_blk_idx ][ 1 ] * i_blk_stride );
+        }
+        else
+        {
+            i_blk_stride = i_stride;
+            pui8_blk = pui8_ptr + rgui_y262_chroma_blk_offsets[ ps_y262->i_sequence_chroma_format ][ i_blk_idx ][ 0 ] + ( rgui_y262_chroma_blk_offsets[ ps_y262->i_sequence_chroma_format ][ i_blk_idx ][ 1 ] * i_blk_stride );
+        }
+    }
+    else
+    {
+        if( i_plane_idx == 0 )
+        {
+            /* the block offset is applied with the plane stride, the rows are then walked with twice that stride */
+            pui8_blk = pui8_ptr + rgui_y262_luma_il_blk_offsets[ i_blk_idx ][ 0 ] + ( rgui_y262_luma_il_blk_offsets[ i_blk_idx ][ 1 ] * i_stride );
+            i_blk_stride = i_stride * 2;
+        }
+        else
+        {
+            /* the pointer is computed with the plane stride first, the returned stride is chosen afterwards */
+            i_blk_stride = i_stride;
+            pui8_blk = pui8_ptr + rgui_y262_chroma_il_blk_offsets[ ps_y262->i_sequence_chroma_format ][ i_blk_idx ][ 0 ] + ( rgui_y262_chroma_il_blk_offsets[ ps_y262->i_sequence_chroma_format ][ i_blk_idx ][ 1 ] * i_blk_stride );
+            switch( ps_y262->i_sequence_chroma_format )
+            {
+                case Y262_CHROMA_FORMAT_420:
+                    i_blk_stride = i_stride;
+                    break;
+                case Y262_CHROMA_FORMAT_422:
+                case Y262_CHROMA_FORMAT_444:
+                default:
+                    i_blk_stride = i_stride * 2;
+                    break;
+            }
+        }
+    }
+    *pi_adjusted_stride = i_blk_stride;
+    return pui8_blk;
+}
+
+/*
+ * Encodes the current macroblock of the slice as intra and reconstructs it.
+ *
+ * For every 8x8 block of every plane the source pels are transformed,
+ * quantized (plain quantizer when i_quality_for_speed < 0, trellis
+ * otherwise), flagged as coded, then dequantized, inverse transformed and
+ * written, clamped to 0..255, into the reconstruction picture.
+ * b_interlaced adds MACROBLOCK_INTERLACED to the macroblock type and is
+ * forwarded to y262_blk_pointer_adjust for source and destination blocks.
+ */
+void y262_encode_macroblock_intra( y262_t *ps_y262, y262_slice_t *ps_slice, bool_t b_interlaced )
+{
+    int32_t i_blk_idx, i_x, i_y, i_plane_idx, i_num_blocks, i_chroma_format;
+    y262_macroblock_t *ps_mb;
+
+    ps_mb = &ps_slice->s_macroblock;
+    i_chroma_format = ps_y262->i_sequence_chroma_format;
+
+    ps_mb->i_macroblock_type = MACROBLOCK_INTRA;
+    if( b_interlaced )
+    {
+        ps_mb->i_macroblock_type |= MACROBLOCK_INTERLACED;
+    }
+
+    ps_mb->i_cbp = 0;
+    for( i_plane_idx = 0; i_plane_idx < 3; i_plane_idx++ )
+    {
+        /* luma always has 4 blocks, the chroma block count comes from the chroma format table */
+        i_num_blocks = i_plane_idx == 0 ? 4 : rgui_num_chroma_blk[ ps_y262->i_sequence_chroma_format ];
+        for( i_blk_idx = 0; i_blk_idx < i_num_blocks; i_blk_idx++ )
+        {
+            uint8_t *pui8_blk;
+            int32_t i_blk_stride;
+
+            /* locate the source block */
+            switch( i_plane_idx )
+            {
+                case 0:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+                case 1:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cb, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+                case 2:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cr, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+            }
+
+            /* intra blocks transform the source pels themselves, there is no prediction to subtract */
+            for( i_y = 0; i_y < 8; i_y++ )
+            {
+                for( i_x = 0; i_x < 8; i_x++ )
+                {
+                    ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ] = pui8_blk[ i_x + i_y * i_blk_stride ];
+                }
+            }
+            ps_y262->s_funcs.f_fdct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ] );
+
+            /* the DC coefficient is scaled separately by the intra dc precision */
+            ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ 0 ] = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ 0 ] >> ( 3 - ps_y262->i_intra_dc_precision );
+            if( ps_y262->i_quality_for_speed < 0 )
+            {
+                y262_quant8x8_intra_fw( ps_y262, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_y262->rgui16_intra_quantizer_matrices[ ps_mb->i_scaled_quantizer ], ps_y262->rgui16_intra_quantizer_matrices_bias[ ps_mb->i_scaled_quantizer ] );
+            }
+            else
+            {
+                y262_quant8x8_trellis_fw( ps_y262, ps_slice, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, TRUE );
+            }
+
+
+            /* every intra block is marked as coded, regardless of the quantizer result */
+            ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = TRUE;
+            ps_mb->i_cbp |= 1 << ( i_plane_idx * 4 + i_blk_idx );
+
+            /* reconstruction works on a copy so that rgi16_coeffs keeps the quantized levels */
+            for( i_y = 0; i_y < 8; i_y++ )
+            {
+                for( i_x = 0; i_x < 8; i_x++ )
+                {
+                    ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ] = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ];
+                }
+            }
+
+            /* undo the DC scaling applied above, then dequantize and inverse transform in place */
+            ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ 0 ] = ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ 0 ] << ( 3 - ps_y262->i_intra_dc_precision );
+            y262_quant8x8_intra_bw( ps_y262, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, ps_y262->rgui8_intra_quantiser_matrix );
+
+            ps_y262->s_funcs.f_idct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] );
+
+            /* locate the matching block in the reconstruction picture */
+            switch( i_plane_idx )
+            {
+                case 0:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+                case 1:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_dst_cb, ps_mb->i_dst_chroma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+                case 2:
+                    pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_dst_cr, ps_mb->i_dst_chroma_stride, i_plane_idx, i_blk_idx, b_interlaced, &i_blk_stride );
+                    break;
+            }
+
+            /* store the reconstructed pels clamped to the 8 bit range */
+            for( i_y = 0; i_y < 8; i_y++ )
+            {
+                for( i_x = 0; i_x < 8; i_x++ )
+                {
+                    int32_t i_rec;
+                    i_rec = ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ];
+                    pui8_blk[ i_x + i_y * i_blk_stride ] = i_rec < 0 ? 0 : ( i_rec > 255 ? 255 : i_rec );
+                }
+            }
+        }
+    }
+}
+
+
+int32_t y262_get_inter_block_bits( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx );
+
+bool_t y262_encode_macroblock_inter( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode )
+{
+    int32_t i_blk_idx, i_field_idx, i_idx, i_x, i_y, i_plane_idx, i_num_blocks, i_num_preds, i_blk_type, i_chroma_blk_type;
+    int32_t i_chroma_mv_x, i_chroma_mv_y;
+    int32_t rgi_strides[ 3 ][ 2 ], rgi_dst_strides[ 3 ], i_shift_chroma_x, i_shift_chroma_y;
+    uint8_t *rgpui8_src[ 3 ][ 2 ], *rgpui8_dst[ 3 ];
+    y262_macroblock_t *ps_mb;
+    y262_reference_picture_t *rgps_refs[ 2 ];
+    y262_mv_t *rgps_mvs[ 2 ], s_skip_mvs[ 2 ];
+    bool_t b_skip_cbf = FALSE;
+
+    ps_mb = &ps_slice->s_macroblock;
+
+    /* frame predicted modes: one 16x16 luma prediction per reference */
+    if( i_mbmode == Y262_MBMODE_SKIP || i_mbmode == Y262_MBMODE_FW || i_mbmode == Y262_MBMODE_BW || i_mbmode == Y262_MBMODE_BI )
+    {
+        i_blk_type = MC_BLOCK_16x16;
+        /* chroma block shape and the shifts that map luma to chroma coordinates */
+        switch( ps_y262->i_sequence_chroma_format )
+        {
+            case Y262_CHROMA_FORMAT_420:
+                i_chroma_blk_type = MC_BLOCK_8x8;
+                i_shift_chroma_x = 1;
+                i_shift_chroma_y = 1;
+                break;
+            case Y262_CHROMA_FORMAT_422:
+                i_chroma_blk_type = MC_BLOCK_8x16;
+                i_shift_chroma_x = 1;
+                i_shift_chroma_y = 0;
+                break;
+            case Y262_CHROMA_FORMAT_444:
+                i_chroma_blk_type = MC_BLOCK_16x16;
+                i_shift_chroma_x = 0;
+                i_shift_chroma_y = 0;
+                break;
+        }
+
+        /* destination of the prediction is the macroblock position in the reconstruction picture */
+        rgi_dst_strides[ 0 ] = ps_y262->ps_refpic_dst->i_stride_luma;
+        rgi_dst_strides[ 1 ] = ps_y262->ps_refpic_dst->i_stride_chroma;
+        rgi_dst_strides[ 2 ] = ps_y262->ps_refpic_dst->i_stride_chroma;
+        rgpui8_dst[ 0 ] = ps_y262->ps_refpic_dst->pui8_luma + ps_mb->i_mb_x + ( ps_mb->i_mb_y * rgi_dst_strides[ 0 ] );
+        rgpui8_dst[ 1 ] = ps_y262->ps_refpic_dst->pui8_cb + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_dst_strides[ 1 ] );
+        rgpui8_dst[ 2 ] = ps_y262->ps_refpic_dst->pui8_cr + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_dst_strides[ 2 ] );
+
+        if( i_mbmode == 
Y262_MBMODE_FW ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_FORWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + i_num_preds = 1; + rgps_refs[ 0 ] = ps_y262->ps_refpic_forward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_fw_mv; + ps_mb->rgs_motion[ 0 ][ 0 ] = ps_slice->s_mode_decision.s_fw_mv; + } + else if( i_mbmode == Y262_MBMODE_BW ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_BACKWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + i_num_preds = 1; + rgps_refs[ 0 ] = ps_y262->ps_refpic_backward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_bw_mv; + ps_mb->rgs_motion[ 0 ][ 1 ] = ps_slice->s_mode_decision.s_bw_mv; + } + else if( i_mbmode == Y262_MBMODE_BI ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + i_num_preds = 2; + rgps_refs[ 0 ] = ps_y262->ps_refpic_forward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_bi_mv[ 0 ]; + rgps_refs[ 1 ] = ps_y262->ps_refpic_backward; + rgps_mvs[ 1 ] = &ps_slice->s_mode_decision.s_bi_mv[ 1 ]; + ps_mb->rgs_motion[ 0 ][ 0 ] = ps_slice->s_mode_decision.s_bi_mv[ 0 ]; + ps_mb->rgs_motion[ 0 ][ 1 ] = ps_slice->s_mode_decision.s_bi_mv[ 1 ]; + } + else if( i_mbmode == Y262_MBMODE_SKIP ) + { + if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_FORWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + i_num_preds = 1; + rgps_refs[ 0 ] = ps_y262->ps_refpic_forward; + s_skip_mvs[ 0 ].i_x = 0; + s_skip_mvs[ 0 ].i_y = 0; + rgps_mvs[ 0 ] = &s_skip_mvs[ 0 ]; + ps_mb->rgs_motion[ 0 ][ 0 ] = s_skip_mvs[ 0 ]; + } + else + { + assert( ps_slice->i_picture_type == PICTURE_CODING_TYPE_B ); + i_num_preds = 0; + ps_mb->i_macroblock_type = 0; + if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_FORWARD ) + { + ps_mb->i_macroblock_type |= 
MACROBLOCK_MOTION_FORWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + rgps_refs[ i_num_preds ] = ps_y262->ps_refpic_forward; + s_skip_mvs[ i_num_preds ].i_x = ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ]; + s_skip_mvs[ i_num_preds ].i_y = ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ]; + rgps_mvs[ i_num_preds ] = &s_skip_mvs[ i_num_preds ]; + ps_mb->rgs_motion[ 0 ][ 0 ] = s_skip_mvs[ i_num_preds ]; + i_num_preds++; + } + if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_BACKWARD ) + { + ps_mb->i_macroblock_type |= MACROBLOCK_MOTION_BACKWARD; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FRAME * MACROBLOCK_MOTION_TYPE; + rgps_refs[ i_num_preds ] = ps_y262->ps_refpic_backward; + s_skip_mvs[ i_num_preds ].i_x = ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ]; + s_skip_mvs[ i_num_preds ].i_y = ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ]; + rgps_mvs[ i_num_preds ] = &s_skip_mvs[ i_num_preds ]; + ps_mb->rgs_motion[ 0 ][ 1 ] = s_skip_mvs[ i_num_preds ]; + i_num_preds++; + } + } + } + else + { + assert( FALSE ); + } + + + + for( i_idx = 0; i_idx < i_num_preds; i_idx++ ) + { + int32_t i_hpel_idx, i_chroma_hpel_idx; + + rgi_strides[ 0 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_luma; + rgi_strides[ 1 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_chroma; + rgi_strides[ 2 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_chroma; + rgpui8_src[ 0 ][ 0 ] = rgps_refs[ i_idx ]->pui8_luma + ps_mb->i_mb_x + ( ps_mb->i_mb_y * rgi_strides[ 0 ][ 0 ] ); + rgpui8_src[ 1 ][ 0 ] = rgps_refs[ i_idx ]->pui8_cb + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_strides[ 1 ][ 0 ] ); + rgpui8_src[ 2 ][ 0 ] = rgps_refs[ i_idx ]->pui8_cr + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_strides[ 2 ][ 0 ] ); + + i_hpel_idx = ( rgps_mvs[ i_idx ]->i_x & 1 ) | ( ( rgps_mvs[ i_idx ]->i_y & 1 ) << 1 ); + rgpui8_src[ 0 ][ 0 ] += ( rgps_mvs[ i_idx ]->i_x >> 1 ) + ( ( rgps_mvs[ i_idx ]->i_y >> 1 ) * rgi_strides[ 0 ][ 0 ] ); + + i_chroma_mv_x = ( rgps_mvs[ i_idx 
]->i_x / ( 1 << i_shift_chroma_x ) ); + i_chroma_mv_y = ( rgps_mvs[ i_idx ]->i_y / ( 1 << i_shift_chroma_y ) ); + i_chroma_hpel_idx = ( i_chroma_mv_x & 1 ) | ( ( i_chroma_mv_y & 1 ) << 1 ); + rgpui8_src[ 1 ][ 0 ] += ( i_chroma_mv_x >> 1 ) + ( ( i_chroma_mv_y >> 1 ) * rgi_strides[ 1 ][ 0 ] ); + rgpui8_src[ 2 ][ 0 ] += ( i_chroma_mv_x >> 1 ) + ( ( i_chroma_mv_y >> 1 ) * rgi_strides[ 1 ][ 0 ] ); + + if( i_idx == 0 ) + { + ps_y262->s_funcs.rgf_motcomp_copy[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ][ 0 ], rgi_strides[ 0 ][ 0 ], rgpui8_dst[ 0 ], rgi_dst_strides[ 0 ] ); + ps_y262->s_funcs.rgf_motcomp_copy[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 1 ][ 0 ], rgi_strides[ 1 ][ 0 ], rgpui8_dst[ 1 ], rgi_dst_strides[ 1 ] ); + ps_y262->s_funcs.rgf_motcomp_copy[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 2 ][ 0 ], rgi_strides[ 2 ][ 0 ], rgpui8_dst[ 2 ], rgi_dst_strides[ 2 ] ); + } + else + { + ps_y262->s_funcs.rgf_motcomp_avg[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ][ 0 ], rgi_strides[ 0 ][ 0 ], rgpui8_dst[ 0 ], rgi_dst_strides[ 0 ] ); + ps_y262->s_funcs.rgf_motcomp_avg[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 1 ][ 0 ], rgi_strides[ 1 ][ 0 ], rgpui8_dst[ 1 ], rgi_dst_strides[ 1 ] ); + ps_y262->s_funcs.rgf_motcomp_avg[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 2 ][ 0 ], rgi_strides[ 2 ][ 0 ], rgpui8_dst[ 2 ], rgi_dst_strides[ 2 ] ); + } + } + ps_mb->i_cbp = 0; + for( i_plane_idx = 0; i_plane_idx < 3; i_plane_idx++ ) + { + i_num_blocks = i_plane_idx == 0 ? 
4 : rgui_num_chroma_blk[ ps_y262->i_sequence_chroma_format ]; + for( i_blk_idx = 0; i_blk_idx < i_num_blocks; i_blk_idx++ ) + { + uint8_t *pui8_src_blk, *pui8_dst_blk, *pui8_pred; + int32_t i_src_blk_stride, i_dst_blk_stride, i_pred_stride, i_zdist, i_cdist; + uint8_t rgui8_recon[ 8 * 8 ]; + + if( i_mbmode == Y262_MBMODE_SKIP && b_skip_cbf ) + { + continue; + } + + pui8_pred = y262_blk_pointer_adjust( ps_y262, rgpui8_dst[ i_plane_idx ], rgi_dst_strides[ i_plane_idx ], i_plane_idx, i_blk_idx, FALSE, &i_pred_stride ); + + switch( i_plane_idx ) + { + case 0: + pui8_src_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, i_plane_idx, i_blk_idx, FALSE, &i_src_blk_stride ); + break; + case 1: + pui8_src_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cb, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, FALSE, &i_src_blk_stride ); + break; + case 2: + pui8_src_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cr, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, FALSE, &i_src_blk_stride ); + break; + } + + if( ps_y262->i_quality_for_speed >= 8 ) + { + i_zdist = ps_y262->s_funcs.f_ssd_8x8( pui8_src_blk, i_src_blk_stride, pui8_pred, i_pred_stride ); + } + + ps_y262->s_funcs.f_sub_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], pui8_src_blk, i_src_blk_stride, pui8_pred, i_pred_stride ); + + ps_y262->s_funcs.f_fdct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ] ); + if( ps_y262->i_quality_for_speed < 0 ) + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = !!y262_quant8x8_inter_fw( ps_y262, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_y262->rgui16_non_intra_quantizer_matrices[ ps_mb->i_scaled_quantizer ] ); + } + else + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = !!y262_quant8x8_trellis_fw( ps_y262, ps_slice, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, FALSE ); + } + + if( i_mbmode == Y262_MBMODE_SKIP ) + { + 
if( ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] ) + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = FALSE; + b_skip_cbf = TRUE; + } + continue; + } + + if( ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] ) + { + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ] = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ]; + } + } + + y262_quant8x8_inter_bw( ps_y262, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, ps_y262->rgui8_non_intra_quantiser_matrix ); + ps_y262->s_funcs.f_idct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] ); + + pui8_dst_blk = y262_blk_pointer_adjust( ps_y262, rgpui8_dst[ i_plane_idx ], rgi_dst_strides[ i_plane_idx ], i_plane_idx, i_blk_idx, FALSE, &i_dst_blk_stride ); + + if( ps_y262->i_quality_for_speed >= 8 ) + { + ps_y262->s_funcs.f_add_8x8( rgui8_recon, 8, pui8_dst_blk, i_dst_blk_stride, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] ); + + i_cdist = ps_y262->s_funcs.f_ssd_8x8( pui8_src_blk, i_src_blk_stride, rgui8_recon, 8 ); + i_cdist += ( ps_mb->i_lambda * ( y262_get_inter_block_bits( ps_y262, ps_slice, i_plane_idx, i_blk_idx ) + 1 ) ) >> Y262_LAMBDA_BITS; + if( i_cdist <= i_zdist ) + { + for( i_y = 0; i_y < 8; i_y++ ) + { + memcpy( pui8_dst_blk + ( i_dst_blk_stride * i_y ), &rgui8_recon[ i_y * 8 ], sizeof( uint8_t ) * 8 ); + } + ps_mb->i_cbp |= ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] << ( i_plane_idx * 4 + i_blk_idx ); + } + else + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = 0; + } + } + else + { + ps_y262->s_funcs.f_add_8x8( pui8_dst_blk, i_dst_blk_stride, pui8_dst_blk, i_dst_blk_stride, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] ); + ps_mb->i_cbp |= ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] << ( i_plane_idx * 4 + i_blk_idx ); + } + } + } + } + } + else if( i_mbmode == Y262_MBMODE_FW_IL || i_mbmode == Y262_MBMODE_BW_IL || i_mbmode 
== Y262_MBMODE_BI_IL ) + { + i_blk_type = MC_BLOCK_16x8; + switch( ps_y262->i_sequence_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + i_chroma_blk_type = MC_BLOCK_8x4; + i_shift_chroma_x = 1; + i_shift_chroma_y = 1; + break; + case Y262_CHROMA_FORMAT_422: + i_chroma_blk_type = MC_BLOCK_8x8; + i_shift_chroma_x = 1; + i_shift_chroma_y = 0; + break; + case Y262_CHROMA_FORMAT_444: + i_chroma_blk_type = MC_BLOCK_16x8; + i_shift_chroma_x = 0; + i_shift_chroma_y = 0; + break; + } + + rgi_dst_strides[ 0 ] = ps_y262->ps_refpic_dst->i_stride_luma; + rgi_dst_strides[ 1 ] = ps_y262->ps_refpic_dst->i_stride_chroma; + rgi_dst_strides[ 2 ] = ps_y262->ps_refpic_dst->i_stride_chroma; + rgpui8_dst[ 0 ] = ps_y262->ps_refpic_dst->pui8_luma + ps_mb->i_mb_x + ( ps_mb->i_mb_y * rgi_dst_strides[ 0 ] ); + rgpui8_dst[ 1 ] = ps_y262->ps_refpic_dst->pui8_cb + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_dst_strides[ 1 ] ); + rgpui8_dst[ 2 ] = ps_y262->ps_refpic_dst->pui8_cr + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgi_dst_strides[ 2 ] ); + + for( i_field_idx = 0; i_field_idx < 2; i_field_idx++ ) + { + if( i_mbmode == Y262_MBMODE_FW_IL ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_FORWARD | MACROBLOCK_INTERLACED; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FIELD * MACROBLOCK_MOTION_TYPE; + i_num_preds = 1; + rgps_refs[ 0 ] = ps_y262->ps_refpic_forward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ]; + ps_mb->rgs_motion[ i_field_idx ][ 0 ] = ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ]; + } + else if( i_mbmode == Y262_MBMODE_BW_IL ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_INTERLACED; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FIELD * MACROBLOCK_MOTION_TYPE; + i_num_preds = 1; + rgps_refs[ 0 ] = ps_y262->ps_refpic_backward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ]; + ps_mb->rgs_motion[ i_field_idx 
][ 1 ] = ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ]; + } + else if( i_mbmode == Y262_MBMODE_BI_IL ) + { + ps_mb->i_macroblock_type = MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_INTERLACED; + ps_mb->i_macroblock_type |= FRAME_MOTION_TYPE_FIELD * MACROBLOCK_MOTION_TYPE; + i_num_preds = 2; + rgps_refs[ 0 ] = ps_y262->ps_refpic_forward; + rgps_mvs[ 0 ] = &ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 0 ]; + rgps_refs[ 1 ] = ps_y262->ps_refpic_backward; + rgps_mvs[ 1 ] = &ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 1 ]; + ps_mb->rgs_motion[ i_field_idx ][ 0 ] = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 0 ]; + ps_mb->rgs_motion[ i_field_idx ][ 1 ] = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 1 ]; + } + else + { + assert( FALSE ); + } + + + + for( i_idx = 0; i_idx < i_num_preds; i_idx++ ) + { + int32_t i_hpel_idx, i_chroma_hpel_idx; + + rgi_strides[ 0 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_luma << 1; + rgi_strides[ 1 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_chroma << 1; + rgi_strides[ 2 ][ 0 ] = rgps_refs[ i_idx ]->i_stride_chroma << 1; + rgpui8_src[ 0 ][ 0 ] = rgps_refs[ i_idx ]->pui8_luma + ps_mb->i_mb_x + ( ps_mb->i_mb_y * rgps_refs[ i_idx ]->i_stride_luma ); + rgpui8_src[ 1 ][ 0 ] = rgps_refs[ i_idx ]->pui8_cb + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgps_refs[ i_idx ]->i_stride_chroma ); + rgpui8_src[ 2 ][ 0 ] = rgps_refs[ i_idx ]->pui8_cr + ( ps_mb->i_mb_x >> i_shift_chroma_x ) + ( ( ps_mb->i_mb_y >> i_shift_chroma_y ) * rgps_refs[ i_idx ]->i_stride_chroma ); + + rgpui8_src[ 0 ][ 0 ] += rgps_mvs[ i_idx ]->i_field * rgps_refs[ i_idx ]->i_stride_luma; + rgpui8_src[ 1 ][ 0 ] += rgps_mvs[ i_idx ]->i_field * rgps_refs[ i_idx ]->i_stride_chroma; + rgpui8_src[ 2 ][ 0 ] += rgps_mvs[ i_idx ]->i_field * rgps_refs[ i_idx ]->i_stride_chroma; + + i_hpel_idx = ( rgps_mvs[ i_idx ]->i_x & 1 ) | ( ( rgps_mvs[ i_idx ]->i_y & 1 ) << 1 ); + rgpui8_src[ 0 ][ 0 ] += ( rgps_mvs[ 
i_idx ]->i_x >> 1 ) + ( ( rgps_mvs[ i_idx ]->i_y >> 1 ) * rgi_strides[ 0 ][ 0 ] ); + + i_chroma_mv_x = ( rgps_mvs[ i_idx ]->i_x / ( 1 << i_shift_chroma_x ) ); + i_chroma_mv_y = ( rgps_mvs[ i_idx ]->i_y / ( 1 << i_shift_chroma_y ) ); + i_chroma_hpel_idx = ( i_chroma_mv_x & 1 ) | ( ( i_chroma_mv_y & 1 ) << 1 ); + rgpui8_src[ 1 ][ 0 ] += ( i_chroma_mv_x >> 1 ) + ( ( i_chroma_mv_y >> 1 ) * rgi_strides[ 1 ][ 0 ] ); + rgpui8_src[ 2 ][ 0 ] += ( i_chroma_mv_x >> 1 ) + ( ( i_chroma_mv_y >> 1 ) * rgi_strides[ 2 ][ 0 ] ); + + if( i_idx == 0 ) + { + ps_y262->s_funcs.rgf_motcomp_copy[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ][ 0 ], rgi_strides[ 0 ][ 0 ], rgpui8_dst[ 0 ] + ( i_field_idx * rgi_dst_strides[ 0 ] ), rgi_dst_strides[ 0 ] << 1 ); + ps_y262->s_funcs.rgf_motcomp_copy[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 1 ][ 0 ], rgi_strides[ 1 ][ 0 ], rgpui8_dst[ 1 ] + ( i_field_idx * rgi_dst_strides[ 1 ] ), rgi_dst_strides[ 1 ] << 1 ); + ps_y262->s_funcs.rgf_motcomp_copy[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 2 ][ 0 ], rgi_strides[ 2 ][ 0 ], rgpui8_dst[ 2 ] + ( i_field_idx * rgi_dst_strides[ 2 ] ), rgi_dst_strides[ 2 ] << 1 ); + } + else + { + ps_y262->s_funcs.rgf_motcomp_avg[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ][ 0 ], rgi_strides[ 0 ][ 0 ], rgpui8_dst[ 0 ] + ( i_field_idx * rgi_dst_strides[ 0 ] ), rgi_dst_strides[ 0 ] << 1 ); + ps_y262->s_funcs.rgf_motcomp_avg[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 1 ][ 0 ], rgi_strides[ 1 ][ 0 ], rgpui8_dst[ 1 ] + ( i_field_idx * rgi_dst_strides[ 1 ] ), rgi_dst_strides[ 1 ] << 1 ); + ps_y262->s_funcs.rgf_motcomp_avg[ i_chroma_blk_type ][ i_chroma_hpel_idx ]( rgpui8_src[ 2 ][ 0 ], rgi_strides[ 2 ][ 0 ], rgpui8_dst[ 2 ] + ( i_field_idx * rgi_dst_strides[ 2 ] ), rgi_dst_strides[ 2 ] << 1 ); + } + } + } + ps_mb->i_cbp = 0; + for( i_plane_idx = 0; i_plane_idx < 3; i_plane_idx++ ) + { + i_num_blocks = i_plane_idx == 0 ? 
4 : rgui_num_chroma_blk[ ps_y262->i_sequence_chroma_format ]; + for( i_blk_idx = 0; i_blk_idx < i_num_blocks; i_blk_idx++ ) + { + uint8_t *pui8_blk, *pui8_pred; + int32_t i_blk_stride, i_pred_stride; + + pui8_pred = y262_blk_pointer_adjust( ps_y262, rgpui8_dst[ i_plane_idx ], rgi_dst_strides[ i_plane_idx ], i_plane_idx, i_blk_idx, TRUE, &i_pred_stride ); + + switch( i_plane_idx ) + { + case 0: + pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, i_plane_idx, i_blk_idx, TRUE, &i_blk_stride ); + break; + case 1: + pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cb, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, TRUE, &i_blk_stride ); + break; + case 2: + pui8_blk = y262_blk_pointer_adjust( ps_y262, ps_mb->pui8_src_cr, ps_mb->i_src_chroma_stride, i_plane_idx, i_blk_idx, TRUE, &i_blk_stride ); + break; + } + + ps_y262->s_funcs.f_sub_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], pui8_blk, i_blk_stride, pui8_pred, i_pred_stride ); + + ps_y262->s_funcs.f_fdct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ] ); + if( ps_y262->i_quality_for_speed < 0 ) + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = !!y262_quant8x8_inter_fw( ps_y262, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_y262->rgui16_non_intra_quantizer_matrices[ ps_mb->i_scaled_quantizer ] ); + } + else + { + ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] = !!y262_quant8x8_trellis_fw( ps_y262, ps_slice, ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, FALSE ); + } + ps_mb->i_cbp |= ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] << ( i_plane_idx * 4 + i_blk_idx ); + + if( ps_mb->rgb_cbp[ i_plane_idx ][ i_blk_idx ] ) + { + for( i_y = 0; i_y < 8; i_y++ ) + { + for( i_x = 0; i_x < 8; i_x++ ) + { + ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ] = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ]; + } + } + + 
/* NOTE: everything down to the first "return b_skip_cbf; }" is the tail of a function that begins before this chunk; it is reproduced unchanged. */
					y262_quant8x8_inter_bw( ps_y262, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], 8, ps_mb->i_scaled_quantizer, ps_y262->rgui8_non_intra_quantiser_matrix );
					ps_y262->s_funcs.f_idct_8x8( ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ], ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] );

					pui8_blk = y262_blk_pointer_adjust( ps_y262, rgpui8_dst[ i_plane_idx ], rgi_dst_strides[ i_plane_idx ], i_plane_idx, i_blk_idx, TRUE, &i_blk_stride );

					/*
					for( i_y = 0; i_y < 8; i_y++ )
					{
						for( i_x = 0; i_x < 8; i_x++ )
						{
							int32_t i_rec;
							i_rec = pui8_blk[ i_x + i_y * i_blk_stride ] + ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ][ i_x + i_y * 8 ];
							pui8_blk[ i_x + i_y * i_blk_stride ] = i_rec < 0 ? 0 : ( i_rec > 255 ? 255 : i_rec );
						}
					}
					*/
					ps_y262->s_funcs.f_add_8x8( pui8_blk, i_blk_stride, pui8_blk, i_blk_stride, ps_mb->rgi16_residual[ i_plane_idx ][ i_blk_idx ] );
				}
			}
		}
	}
	else
	{
		assert( FALSE );
	}
	return b_skip_cbf;
}


/*
 * y262_get_mbmode_motion
 *
 * Runs the luma motion search for one single-direction inter mode of the
 * current macroblock ( ps_slice->s_macroblock ) and records the winning
 * vector and its cost in ps_slice->s_mode_decision.
 *
 *   Y262_MBMODE_FW / Y262_MBMODE_BW
 *       One 16x16 search in the forward / backward reference picture.
 *       Result: s_fw_mv / s_bw_mv ( i_field = Y262_MV_FRAME_FIELD ) and
 *       i_fw_cost / i_bw_cost.
 *   Y262_MBMODE_FW_IL / Y262_MBMODE_BW_IL
 *       For each of the two source fields a 16x8 search is run against both
 *       fields of the reference picture; the lower cost result is kept in
 *       s_fw_il_mv[ field ] / s_bw_il_mv[ field ]. The sum of the two kept
 *       costs is stored in both vectors' i_cost and in
 *       i_fw_il_cost / i_bw_il_cost.
 *
 * Any other i_mbmode matches neither branch and nothing is written.
 *
 * ps_y262  - encoder context; reference pictures, f_codes, picture size and
 *            the lookahead vectors of the input picture are read from it.
 * ps_slice - slice context; supplies the macroblock and the motion vector
 *            predictors ( rgi_pmv ) and receives the results.
 * i_mbmode - one of the four modes listed above.
 */
void y262_get_mbmode_motion( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode )
{
	int32_t i_blk_type, i_fcode_x, i_fcode_y;
	y262_reference_picture_t *ps_ref;
	y262_me_context_t s_me;
	y262_macroblock_t *ps_mb;

	ps_mb = &ps_slice->s_macroblock;

	if( i_mbmode == Y262_MBMODE_FW || i_mbmode == Y262_MBMODE_BW )
	{
		/* pick reference, f_codes, predictor and the single search candidate ( taken from the lookahead vectors ) per direction */
		if( i_mbmode == Y262_MBMODE_FW )
		{
			ps_ref = ps_y262->ps_refpic_forward;
			i_blk_type = BLOCK_TYPE_16x16;
			i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ];
			i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ];
			s_me.i_pred_mv_x = ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ];
			s_me.i_pred_mv_y = ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ];
			s_me.i_num_candidates_fp = 1;
			s_me.rgi_candidates_fp[ 0 ][ 0 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 0 ][ 0 ];
			s_me.rgi_candidates_fp[ 0 ][ 1 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 0 ][ 1 ];
		}
		else if( i_mbmode == Y262_MBMODE_BW )
		{
			ps_ref = ps_y262->ps_refpic_backward;
			i_blk_type = BLOCK_TYPE_16x16;
			i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ];
			i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ];
			s_me.i_pred_mv_x = ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ];
			s_me.i_pred_mv_y = ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ];
			s_me.i_num_candidates_fp = 1;
			s_me.rgi_candidates_fp[ 0 ][ 0 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 1 ][ 0 ];
			s_me.rgi_candidates_fp[ 0 ][ 1 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 1 ][ 1 ];
		}
		else
		{
			assert( FALSE );
		}

		s_me.pui8_blk = ps_mb->pui8_src_luma;
		s_me.i_blk_stride = ps_mb->i_src_luma_stride;
		s_me.i_blk_type = i_blk_type;
		/* allowed vector range is derived from the f_code of the direction: [ -2^( fcode + 2 ), 2^( fcode + 2 ) - 1 ] */
		s_me.i_min_mv_x = -( 1 << ( 3 + i_fcode_x - 1 ) );
		s_me.i_min_mv_y = -( 1 << ( 3 + i_fcode_y - 1 ) );
		s_me.i_max_mv_x = ( 1 << ( 3 + i_fcode_x - 1 ) ) - 1;
		s_me.i_max_mv_y = ( 1 << ( 3 + i_fcode_y - 1 ) ) - 1;
		s_me.i_x_offset = ps_mb->i_mb_x;
		s_me.i_y_offset = ps_mb->i_mb_y;
		s_me.i_lambda = ps_mb->i_lambda_sqr;
		s_me.i_ref_width = ps_y262->i_sequence_width;
		s_me.i_ref_height = ps_y262->i_sequence_height;
		s_me.i_ref_stride = ps_ref->i_stride_luma;
		s_me.pui8_ref = ps_ref->pui8_luma;
		s_me.i_me_call = MECALL_MAIN;

		y262_motion_search( ps_y262, &s_me );

		/* the cost reported by the search is stored twice: in the vector and as the mode cost */
		if( i_mbmode == Y262_MBMODE_FW )
		{
			ps_slice->s_mode_decision.s_fw_mv.i_x = s_me.i_best_mv_x;
			ps_slice->s_mode_decision.s_fw_mv.i_y = s_me.i_best_mv_y;
			ps_slice->s_mode_decision.s_fw_mv.i_field = Y262_MV_FRAME_FIELD;
			ps_slice->s_mode_decision.s_fw_mv.i_cost = s_me.i_best_mv_sad;
			ps_slice->s_mode_decision.i_fw_cost = s_me.i_best_mv_sad;
		}
		else if( i_mbmode == Y262_MBMODE_BW )
		{
			ps_slice->s_mode_decision.s_bw_mv.i_x = s_me.i_best_mv_x;
			ps_slice->s_mode_decision.s_bw_mv.i_y = s_me.i_best_mv_y;
			ps_slice->s_mode_decision.s_bw_mv.i_field = Y262_MV_FRAME_FIELD;
			ps_slice->s_mode_decision.s_bw_mv.i_cost = s_me.i_best_mv_sad;
			ps_slice->s_mode_decision.i_bw_cost = s_me.i_best_mv_sad;
		}
		else
		{
			assert( FALSE );
		}
	}
	else if( i_mbmode == Y262_MBMODE_FW_IL || i_mbmode == Y262_MBMODE_BW_IL )
	{
		int32_t i_field_idx, i_search_field_idx, i_cost = 0;
		/* despite its name this holds the lowest cost seen so far over the reference fields searched for the current source field */
		int32_t i_top_field_sad;

		/* i_field_idx selects the source field: 0 = even lines, 1 = odd lines of the macroblock */
		for( i_field_idx = 0; i_field_idx < 2; i_field_idx++ )
		{
			if( i_mbmode == Y262_MBMODE_FW_IL )
			{
				ps_ref = ps_y262->ps_refpic_forward;
				i_blk_type = BLOCK_TYPE_16x8;
				i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ];
				/* vertical f_code is reduced by one for the field search, halving the vertical range */
				i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ] - 1;
				s_me.i_pred_mv_x = ps_slice->rgi_pmv[ i_field_idx ][ 0 ][ 0 ];
				s_me.i_pred_mv_y = ps_slice->rgi_pmv[ i_field_idx ][ 0 ][ 1 ];
				s_me.i_num_candidates_fp = 1;
				s_me.rgi_candidates_fp[ 0 ][ 0 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 0 ][ 0 ];
				/* the lookahead y component is halved before it is used as field candidate */
				s_me.rgi_candidates_fp[ 0 ][ 1 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 0 ][ 1 ] >> 1;
			}
			else if( i_mbmode == Y262_MBMODE_BW_IL )
			{
				ps_ref = ps_y262->ps_refpic_backward;
				i_blk_type = BLOCK_TYPE_16x8;
				i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ];
				i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ] - 1;
				s_me.i_pred_mv_x = ps_slice->rgi_pmv[ i_field_idx ][ 1 ][ 0 ];
				s_me.i_pred_mv_y = ps_slice->rgi_pmv[ i_field_idx ][ 1 ][ 1 ];
				s_me.i_num_candidates_fp = 1;
				s_me.rgi_candidates_fp[ 0 ][ 0 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 1 ][ 0 ];
				s_me.rgi_candidates_fp[ 0 ][ 1 ] = ps_y262->ps_input_picture->ps_lookahead[ ps_mb->i_mb_addr ].rgi_mvs[ 1 ][ 1 ] >> 1;
			}
			else
			{
				assert( FALSE );
			}

			/* address one field of the source: start on line i_field_idx and step two lines at a time */
			s_me.pui8_blk = ps_mb->pui8_src_luma + ( ps_mb->i_src_luma_stride * i_field_idx );
			s_me.i_blk_stride = ps_mb->i_src_luma_stride << 1;
			s_me.i_blk_type = i_blk_type;
			s_me.i_min_mv_x = -( 1 << ( 3 + i_fcode_x - 1 ) );
			s_me.i_min_mv_y = -( 1 << ( 3 + i_fcode_y - 1 ) );
			s_me.i_max_mv_x = ( 1 << ( 3 + i_fcode_x - 1 ) ) - 1;
			s_me.i_max_mv_y = ( 1 << ( 3 + i_fcode_y - 1 ) ) - 1;
			s_me.i_x_offset = ps_mb->i_mb_x;
			/* vertical position is expressed in field lines */
			s_me.i_y_offset = ps_mb->i_mb_y >> 1;
			s_me.i_lambda = ps_mb->i_lambda_sqr;
			s_me.i_me_call = MECALL_MAIN;

			/* i_search_field_idx selects the reference field: 0 = top, 1 = bottom */
			for( i_search_field_idx = 0; i_search_field_idx < 2; i_search_field_idx++ )
			{

				s_me.i_ref_width = ps_y262->i_sequence_width;
				s_me.i_ref_height = ps_y262->i_sequence_height >> 1;
				s_me.i_ref_stride = ps_ref->i_stride_luma << 1;
				s_me.pui8_ref = ps_ref->pui8_luma + ( i_search_field_idx * ps_ref->i_stride_luma );

				y262_motion_search( ps_y262, &s_me );

				/* the "== 0 ||" test makes the first reference field win unconditionally, which is also what initialises i_top_field_sad */
				if( i_mbmode == Y262_MBMODE_FW_IL && ( i_search_field_idx == 0 || s_me.i_best_mv_sad < i_top_field_sad ) )
				{
					ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_x = s_me.i_best_mv_x;
					ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_y = s_me.i_best_mv_y;
					ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_field = i_search_field_idx == 0 ? Y262_MV_TOP_FIELD : Y262_MV_BOTTOM_FIELD;
					i_top_field_sad = s_me.i_best_mv_sad;
				}
				else if( i_mbmode == Y262_MBMODE_BW_IL && ( i_search_field_idx == 0 || s_me.i_best_mv_sad < i_top_field_sad ) )
				{
					ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_x = s_me.i_best_mv_x;
					ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_y = s_me.i_best_mv_y;
					ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_field = i_search_field_idx == 0 ? Y262_MV_TOP_FIELD : Y262_MV_BOTTOM_FIELD;
					i_top_field_sad = s_me.i_best_mv_sad;
				}
			}
			i_cost += i_top_field_sad;
		}
		/* both field vectors carry the combined cost of the two fields */
		if( i_mbmode == Y262_MBMODE_FW_IL )
		{
			ps_slice->s_mode_decision.s_fw_il_mv[ 0 ].i_cost = i_cost;
			ps_slice->s_mode_decision.s_fw_il_mv[ 1 ].i_cost = i_cost;
			ps_slice->s_mode_decision.i_fw_il_cost = i_cost;
		}
		else
		{
			ps_slice->s_mode_decision.s_bw_il_mv[ 0 ].i_cost = i_cost;
			ps_slice->s_mode_decision.s_bw_il_mv[ 1 ].i_cost = i_cost;
			ps_slice->s_mode_decision.i_bw_il_cost = i_cost;
		}
	}
}

/*
 * y262_get_mbmode_satd_cost
 *
 * Computes a luma only, SATD based cost estimate for the current macroblock
 * and stores it in ps_slice->s_mode_decision:
 *
 *   Y262_MBMODE_SKIP     -> i_skip_cost
 *   Y262_MBMODE_INTRA    -> i_intra_cost
 *   Y262_MBMODE_INTRA_IL -> i_intra_il_cost
 *
 * Any other i_mbmode matches no branch and nothing is written.
 *
 * ps_y262  - encoder context ( reference pictures and function table ).
 * ps_slice - slice context ( macroblock, picture type, predictors, flags of
 *            the previous macroblock ); receives the cost.
 * i_mbmode - one of the three modes listed above.
 */
void y262_get_mbmode_satd_cost( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode )
{
	y262_macroblock_t *ps_mb;

	ps_mb = &ps_slice->s_macroblock;

	if( i_mbmode == Y262_MBMODE_SKIP )
	{
		int32_t i_idx, i_num_preds, i_blk_type;
		int32_t rgi_strides[ 2 ];
		uint8_t *rgpui8_src[ 2 ];
		y262_reference_picture_t *rgps_refs[ 2 ];
		y262_mv_t *rgps_mvs[ 2 ], s_skip_mvs[ 2 ];
		ALIGNED( 16 ) uint8_t rgui8_pred[ 16 * 16 ];

		i_blk_type = MC_BLOCK_16x16;

		ps_mb = &ps_slice->s_macroblock;

		if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P )
		{
			/* P picture: skip is predicted from the forward reference with a zero vector */
			rgps_refs[ 0 ] = ps_y262->ps_refpic_forward;
			s_skip_mvs[ 0 ].i_x = 0;
			s_skip_mvs[ 0 ].i_y = 0;
			rgps_mvs[ 0 ] = &s_skip_mvs[ 0 ];
			i_num_preds = 1;
		}
		else
		{
			/* B picture: one prediction per direction flagged in i_last_mb_motion_flags, using the current predictors as vectors */
			assert( ps_slice->i_picture_type == PICTURE_CODING_TYPE_B );
			i_num_preds = 0;
			if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_FORWARD )
			{
				rgps_refs[ i_num_preds ] = ps_y262->ps_refpic_forward;
				s_skip_mvs[ i_num_preds ].i_x = ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ];
				s_skip_mvs[ i_num_preds ].i_y = ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ];
				rgps_mvs[ i_num_preds ] = &s_skip_mvs[ i_num_preds ];
				i_num_preds++;
			}
			if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_BACKWARD )
			{
				rgps_refs[ i_num_preds ] = ps_y262->ps_refpic_backward;
				s_skip_mvs[ i_num_preds ].i_x = ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ];
				s_skip_mvs[ i_num_preds ].i_y = ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ];
				rgps_mvs[ i_num_preds ] = &s_skip_mvs[ i_num_preds ];
				i_num_preds++;
			}
			/* NOTE(review): if neither flag is set i_num_preds stays 0 and rgui8_pred is handed to the SATD below without having been written - confirm callers only request SKIP in B pictures when a motion flag is set */
		}

		for( i_idx = 0; i_idx < i_num_preds; i_idx++ )
		{
			int32_t i_hpel_idx;

			rgi_strides[ 0 ] = rgps_refs[ i_idx ]->i_stride_luma;
			rgpui8_src[ 0 ] = rgps_refs[ i_idx ]->pui8_luma + ps_mb->i_mb_x + ( ps_mb->i_mb_y * rgi_strides[ 0 ] );

			/* low bit of each component selects the interpolation function, the remaining bits are the whole pel offset */
			i_hpel_idx = ( rgps_mvs[ i_idx ]->i_x & 1 ) | ( ( rgps_mvs[ i_idx ]->i_y & 1 ) << 1 );
			rgpui8_src[ 0 ] += ( rgps_mvs[ i_idx ]->i_x >> 1 ) + ( ( rgps_mvs[ i_idx ]->i_y >> 1 ) * rgi_strides[ 0 ] );

			/* first prediction is written, a second one is averaged into it */
			if( i_idx == 0 )
			{
				ps_y262->s_funcs.rgf_motcomp_copy[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ], rgi_strides[ 0 ], rgui8_pred, 16 );
			}
			else
			{
				ps_y262->s_funcs.rgf_motcomp_avg[ i_blk_type ][ i_hpel_idx ]( rgpui8_src[ 0 ], rgi_strides[ 0 ], rgui8_pred, 16 );
			}
		}
		ps_slice->s_mode_decision.i_skip_cost = ps_y262->s_funcs.rgf_satd[ i_blk_type ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, rgui8_pred, 16 );

	}
	else if( i_mbmode == Y262_MBMODE_INTRA )
	{
		/* 16 zero bytes passed with stride 0 - presumably read as an all zero 16x16 block by the satd / sad functions */
		ALIGNED( 16 ) static const uint8_t rgui8_zero[ 16 ] = { 0 };
		int32_t i_sad;
		/* cost = satd - ( sad >> 2 ) + 13 * lambda_sqr */
		ps_slice->s_mode_decision.i_intra_cost = ps_y262->s_funcs.rgf_satd[ MC_BLOCK_16x16 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, ( uint8_t *)rgui8_zero, 0 );
		i_sad = ps_y262->s_funcs.rgf_sad[ MC_BLOCK_16x16 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, ( uint8_t *)rgui8_zero, 0 );
		ps_slice->s_mode_decision.i_intra_cost -= i_sad >> 2;
		ps_slice->s_mode_decision.i_intra_cost += ps_mb->i_lambda_sqr * 13;
	}
	else if( i_mbmode == Y262_MBMODE_INTRA_IL )
	{
		ALIGNED( 16 ) static const uint8_t rgui8_zero[ 16 ] = { 0 };
		int32_t i_satd, i_sad;

		/* same measure as INTRA, taken per field: 16x8 with doubled source stride, first the even lines ... */
		i_satd = ps_y262->s_funcs.rgf_satd[ MC_BLOCK_16x8 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 );
		i_sad = ps_y262->s_funcs.rgf_sad[ MC_BLOCK_16x8 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 );
		ps_slice->s_mode_decision.i_intra_il_cost = i_satd - ( i_sad >> 2 );

		/* ... then the odd lines; the lambda term is added once for the whole macroblock */
		i_satd = ps_y262->s_funcs.rgf_satd[ MC_BLOCK_16x8 ]( ps_mb->pui8_src_luma + ps_mb->i_src_luma_stride, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 );
		i_sad = ps_y262->s_funcs.rgf_sad[ MC_BLOCK_16x8 ]( ps_mb->pui8_src_luma + ps_mb->i_src_luma_stride, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 );
		ps_slice->s_mode_decision.i_intra_il_cost += i_satd - ( i_sad >> 2 );
		ps_slice->s_mode_decision.i_intra_il_cost += ps_mb->i_lambda_sqr * 13;
	}
}


/*
 * y262_get_mbmode_motion_bi
 *
 * Derives the vector pair and the cost of a bidirectional mode for the
 * current macroblock, starting from the single-direction results already
 * present in ps_slice->s_mode_decision.
 *
 *   Y262_MBMODE_BI    - seeds s_bi_mv[ 0 / 1 ] from s_fw_mv / s_bw_mv,
 *                       writes s_bi_mv and i_bi_cost.
 *   Y262_MBMODE_BI_IL - per field, seeds s_bi_il_mv[ field ][ 0 / 1 ] from
 *                       s_fw_il_mv[ field ] / s_bw_il_mv[ field ], writes
 *                       s_bi_il_mv and i_bi_il_cost ( sum over both fields ).
 *
 * Any other i_mbmode hits assert( FALSE ).
 *
 * Refinement: up to MAX( 0, i_quality_for_speed / 5 ) iterations, alternating
 * between the forward ( even iteration ) and backward ( odd iteration )
 * vector. Each iteration builds the target block
 * clamp( 2 * source - prediction of the other direction, 0, 255 ), calls
 * y262_hpel_motion_search() for the direction being refined, and stops early
 * once that call leaves the vector unchanged.
 *
 * Cost: SATD of the source against the average of both predictions plus
 * ( vector bits of both directions ) * i_lambda_sqr.
 *
 * NOTE(review): s_me is a local that is not initialised and, unlike in
 * y262_get_mbmode_motion(), s_me.i_me_call is never assigned here - confirm
 * y262_hpel_motion_search() does not read it.
 */
void y262_get_mbmode_motion_bi( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode )
{
	uint8_t *pui8_ref, *pui8_deltasrc;
	int32_t i_blk_type, i_iter, i_fcode_x, i_fcode_y, i_mv_x, i_mv_y, i_hpelidx, i_deltasrc_stride, i_max_iter, i_cost = 0;
	y262_reference_picture_t *ps_ref, *ps_otherref;
	y262_me_context_t s_me;
	y262_macroblock_t *ps_mb;

	ps_mb = &ps_slice->s_macroblock;

	/* no refinement at all for i_quality_for_speed below 5 */
	i_max_iter = MAX( 0, ps_y262->i_quality_for_speed / 5 );

	if( i_mbmode == Y262_MBMODE_BI )
	{
		/* scratch: refinement target during the iterations, averaged prediction for the final cost */
		ALIGNED( 16 ) uint8_t rgui8_delta[ 16 * 16 ];
		ps_slice->s_mode_decision.s_bi_mv[ 0 ].i_x = ps_slice->s_mode_decision.s_fw_mv.i_x;
		ps_slice->s_mode_decision.s_bi_mv[ 0 ].i_y = ps_slice->s_mode_decision.s_fw_mv.i_y;
		ps_slice->s_mode_decision.s_bi_mv[ 0 ].i_field = Y262_MV_FRAME_FIELD;
		ps_slice->s_mode_decision.s_bi_mv[ 1 ].i_x = ps_slice->s_mode_decision.s_bw_mv.i_x;
		ps_slice->s_mode_decision.s_bi_mv[ 1 ].i_y = ps_slice->s_mode_decision.s_bw_mv.i_y;
		ps_slice->s_mode_decision.s_bi_mv[ 1 ].i_field = Y262_MV_FRAME_FIELD;

		for( i_iter = 0; i_iter < i_max_iter; i_iter++ )
		{
			int32_t i_dir, i_best_mv_x, i_best_mv_y;

			/* direction refined in this iteration: 0 = forward, 1 = backward */
			i_dir = i_iter & 1;
			i_blk_type = BLOCK_TYPE_16x16;
			s_me.pui8_blk = rgui8_delta;
			s_me.i_blk_stride = 16;
			s_me.i_blk_type = i_blk_type;
			s_me.i_x_offset = ps_mb->i_mb_x;
			s_me.i_y_offset = ps_mb->i_mb_y;
			s_me.i_num_candidates_fp = 0;
			s_me.i_lambda = ps_mb->i_lambda_sqr;

			if( i_dir == 0 )
			{
				ps_ref = ps_y262->ps_refpic_forward;
				ps_otherref = ps_y262->ps_refpic_backward;
				i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ];
				i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ];
				s_me.i_pred_mv_x = ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ];
				s_me.i_pred_mv_y = ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ];
			}
			else
			{
				ps_ref = ps_y262->ps_refpic_backward;
				ps_otherref = ps_y262->ps_refpic_forward;
				i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ];
				i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ];
				s_me.i_pred_mv_x = ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ];
				s_me.i_pred_mv_y = ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ];
			}

			s_me.i_min_mv_x = -( 1 << ( 3 + i_fcode_x - 1 ) );
			s_me.i_min_mv_y = -( 1 << ( 3 + i_fcode_y - 1 ) );
			s_me.i_max_mv_x = ( 1 << ( 3 + i_fcode_x - 1 ) ) - 1;
			s_me.i_max_mv_y = ( 1 << ( 3 + i_fcode_y - 1 ) ) - 1;
			s_me.i_ref_width = ps_y262->i_sequence_width;
			s_me.i_ref_height = ps_y262->i_sequence_height;
			s_me.i_ref_stride = ps_ref->i_stride_luma;
			s_me.pui8_ref = ps_ref->pui8_luma;
			/* the current vector is preset in the search context ( presumably its starting point ) and remembered for the convergence test below */
			i_best_mv_x = s_me.i_best_mv_x = ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_x;
			i_best_mv_y = s_me.i_best_mv_y = ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_y;

			/* locate the prediction of the other direction */
			i_mv_x = ps_slice->s_mode_decision.s_bi_mv[ !i_dir ].i_x;
			i_mv_y = ps_slice->s_mode_decision.s_bi_mv[ !i_dir ].i_y;
			pui8_ref = ps_otherref->pui8_luma + ( s_me.i_x_offset + ( i_mv_x >> 1 ) ) + ( ( s_me.i_y_offset + ( i_mv_y >> 1 ) ) * ps_otherref->i_stride_luma );
			i_hpelidx = ( ( i_mv_x ) & 1 ) | ( ( ( i_mv_y ) & 1 ) << 1 );
			if( i_hpelidx )
			{
				/* fractional vector: interpolate into rgui8_delta first; the loop below then overwrites it in place, element by element */
				ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ i_hpelidx ]( pui8_ref, ps_otherref->i_stride_luma, rgui8_delta, 16 );
				pui8_deltasrc = rgui8_delta;
				i_deltasrc_stride = 16;
			}
			else
			{
				/* whole pel vector: read the reference picture directly */
				pui8_deltasrc = pui8_ref;
				i_deltasrc_stride = ps_otherref->i_stride_luma;
			}
			/* i_mv_x / i_mv_y are reused as plain pixel counters here; target = clamp( 2 * src - other prediction ) */
			for( i_mv_y = 0; i_mv_y < 16; i_mv_y++ )
			{
				for( i_mv_x = 0; i_mv_x < 16; i_mv_x++ )
				{
					rgui8_delta[ i_mv_x + i_mv_y * 16 ] = MIN( 255, MAX( 0, ( ps_mb->pui8_src_luma[ i_mv_x + i_mv_y * ps_mb->i_src_luma_stride ] << 1 ) - pui8_deltasrc[ i_mv_x + i_mv_y * i_deltasrc_stride ] ) );
				}
			}

			y262_hpel_motion_search( ps_y262, &s_me );

			ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_x = s_me.i_best_mv_x;
			ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_y = s_me.i_best_mv_y;
			ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_field = Y262_MV_FRAME_FIELD;
			/* vector did not move: stop refining */
			if( s_me.i_best_mv_x == i_best_mv_x && s_me.i_best_mv_y == i_best_mv_y )
			{
				break;
			}
		}
		/* always taken: final cost of the vector pair */
		if( 1 )
		{
			int32_t i_dir, i_bits = 0;
			for( i_dir = 0; i_dir < 2; i_dir++ )
			{
				if( i_dir == 0 )
				{
					ps_ref = ps_y262->ps_refpic_forward;
				}
				else
				{
					ps_ref = ps_y262->ps_refpic_backward;
				}
				i_mv_x = ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_x;
				i_mv_y = ps_slice->s_mode_decision.s_bi_mv[ i_dir ].i_y;
				pui8_ref = ps_ref->pui8_luma + ( ps_mb->i_mb_x + ( i_mv_x >> 1 ) ) + ( ( ps_mb->i_mb_y + ( i_mv_y >> 1 ) ) * ps_ref->i_stride_luma );
				i_hpelidx = ( ( i_mv_x ) & 1 ) | ( ( ( i_mv_y ) & 1 ) << 1 );
				/* forward prediction is written, backward prediction is averaged into it */
				if( i_dir == 0 )
				{
					ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x16 ][ i_hpelidx ]( pui8_ref, ps_ref->i_stride_luma, rgui8_delta, 16 );
				}
				else
				{
					ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x16 ][ i_hpelidx ]( pui8_ref, ps_ref->i_stride_luma, rgui8_delta, 16 );
				}
				/* bit tables are indexed by the vector difference to the predictor, biased by 2048 */
				i_bits += ps_y262->rgi_y262_motion_bits_x[ 2048 + i_mv_x - ps_slice->rgi_pmv[ 0 ][ i_dir ][ 0 ] ];
				i_bits += ps_y262->rgi_y262_motion_bits_y[ 2048 + i_mv_y - ps_slice->rgi_pmv[ 0 ][ i_dir ][ 1 ] ];
			}
			i_cost = ps_y262->s_funcs.rgf_satd[ MC_BLOCK_16x16 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, rgui8_delta, 16 );
			i_cost += i_bits * ps_mb->i_lambda_sqr;
		}
		ps_slice->s_mode_decision.i_bi_cost = i_cost;
	}
	else if( i_mbmode == Y262_MBMODE_BI_IL )
	{
		int32_t i_field_idx, i_search_field_idx;

		/* same scheme as above, once per source field with 16x8 blocks and doubled strides; i_cost accumulates over both fields */
		for( i_field_idx = 0; i_field_idx < 2; i_field_idx++ )
		{
			int32_t i_dir, i_best_mv_x, i_best_mv_y;

			ALIGNED( 16 ) uint8_t rgui8_delta[ 16 * 8 ];
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 0 ].i_x = ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_x;
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 0 ].i_y = ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_y;
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 0 ].i_field = ps_slice->s_mode_decision.s_fw_il_mv[ i_field_idx ].i_field;
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 1 ].i_x = ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_x;
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 1 ].i_y = ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_y;
			ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ 1 ].i_field = ps_slice->s_mode_decision.s_bw_il_mv[ i_field_idx ].i_field;

			for( i_iter = 0; i_iter < i_max_iter; i_iter++ )
			{
				i_dir = i_iter & 1;

				if( i_dir == 0 )
				{
					ps_ref = ps_y262->ps_refpic_forward;
					ps_otherref = ps_y262->ps_refpic_backward;
					i_blk_type = BLOCK_TYPE_16x8;
					i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ];
					i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ] - 1;
					s_me.i_pred_mv_x = ps_slice->rgi_pmv[ i_field_idx ][ 0 ][ 0 ];
					s_me.i_pred_mv_y = ps_slice->rgi_pmv[ i_field_idx ][ 0 ][ 1 ];
				}
				else if( i_dir == 1 )
				{
					ps_ref = ps_y262->ps_refpic_backward;
					ps_otherref = ps_y262->ps_refpic_forward;
					i_blk_type = BLOCK_TYPE_16x8;
					i_fcode_x = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ];
					i_fcode_y = ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ] - 1;
					s_me.i_pred_mv_x = ps_slice->rgi_pmv[ i_field_idx ][ 1 ][ 0 ];
					s_me.i_pred_mv_y = ps_slice->rgi_pmv[ i_field_idx ][ 1 ][ 1 ];
				}
				else
				{
					assert( FALSE );
				}

				s_me.pui8_blk = rgui8_delta;
				s_me.i_blk_stride = 16;
				s_me.i_blk_type = i_blk_type;
				s_me.i_min_mv_x = -( 1 << ( 3 + i_fcode_x - 1 ) );
				s_me.i_min_mv_y = -( 1 << ( 3 + i_fcode_y - 1 ) );
				s_me.i_max_mv_x = ( 1 << ( 3 + i_fcode_x - 1 ) ) - 1;
				s_me.i_max_mv_y = ( 1 << ( 3 + i_fcode_y - 1 ) ) - 1;
				s_me.i_x_offset = ps_mb->i_mb_x;
				s_me.i_y_offset = ps_mb->i_mb_y >> 1;
				s_me.i_num_candidates_fp = 0;
				s_me.i_lambda = ps_mb->i_lambda_sqr;

				/* prediction of the other direction, taken from the reference field its vector points to */
				i_mv_x = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ !i_dir ].i_x;
				i_mv_y = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ !i_dir ].i_y;
				pui8_ref = ps_otherref->pui8_luma + ( s_me.i_x_offset + ( i_mv_x >> 1 ) ) + ( ( s_me.i_y_offset + ( i_mv_y >> 1 ) ) * ps_otherref->i_stride_luma * 2 );
				if( ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ !i_dir ].i_field == Y262_MV_BOTTOM_FIELD )
				{
					pui8_ref += ps_otherref->i_stride_luma;
				}
				i_hpelidx = ( ( i_mv_x ) & 1 ) | ( ( ( i_mv_y ) & 1 ) << 1 );
				if( i_hpelidx )
				{
					ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ i_hpelidx ]( pui8_ref, ps_otherref->i_stride_luma * 2, rgui8_delta, 16 );
					pui8_deltasrc = rgui8_delta;
					i_deltasrc_stride = 16;
				}
				else
				{
					pui8_deltasrc = pui8_ref;
					i_deltasrc_stride = ps_otherref->i_stride_luma * 2;
				}
				/* target = clamp( 2 * src - other prediction ) over the 16x8 lines of source field i_field_idx */
				for( i_mv_y = 0; i_mv_y < 8; i_mv_y++ )
				{
					for( i_mv_x = 0; i_mv_x < 16; i_mv_x++ )
					{
						rgui8_delta[ i_mv_x + i_mv_y * 16 ] = MIN( 255, MAX( 0, ( ps_mb->pui8_src_luma[ i_mv_x + i_mv_y * ps_mb->i_src_luma_stride * 2 + ( ps_mb->i_src_luma_stride * i_field_idx ) ] << 1 ) - pui8_deltasrc[ i_mv_x + i_mv_y * i_deltasrc_stride ] ) );
					}
				}

				i_best_mv_x = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_x;
				i_best_mv_y = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_y;
				/* the reference field chosen by the single-direction search is kept; only x / y are refined */
				i_search_field_idx = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_field == Y262_MV_TOP_FIELD ? 0 : 1;

				s_me.i_ref_width = ps_y262->i_sequence_width;
				s_me.i_ref_height = ps_y262->i_sequence_height >> 1;
				s_me.i_ref_stride = ps_ref->i_stride_luma << 1;
				s_me.pui8_ref = ps_ref->pui8_luma + ( i_search_field_idx * ps_ref->i_stride_luma );
				s_me.i_best_mv_x = i_best_mv_x;
				s_me.i_best_mv_y = i_best_mv_y;

				y262_hpel_motion_search( ps_y262, &s_me );

				ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_x = s_me.i_best_mv_x;
				ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_y = s_me.i_best_mv_y;
				ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_field = i_search_field_idx == 0 ? Y262_MV_TOP_FIELD : Y262_MV_BOTTOM_FIELD;

				if( s_me.i_best_mv_x == i_best_mv_x && s_me.i_best_mv_y == i_best_mv_y )
				{
					break;
				}
			}
			/* always taken: add this field's cost to the running total */
			if( 1 )
			{
				int32_t i_bits = 0;
				for( i_dir = 0; i_dir < 2; i_dir++ )
				{
					if( i_dir == 0 )
					{
						ps_ref = ps_y262->ps_refpic_forward;
					}
					else
					{
						ps_ref = ps_y262->ps_refpic_backward;
					}
					i_mv_x = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_x;
					i_mv_y = ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_y;
					pui8_ref = ps_ref->pui8_luma + ( ps_mb->i_mb_x + ( i_mv_x >> 1 ) ) + ( ( ( ps_mb->i_mb_y >> 1 ) + ( i_mv_y >> 1 ) ) * ps_ref->i_stride_luma * 2 );
					if( ps_slice->s_mode_decision.s_bi_il_mv[ i_field_idx ][ i_dir ].i_field == Y262_MV_BOTTOM_FIELD )
					{
						pui8_ref += ps_ref->i_stride_luma;
					}
					i_hpelidx = ( ( i_mv_x ) & 1 ) | ( ( ( i_mv_y ) & 1 ) << 1 );
					if( i_dir == 0 )
					{
						ps_y262->s_funcs.rgf_motcomp_copy[ MC_BLOCK_16x8 ][ i_hpelidx ]( pui8_ref, ps_ref->i_stride_luma * 2, rgui8_delta, 16 );
					}
					else
					{
						ps_y262->s_funcs.rgf_motcomp_avg[ MC_BLOCK_16x8 ][ i_hpelidx ]( pui8_ref, ps_ref->i_stride_luma * 2, rgui8_delta, 16 );
					}
					i_bits += ps_y262->rgi_y262_motion_bits_x[ 2048 + i_mv_x - ps_slice->rgi_pmv[ i_field_idx ][ i_dir ][ 0 ] ];
					i_bits += ps_y262->rgi_y262_motion_bits_y[ 2048 + i_mv_y - ps_slice->rgi_pmv[ i_field_idx ][ i_dir ][ 1 ] ];
				}
				i_cost += ps_y262->s_funcs.rgf_satd[ MC_BLOCK_16x8 ]( ps_mb->pui8_src_luma + ( ps_mb->i_src_luma_stride * i_field_idx ), ps_mb->i_src_luma_stride * 2, rgui8_delta, 16 );
				i_cost += i_bits * ps_mb->i_lambda_sqr;
			}
		}
		ps_slice->s_mode_decision.i_bi_il_cost = i_cost;
	}
	else
	{
		assert( FALSE );
	}
}

/*
 * Counts how often y262_qprd_mbmode() ended on each quantizer offset; slot
 * index is ( chosen quantizer - original quantizer + 32 ). It is only written
 * in the code visible here.
 * NOTE(review): file scope and incremented without synchronisation - confirm
 * y262_qprd_mbmode() is never run concurrently by several slice threads, or
 * that the counts are diagnostic only.
 */
static int32_t rgi_quantizer_delta[ 64 ] = { 0 };
/*
 * y262_qprd_mbmode
 *
 * Searches for a macroblock quantizer that lowers the cost returned by
 * y262_get_mbmode_cost() for the given mode.
 *
 * The quantizers one below and one above the current one are tried first
 * ( where they stay within 1..31 ). If one of them beats i_mbmode_cost the
 * search keeps stepping from the best quantizer found, downwards when the
 * lower trial was cheaper than the upper one and upwards otherwise, for as
 * long as the cost strictly decreases and the quantizer stays within 1..31.
 *
 * On return ps_mb->i_quantizer and ps_mb->i_scaled_quantizer hold the best
 * quantizer found ( the original one if nothing was cheaper ). The last
 * y262_get_mbmode_cost() call made here is not necessarily the one for the
 * quantizer that is finally selected.
 *
 * ps_y262       - encoder context ( b_qscale_type selects the scale table row ).
 * ps_slice      - slice context holding the macroblock.
 * i_mbmode      - macroblock mode that is being costed.
 * i_mbmode_cost - baseline cost, treated as the cost of i_mbmode at the
 *                 quantizer found in ps_mb->i_quantizer on entry.
 */
void y262_qprd_mbmode( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode, int32_t i_mbmode_cost )
{
	int32_t i_original_quantizer, i_quantizer, i_best_quantizer, i_best_cost, i_cost, i_dir, i_down, i_up;
	y262_macroblock_t *ps_mb;

	ps_mb = &ps_slice->s_macroblock;

	i_original_quantizer = i_best_quantizer = ps_mb->i_quantizer;
	i_best_cost = i_mbmode_cost;

	/* trial one step down; MAX_COST marks the step as unavailable */
	if( i_original_quantizer > 1 )
	{
		ps_mb->i_quantizer = i_original_quantizer - 1;
		ps_mb->i_scaled_quantizer = rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ ps_mb->i_quantizer ];
		i_down = y262_get_mbmode_cost( ps_y262, ps_slice, i_mbmode );
		if( i_down < i_best_cost )
		{
			i_best_cost = i_down;
			i_best_quantizer = ps_mb->i_quantizer;
		}
	}
	else
	{
		i_down = MAX_COST;
	}

	/* trial one step up */
	if( i_original_quantizer < 31 )
	{
		ps_mb->i_quantizer = i_original_quantizer + 1;
		ps_mb->i_scaled_quantizer = rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ ps_mb->i_quantizer ];
		i_up = y262_get_mbmode_cost( ps_y262, ps_slice, i_mbmode );
		if( i_up < i_best_cost )
		{
			i_best_cost = i_up;
			i_best_quantizer = ps_mb->i_quantizer;
		}
	}
	else
	{
		i_up = MAX_COST;
	}

	/* one of the neighbours was cheaper: keep walking in the cheaper direction */
	if( i_best_quantizer != i_original_quantizer )
	{
		if( i_down < i_up )
		{
			i_dir = -1;
		}
		else
		{
			i_dir = 1;
		}
		while( 1 )
		{
			i_quantizer = i_best_quantizer + i_dir;
			if( i_quantizer > 0 && i_quantizer < 32 )
			{
				ps_mb->i_quantizer = i_quantizer;
				ps_mb->i_scaled_quantizer = rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ ps_mb->i_quantizer ];
				i_cost = y262_get_mbmode_cost( ps_y262, ps_slice, i_mbmode );
				if( i_cost < i_best_cost )
				{
					i_best_cost = i_cost;
					i_best_quantizer = i_quantizer;
				}
				else
				{
					/* first step that does not improve ends the walk */
					break;
				}
			}
			else
			{
				break;
			}
		}
	}

	ps_mb->i_quantizer = i_best_quantizer;
	ps_mb->i_scaled_quantizer = rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ ps_mb->i_quantizer ];
	/* assuming the quantizer on entry is within 1..31 the index stays within 2..62 */
	rgi_quantizer_delta[ i_best_quantizer - i_original_quantizer + 32 ]++;
}

/*
 * y262_bskip_valid
 *
 * Checks whether the 16x16 macroblock, displaced by the current motion vector
 * predictors, stays inside the picture. Only the directions flagged in
 * ps_slice->i_last_mb_motion_flags are tested, each against
 * ps_slice->rgi_pmv[ 0 ][ direction ]. The comparison is done in doubled
 * ( half pel ) units: the block must lie within 0 .. 2 * i_sequence_width
 * horizontally and 0 .. 2 * i_sequence_height vertically.
 *
 * Returns TRUE when every tested direction stays inside ( also when no
 * direction flag is set ), FALSE otherwise.
 * NOTE(review): going by the name this gates skipped macroblocks in B
 * pictures; the picture type itself is not checked here.
 */
bool_t y262_bskip_valid( y262_t *ps_y262, y262_slice_t *ps_slice )
{
	bool_t b_allow_skip;
	y262_macroblock_t *ps_mb;
	ps_mb = &ps_slice->s_macroblock;

	b_allow_skip = TRUE;
	if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_FORWARD )
	{
		/* left / right picture edge */
		if( ( ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ] + ( ps_mb->i_mb_x << 1 ) ) < 0 ||
			( ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ] + ( ( 16 + ps_mb->i_mb_x ) << 1 ) ) > ( ps_y262->i_sequence_width << 1 ) )
		{
			b_allow_skip = FALSE;
		}
		/* top / bottom picture edge */
		if( ( ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ] + ( ps_mb->i_mb_y << 1 ) ) < 0 ||
			( ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ] + ( ( 16 + ps_mb->i_mb_y ) << 1 ) ) > ( ps_y262->i_sequence_height << 1 ) )
		{
			b_allow_skip = FALSE;
		}
	}
	if( ps_slice->i_last_mb_motion_flags & MACROBLOCK_MOTION_BACKWARD )
	{
		if( ( ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ] + ( ps_mb->i_mb_x << 1 ) ) < 0 ||
			( ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ] + ( ( 16 + ps_mb->i_mb_x ) << 1 ) ) > ( ps_y262->i_sequence_width << 1 ) )
		{
			b_allow_skip = FALSE;
		}
		if( ( ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ] + ( ps_mb->i_mb_y << 1 ) ) < 0 ||
			( ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ] + ( ( 16 + ps_mb->i_mb_y ) << 1 ) ) > ( ps_y262->i_sequence_height << 1 ) )
		{
			b_allow_skip = FALSE;
		}
	}
	return b_allow_skip;
}


/* NOTE: the definition below continues past the end of this chunk; its opening lines are reproduced unchanged. */
void y262_encode_macroblock( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mb_idx, bool_t *pb_skip )
{
	int32_t i_best_mbmode, i_best_cost, i_best_satd_cost;
	bool_t b_no_frame, b_no_field;
	y262_macroblock_t *ps_mb;

	*pb_skip = FALSE;
	ps_mb = &ps_slice->s_macroblock;

	b_no_frame = b_no_field = FALSE;
	if( !ps_y262->b_frame_pred_frame_dct &&
ps_y262->i_quality_for_speed < -20 ) + { + b_no_frame = y262_16x16_frame_field_pel_decision( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride ); + b_no_field = !b_no_frame; + } + + if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_I ) + { + if( ps_y262->b_frame_pred_frame_dct == FALSE ) + { + i_best_satd_cost = MAX_COST; + i_best_mbmode = Y262_MBMODE_INTRA; + + if( !b_no_frame ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( ps_slice->s_mode_decision.i_intra_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_cost; + i_best_mbmode = Y262_MBMODE_INTRA; + } + } + else + { + ps_slice->s_mode_decision.i_intra_cost = MAX_COST; + } + + if( !b_no_field ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA_IL ); + if( ps_slice->s_mode_decision.i_intra_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_il_cost; + i_best_mbmode = Y262_MBMODE_INTRA_IL; + } + } + else + { + ps_slice->s_mode_decision.i_intra_il_cost = MAX_COST; + } + + if( ps_y262->i_quality_for_speed >= -25 ) + { + i_best_cost = MAX_COST; + if( ps_slice->s_mode_decision.i_intra_cost <= ( i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( 40 + ps_y262->i_quality_for_speed ) ) ) ) + { + y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_intra_cost; + i_best_mbmode = Y262_MBMODE_INTRA; + } + } + if( ps_slice->s_mode_decision.i_intra_il_cost <= ( i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( 40 + ps_y262->i_quality_for_speed ) ) ) ) + { + y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_il_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_intra_il_cost; + i_best_mbmode = Y262_MBMODE_INTRA_IL; + } + } + } + + if( ps_y262->i_quality_for_speed > 10 
) + { + y262_qprd_mbmode( ps_y262, ps_slice, i_best_mbmode, i_best_cost ); + } + + if( i_best_mbmode == Y262_MBMODE_INTRA ) + { + y262_encode_macroblock_intra( ps_y262, ps_slice, FALSE ); + } + else + { + y262_encode_macroblock_intra( ps_y262, ps_slice, TRUE ); + } + } + else + { + /* qprd ? */ + y262_encode_macroblock_intra( ps_y262, ps_slice, FALSE ); + } + } + else if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P ) + { + if( !b_no_frame ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW ); + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_cost; + i_best_mbmode = Y262_MBMODE_FW; + } + else + { + ps_slice->s_mode_decision.i_fw_cost = MAX_COST; + + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_il_cost; + i_best_mbmode = Y262_MBMODE_FW_IL; + } + + if( ps_slice->b_allow_skip ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_SKIP ); + if( i_best_satd_cost > ps_slice->s_mode_decision.i_skip_cost ) + { + if( !y262_encode_macroblock_inter( ps_y262, ps_slice, Y262_MBMODE_SKIP ) ) + { + //i_best_satd_cost = ps_slice->s_mode_decision.i_skip_cost; + i_best_mbmode = Y262_MBMODE_SKIP; + if( ps_y262->i_quality_for_speed < -5 ) + { + goto fast_skip_p; + } + } + } + } + + if( !b_no_frame ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( ps_slice->s_mode_decision.i_intra_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_cost; + i_best_mbmode = Y262_MBMODE_INTRA; + } + } + else + { + ps_slice->s_mode_decision.i_intra_cost = MAX_COST; + } + + if( ps_y262->b_frame_pred_frame_dct == FALSE && !b_no_field ) + { + if( !b_no_frame ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + if( ps_slice->s_mode_decision.i_fw_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_il_cost; + i_best_mbmode = Y262_MBMODE_FW_IL; + } + } + y262_get_mbmode_satd_cost( ps_y262, 
ps_slice, Y262_MBMODE_INTRA_IL ); + if( ps_slice->s_mode_decision.i_intra_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_il_cost; + i_best_mbmode = Y262_MBMODE_INTRA_IL; + } + } + else + { + ps_slice->s_mode_decision.i_fw_il_cost = MAX_COST; + ps_slice->s_mode_decision.i_intra_il_cost = MAX_COST; + } + + if( ps_y262->i_quality_for_speed >= -30 ) + { + i_best_cost = MAX_COST; + if( ps_slice->s_mode_decision.i_fw_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_fw_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_FW ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_fw_cost < i_best_cost ) + { + i_best_mbmode = Y262_MBMODE_FW; + i_best_cost = ps_slice->s_mode_decision.i_fw_cost; + } + } + + if( ps_slice->s_mode_decision.i_intra_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 50 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_intra_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_cost < i_best_cost ) + { + i_best_mbmode = Y262_MBMODE_INTRA; + i_best_cost = ps_slice->s_mode_decision.i_intra_cost; + } + } + + if( ps_y262->b_frame_pred_frame_dct == FALSE ) + { + if( ps_slice->s_mode_decision.i_fw_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_fw_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_fw_il_cost < i_best_cost ) + { + i_best_mbmode = Y262_MBMODE_FW_IL; + i_best_cost = ps_slice->s_mode_decision.i_fw_il_cost; + } + } + + if( ps_slice->s_mode_decision.i_intra_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) 
+ i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) + { + ps_slice->s_mode_decision.i_intra_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_il_cost < i_best_cost ) + { + i_best_mbmode = Y262_MBMODE_INTRA_IL; + i_best_cost = ps_slice->s_mode_decision.i_intra_il_cost; + } + } + } + + if( ps_slice->b_allow_skip ) + { + ps_slice->s_mode_decision.i_skip_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_SKIP ); + if( ps_slice->s_mode_decision.i_skip_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_skip_cost; + i_best_mbmode = Y262_MBMODE_SKIP; + } + } + } + +fast_skip_p: + if( i_best_mbmode != Y262_MBMODE_INTRA && i_best_mbmode != Y262_MBMODE_INTRA_IL ) + { + y262_encode_macroblock_inter( ps_y262, ps_slice, i_best_mbmode ); + + if( i_best_mbmode != Y262_MBMODE_SKIP && ps_mb->i_cbp && ps_y262->i_quality_for_speed > 10 ) + { + y262_qprd_mbmode( ps_y262, ps_slice, i_best_mbmode, i_best_cost ); + y262_encode_macroblock_inter( ps_y262, ps_slice, i_best_mbmode ); + } + + if( i_best_mbmode == Y262_MBMODE_SKIP ) + { + *pb_skip = TRUE; + } + } + else + { + if( ps_y262->i_quality_for_speed > 10 ) + { + y262_qprd_mbmode( ps_y262, ps_slice, i_best_mbmode, i_best_cost ); + } + y262_encode_macroblock_intra( ps_y262, ps_slice, i_best_mbmode == Y262_MBMODE_INTRA_IL ); + } + } + else + { + bool_t b_allow_skip, b_backward_pred_only; + assert( ps_slice->i_picture_type == PICTURE_CODING_TYPE_B ); + + b_backward_pred_only = ps_y262->ps_input_picture->b_backward_pred_only; + + ps_slice->s_mode_decision.i_fw_cost = MAX_COST; + ps_slice->s_mode_decision.i_bw_cost = MAX_COST; + ps_slice->s_mode_decision.i_bi_cost = MAX_COST; + ps_slice->s_mode_decision.i_fw_il_cost = MAX_COST; + ps_slice->s_mode_decision.i_bw_il_cost = MAX_COST; + ps_slice->s_mode_decision.i_bi_il_cost = MAX_COST; + ps_slice->s_mode_decision.i_intra_il_cost 
= MAX_COST; + ps_slice->s_mode_decision.i_skip_cost = MAX_COST; + + + if( !b_no_frame ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_BW ); + i_best_satd_cost = ps_slice->s_mode_decision.i_bw_cost; + i_best_mbmode = Y262_MBMODE_BW; + + if( !b_backward_pred_only ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW ); + if( ps_slice->s_mode_decision.i_fw_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_cost; + i_best_mbmode = Y262_MBMODE_FW; + } + + y262_get_mbmode_motion_bi( ps_y262, ps_slice, Y262_MBMODE_BI ); + if( ps_slice->s_mode_decision.i_bi_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_bi_cost; + i_best_mbmode = Y262_MBMODE_BI; + } + } + } + else + { + if( b_backward_pred_only ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_BW_IL ); + i_best_satd_cost = ps_slice->s_mode_decision.i_bw_il_cost; + i_best_mbmode = Y262_MBMODE_BW_IL; + } + else + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_il_cost; + i_best_mbmode = Y262_MBMODE_FW_IL; + } + } + + if( ps_slice->b_allow_skip ) + { + b_allow_skip = y262_bskip_valid( ps_y262, ps_slice ); + } + else + { + b_allow_skip = FALSE; + } + + if( ps_slice->b_allow_skip & b_allow_skip ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_SKIP ); + ps_slice->s_mode_decision.i_skip_cost -= ps_mb->i_lambda_sqr * 3; /* wink wink nudge nudge */ + if( i_best_satd_cost > ps_slice->s_mode_decision.i_skip_cost ) + { + if( !y262_encode_macroblock_inter( ps_y262, ps_slice, Y262_MBMODE_SKIP ) ) + { + //i_best_satd_cost = ps_slice->s_mode_decision.i_skip_cost; + i_best_mbmode = Y262_MBMODE_SKIP; + if( ps_y262->i_quality_for_speed < -5 ) + { + goto fast_skip_b; + } + } + } + else + { + i_best_satd_cost = i_best_satd_cost; + } + } + + if( !b_no_frame ) + { + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( 
ps_slice->s_mode_decision.i_intra_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_cost; + i_best_mbmode = Y262_MBMODE_INTRA; + } + } + else + { + ps_slice->s_mode_decision.i_intra_cost = MAX_COST; + } + + if( ps_y262->b_frame_pred_frame_dct == FALSE && !b_no_field ) + { + if( !b_no_frame ) + { + if( !b_backward_pred_only ) + { + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + if( ps_slice->s_mode_decision.i_fw_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_fw_il_cost; + i_best_mbmode = Y262_MBMODE_FW_IL; + } + } + } + + y262_get_mbmode_motion( ps_y262, ps_slice, Y262_MBMODE_BW_IL ); + if( ps_slice->s_mode_decision.i_bw_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_bw_il_cost; + i_best_mbmode = Y262_MBMODE_BW_IL; + } + + if( !b_backward_pred_only ) + { + y262_get_mbmode_motion_bi( ps_y262, ps_slice, Y262_MBMODE_BI_IL ); + if( ps_slice->s_mode_decision.i_bi_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_bi_il_cost; + i_best_mbmode = Y262_MBMODE_BI_IL; + } + } + + y262_get_mbmode_satd_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA_IL ); + if( ps_slice->s_mode_decision.i_intra_il_cost < i_best_satd_cost ) + { + i_best_satd_cost = ps_slice->s_mode_decision.i_intra_il_cost; + i_best_mbmode = Y262_MBMODE_INTRA_IL; + } + } + + if( ps_y262->i_quality_for_speed >= -30 ) + { + i_best_cost = MAX_COST; + if( ps_slice->s_mode_decision.i_fw_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_fw_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_FW ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_fw_cost < i_best_cost ) + { + i_best_mbmode = Y262_MBMODE_FW; + i_best_cost = ps_slice->s_mode_decision.i_fw_cost; + } + } + + if( ps_slice->s_mode_decision.i_bw_cost <= ( ( 
ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_bw_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_BW ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_bw_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_bw_cost; + i_best_mbmode = Y262_MBMODE_BW; + } + } + + if( ps_slice->s_mode_decision.i_bi_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_bi_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_BI ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_bi_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_bi_cost; + i_best_mbmode = Y262_MBMODE_BI; + } + } + + if( ps_slice->s_mode_decision.i_intra_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 50 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_intra_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_intra_cost; + i_best_mbmode = Y262_MBMODE_INTRA; + } + } + + if( ps_y262->b_frame_pred_frame_dct == FALSE ) + { + bool_t b_try_bi = FALSE; + if( ps_slice->s_mode_decision.i_fw_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + b_try_bi = TRUE; + ps_slice->s_mode_decision.i_fw_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_FW_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_fw_il_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_fw_il_cost; + i_best_mbmode = Y262_MBMODE_FW_IL; + } + } + + if( 
ps_slice->s_mode_decision.i_bw_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + b_try_bi = TRUE; + ps_slice->s_mode_decision.i_bw_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_BW_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_bw_il_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_bw_il_cost; + i_best_mbmode = Y262_MBMODE_BW_IL; + } + } + + if( ps_slice->s_mode_decision.i_bi_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 20 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_bi_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_BI_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_bi_il_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_bi_il_cost; + i_best_mbmode = Y262_MBMODE_BI_IL; + } + } + + if( ps_slice->s_mode_decision.i_intra_il_cost <= ( ( ps_mb->i_lambda_sqr * 5 ) + i_best_satd_cost + ( ( i_best_satd_cost / 100 ) * ( MAX( 0, 50 + ps_y262->i_quality_for_speed ) ) ) ) ) + { + ps_slice->s_mode_decision.i_intra_il_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_INTRA_IL ); + if( i_best_cost == MAX_COST || ps_slice->s_mode_decision.i_intra_il_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_intra_il_cost; + i_best_mbmode = Y262_MBMODE_INTRA_IL; + } + } + } + + if( ps_slice->b_allow_skip & b_allow_skip ) + { + ps_slice->s_mode_decision.i_skip_cost = y262_get_mbmode_cost( ps_y262, ps_slice, Y262_MBMODE_SKIP ); + if( ps_slice->s_mode_decision.i_skip_cost < i_best_cost ) + { + i_best_cost = ps_slice->s_mode_decision.i_skip_cost; + i_best_mbmode = Y262_MBMODE_SKIP; + } + } + } + +fast_skip_b: + if( i_best_mbmode != Y262_MBMODE_INTRA && i_best_mbmode != Y262_MBMODE_INTRA_IL ) + { + y262_encode_macroblock_inter( ps_y262, ps_slice, i_best_mbmode ); + + if( 
i_best_mbmode != Y262_MBMODE_SKIP && ps_mb->i_cbp && ps_y262->i_quality_for_speed > 10 ) + { + y262_qprd_mbmode( ps_y262, ps_slice, i_best_mbmode, i_best_cost ); + y262_encode_macroblock_inter( ps_y262, ps_slice, i_best_mbmode ); + } + + if( b_backward_pred_only ) + { + if( ps_mb->i_macroblock_type & MACROBLOCK_MOTION_FORWARD ) + { + int32_t *pi_null = NULL; /* fatal */ + *pi_null = 0; + } + } + + if( i_best_mbmode == Y262_MBMODE_SKIP ) + { + *pb_skip = TRUE; + } + } + else + { + if( ps_y262->i_quality_for_speed > 10 ) + { + y262_qprd_mbmode( ps_y262, ps_slice, i_best_mbmode, i_best_cost ); + } + y262_encode_macroblock_intra( ps_y262, ps_slice, i_best_mbmode == Y262_MBMODE_INTRA_IL ); + } + } +} + + +int32_t y262_write_intra_block_get_vlc0_idx( y262_t *ps_y262, int32_t i_run, int32_t i_level ) +{ + int32_t i_rl_idx; + for( i_rl_idx = 0; rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_code != VLC_SENTINEL; i_rl_idx++ ) + { + if( rgs_y262_dct_coefficients_lookup_table_zero[ i_rl_idx ].i_level == i_level && + rgs_y262_dct_coefficients_lookup_table_zero[ i_rl_idx ].i_run == i_run ) + { + return i_rl_idx; + } + } + return -1; +} + +void y262_write_intra_block_mpeg2( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + int32_t i_dct_differential, i_dct_co, i_sign, i_dc_size, i_delta, i_idx; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_dct_co = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ 0 ]; + i_delta = i_dct_co - ps_slice->rgi_dc_dct_pred[ i_plane_idx ]; + ps_slice->rgi_dc_dct_pred[ i_plane_idx ] = i_dct_co; + + i_sign = i_delta < 0; + i_delta = i_delta < 0 ? 
( -i_delta ) : i_delta; + + i_dc_size = 0; + while( ( i_delta ) >= ( 1 << i_dc_size ) ) + { + i_dc_size++; + } + i_dct_differential = i_delta; + if( i_sign ) + { + i_dct_differential = ( -i_dct_differential - 1 ) + ( 1 << i_dc_size ); + } + + if( i_plane_idx == 0 ) + { + for( i_idx = 0; rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_dct_dc_size_luminance_lookup_table[ i_idx ] == i_dc_size ) + { + break; + } + } + if( rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code, rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_length ); + if( i_dc_size > 0 ) + { + y262_bitstream_write( ps_bitstream, i_dct_differential, i_dc_size ); + } + } + else + { + for( i_idx = 0; rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_dct_dc_size_chrominance_lookup_table[ i_idx ] == i_dc_size ) + { + break; + } + } + if( rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_code, rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_length ); + if( i_dc_size > 0 ) + { + y262_bitstream_write( ps_bitstream, i_dct_differential, i_dc_size ); + } + } + + if( ps_y262->b_intra_vlc_format ) + { + assert( FALSE ); + } + else + { + int32_t i_run, i_level, i_level_sign, i_sign, i_rl_idx, i_escape, i_eob; + + i_escape = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE ); + i_eob = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK ); + + assert( i_escape >= 0 ); + assert( i_eob >= 0 ); + i_run = 0; + for( i_idx = 1; i_idx < 64; i_idx++ ) + { + i_level = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ rgui8_y262_scan_0_table[ i_idx ] ]; + if( i_level != 0 ) + { + 
i_level_sign = i_level; + + if( i_level < 0 ) + { + i_sign = 1; + i_level = -i_level; + } + else + { + i_sign = 0; + } + + i_rl_idx = y262_write_intra_block_get_vlc0_idx( ps_y262, i_run, i_level ); + + if( i_rl_idx < 0 ) + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_code, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_length ); + y262_bitstream_write( ps_bitstream, i_run, 6 ); + + y262_bitstream_write( ps_bitstream, i_level_sign & 0xfff, 12 ); + } + else + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_code, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_length ); + y262_bitstream_write( ps_bitstream, i_sign, 1 ); + } + + i_run = 0; + } + else + { + i_run++; + } + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_code, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_length ); + } +} + + +void y262_write_intra_block_mpeg1( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + int32_t i_dct_differential, i_dct_co, i_sign, i_dc_size, i_delta, i_idx; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_dct_co = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ 0 ]; + i_delta = i_dct_co - ps_slice->rgi_dc_dct_pred[ i_plane_idx ]; + ps_slice->rgi_dc_dct_pred[ i_plane_idx ] = i_dct_co; + + i_sign = i_delta < 0; + i_delta = i_delta < 0 ? 
( -i_delta ) : i_delta; + + i_dc_size = 0; + while( ( i_delta ) >= ( 1 << i_dc_size ) ) + { + i_dc_size++; + } + i_dct_differential = i_delta; + if( i_sign ) + { + i_dct_differential = ( -i_dct_differential - 1 ) + ( 1 << i_dc_size ); + } + + if( i_plane_idx == 0 ) + { + for( i_idx = 0; rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_dct_dc_size_luminance_lookup_table[ i_idx ] == i_dc_size ) + { + break; + } + } + if( rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code, rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_length ); + if( i_dc_size > 0 ) + { + y262_bitstream_write( ps_bitstream, i_dct_differential, i_dc_size ); + } + } + else + { + for( i_idx = 0; rgs_y262_dct_dc_size_luminance_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_dct_dc_size_chrominance_lookup_table[ i_idx ] == i_dc_size ) + { + break; + } + } + if( rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_code, rgs_y262_dct_dc_size_chrominance_table[ i_idx ].i_length ); + if( i_dc_size > 0 ) + { + y262_bitstream_write( ps_bitstream, i_dct_differential, i_dc_size ); + } + } + + if( ps_y262->b_intra_vlc_format ) + { + assert( FALSE ); + } + else + { + int32_t i_run, i_level, i_level_sign, i_sign, i_rl_idx, i_escape, i_eob; + + i_escape = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE ); + i_eob = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK ); + + assert( i_escape >= 0 ); + assert( i_eob >= 0 ); + i_run = 0; + for( i_idx = 1; i_idx < 64; i_idx++ ) + { + i_level = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ rgui8_y262_scan_0_table[ i_idx ] ]; + if( i_level != 0 ) + { + 
i_level_sign = i_level; + + if( i_level < 0 ) + { + i_sign = 1; + i_level = -i_level; + } + else + { + i_sign = 0; + } + + i_rl_idx = y262_write_intra_block_get_vlc0_idx( ps_y262, i_run, i_level ); + + if( i_rl_idx < 0 ) + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_code, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_length ); + y262_bitstream_write( ps_bitstream, i_run, 6 ); + + if( i_level_sign < -127 ) + { + y262_bitstream_write( ps_bitstream, ( -127 & 0xff ), 8 ); + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + else if( i_level_sign < 128 ) + { + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + else + { + y262_bitstream_write( ps_bitstream, 0, 8 ); + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + } + else + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_code, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_length ); + y262_bitstream_write( ps_bitstream, i_sign, 1 ); + } + + i_run = 0; + } + else + { + i_run++; + } + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_code, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_length ); + } +} + +void y262_write_intra_block( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + if( !ps_y262->b_sequence_mpeg1 ) + { + y262_write_intra_block_mpeg2( ps_y262, ps_slice, i_plane_idx, i_blk_idx ); + } + else + { + y262_write_intra_block_mpeg1( ps_y262, ps_slice, i_plane_idx, i_blk_idx ); + } +} + + +void y262_write_inter_block_mpeg2( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + int32_t i_sign, i_idx; + int32_t i_run, i_level, i_level_sign, i_rl_idx, i_escape, i_eob; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_escape = y262_write_intra_block_get_vlc0_idx( 
ps_y262, RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE ); + i_eob = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK ); + + assert( i_escape >= 0 ); + assert( i_eob >= 0 ); + i_run = 0; + for( i_idx = 0; i_idx < 64; i_idx++ ) + { + i_level = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ rgui8_y262_scan_0_table[ i_idx ] ]; + if( i_level != 0 ) + { + i_level_sign = i_level; + + if( i_level < 0 ) + { + i_sign = 1; + i_level = -i_level; + } + else + { + i_sign = 0; + } + + i_rl_idx = y262_write_intra_block_get_vlc0_idx( ps_y262, i_run, i_level ); + + if( i_rl_idx < 0 ) + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_code, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_length ); + y262_bitstream_write( ps_bitstream, i_run, 6 ); + + y262_bitstream_write( ps_bitstream, i_level_sign & 0xfff, 12 ); + } + else + { + if( i_idx == 0 && i_level == 1 ) + { + y262_bitstream_write( ps_bitstream, 1, 1 ); /* special case */ + } + else + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_code, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_length ); + } + + y262_bitstream_write( ps_bitstream, i_sign, 1 ); + } + + i_run = 0; + } + else + { + i_run++; + } + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_code, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_length ); +} + +void y262_write_inter_block_mpeg1( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + int32_t i_sign, i_idx; + int32_t i_run, i_level, i_level_sign, i_rl_idx, i_escape, i_eob; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_escape = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_ESCAPE, RUN_LEVEL_ESCAPE ); + i_eob = y262_write_intra_block_get_vlc0_idx( ps_y262, RUN_LEVEL_END_OF_BLOCK, RUN_LEVEL_END_OF_BLOCK ); + 
+ assert( i_escape >= 0 ); + assert( i_eob >= 0 ); + i_run = 0; + for( i_idx = 0; i_idx < 64; i_idx++ ) + { + i_level = ps_mb->rgi16_coeffs[ i_plane_idx ][ i_blk_idx ][ rgui8_y262_scan_0_table[ i_idx ] ]; + if( i_level != 0 ) + { + i_level_sign = i_level; + + if( i_level < 0 ) + { + i_sign = 1; + i_level = -i_level; + } + else + { + i_sign = 0; + } + + i_rl_idx = y262_write_intra_block_get_vlc0_idx( ps_y262, i_run, i_level ); + + if( i_rl_idx < 0 ) + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_code, rgs_y262_dct_coefficients_table_zero[ i_escape ].i_length ); + y262_bitstream_write( ps_bitstream, i_run, 6 ); + + if( i_level_sign < -127 ) + { + y262_bitstream_write( ps_bitstream, ( -127 & 0xff ), 8 ); + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + else if( i_level_sign < 128 ) + { + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + else + { + y262_bitstream_write( ps_bitstream, 0, 8 ); + y262_bitstream_write( ps_bitstream, i_level_sign & 0xff, 8 ); + } + } + else + { + if( i_idx == 0 && i_level == 1 ) + { + y262_bitstream_write( ps_bitstream, 1, 1 ); /* special case */ + } + else + { + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_code, rgs_y262_dct_coefficients_table_zero[ i_rl_idx ].i_length ); + } + + y262_bitstream_write( ps_bitstream, i_sign, 1 ); + } + + i_run = 0; + } + else + { + i_run++; + } + } + y262_bitstream_write( ps_bitstream, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_code, rgs_y262_dct_coefficients_table_zero[ i_eob ].i_length ); +} + + +void y262_write_inter_block( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + if( !ps_y262->b_sequence_mpeg1 ) + { + y262_write_inter_block_mpeg2( ps_y262, ps_slice, i_plane_idx, i_blk_idx ); + } + else + { + y262_write_inter_block_mpeg1( ps_y262, ps_slice, i_plane_idx, i_blk_idx ); + } +} + + + +void y262_write_motion_vector_delta( y262_t 
*ps_y262, y262_slice_t *ps_slice, int32_t i_fcode, int32_t i_mv ) +{ + int32_t i_idx, i_mv_delta, i_residual, i_sign, i_fcode_minus_one; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_fcode_minus_one = i_fcode - 1; + + if( i_mv == 0 ) + { + i_mv_delta = 0; + } + else + { + if( i_mv < -( 1 << ( 4 + i_fcode_minus_one ) ) ) + { + i_mv += ( 1 << ( 5 + i_fcode_minus_one ) ); + assert( i_mv >= -( 1 << ( 4 + i_fcode_minus_one ) ) ); + } + else if( i_mv >= ( 1 << ( 4 + i_fcode_minus_one ) ) ) + { + i_mv -= ( 1 << ( 5 + i_fcode_minus_one ) ); + assert( i_mv < ( 1 << ( 4 + i_fcode_minus_one ) ) ); + } + + if( i_mv < 0 ) + { + i_sign = 1; + i_mv = -i_mv; + } + else + { + i_sign = 0; + } + i_residual = ( i_mv - 1 ) & ( ( 1 << i_fcode_minus_one ) - 1 ); + i_mv_delta = 1 + ( ( i_mv - 1 ) >> i_fcode_minus_one ); + if( i_sign ) + { + i_mv_delta = -i_mv_delta; + } + } + for( i_idx = 0; rgs_y262_motion_code_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_motion_delta_lookup_table[ i_idx ] == i_mv_delta ) + { + break; + } + } + if( rgs_y262_motion_code_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_motion_code_table[ i_idx ].i_code, rgs_y262_motion_code_table[ i_idx ].i_length ); + if( i_mv_delta != 0 ) + { + y262_bitstream_write( ps_bitstream, i_residual, i_fcode_minus_one ); + } +} + + +int32_t y262_write_macroblock( y262_t *ps_y262, y262_slice_t *ps_slice ) +{ + int32_t i_idx, i_bits_start, i_num_mv; + int32_t i_motion_type; + uint32_t ui_mb_type; + y262_bitstream_t *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_bitstream = &ps_y262->s_bitstream; + ps_mb = &ps_slice->s_macroblock; + + i_bits_start = ( int32_t )( ( ( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream ) * 8 ) + ps_bitstream->i_codeword_fill ); + + while( ps_slice->i_skip_run >= 0 ) + { + int32_t i_mb_inc; + + if( 
ps_slice->i_skip_run > 33 ) + { + i_mb_inc = 33; + } + else + { + i_mb_inc = ps_slice->i_skip_run; + } + y262_bitstream_write( ps_bitstream, rgs_y262_macroblock_address_increment_table[ i_mb_inc ].i_code, rgs_y262_macroblock_address_increment_table[ i_mb_inc ].i_length ); + + ps_slice->i_skip_run -= i_mb_inc; + if( i_mb_inc < 33 ) + { + break; + } + } + + ui_mb_type = ps_mb->i_macroblock_type & 0x1f; + i_motion_type = ps_mb->i_macroblock_type / MACROBLOCK_MOTION_TYPE; + if( ps_mb->i_quantizer != ps_slice->i_quantizer && ps_mb->i_cbp ) + { + ui_mb_type |= MACROBLOCK_QUANT; + } + if( !( ui_mb_type & MACROBLOCK_INTRA ) && ps_mb->i_cbp ) + { + ui_mb_type |= MACROBLOCK_PATTERN; + } + if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_I ) + { + for( i_idx = 0; rgs_y262_macroblock_type_i_picture_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgui_y262_macroblock_type_i_picture_flags_table[ i_idx ] == ui_mb_type ) + { + break; + } + } + if( rgs_y262_macroblock_type_i_picture_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_macroblock_type_i_picture_table[ i_idx ].i_code, rgs_y262_macroblock_type_i_picture_table[ i_idx ].i_length ); + if( !ps_y262->b_frame_pred_frame_dct && 1 /* PICTURE_CODING_STRUCTURE_FRAME */ ) + { + y262_bitstream_write( ps_bitstream, ( ps_mb->i_macroblock_type & MACROBLOCK_INTERLACED ) ? 
1 : 0, 1 ); + } + } + else if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P ) + { + for( i_idx = 0; rgs_y262_macroblock_type_p_picture_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgui_y262_macroblock_type_p_picture_flags_table[ i_idx ] == ui_mb_type ) + { + break; + } + } + if( rgs_y262_macroblock_type_p_picture_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_macroblock_type_p_picture_table[ i_idx ].i_code, rgs_y262_macroblock_type_p_picture_table[ i_idx ].i_length ); + if( !ps_y262->b_frame_pred_frame_dct && 1 /* PICTURE_CODING_STRUCTURE_FRAME */ ) + { + if( ui_mb_type & MACROBLOCK_MOTION_FORWARD ) + { + y262_bitstream_write( ps_bitstream, i_motion_type, 2 ); + } + if( ui_mb_type & ( MACROBLOCK_INTRA | MACROBLOCK_PATTERN ) ) + { + y262_bitstream_write( ps_bitstream, ( ps_mb->i_macroblock_type & MACROBLOCK_INTERLACED ) ? 1 : 0, 1 ); + } + i_num_mv = i_motion_type == FRAME_MOTION_TYPE_FRAME ? 1 : 2; + } + else + { + i_num_mv = 1; + } + } + else + { + assert( ps_slice->i_picture_type == PICTURE_CODING_TYPE_B ); + + for( i_idx = 0; rgs_y262_macroblock_type_b_picture_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgui_y262_macroblock_type_b_picture_flags_table[ i_idx ] == ui_mb_type ) + { + break; + } + } + if( rgs_y262_macroblock_type_b_picture_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_macroblock_type_b_picture_table[ i_idx ].i_code, rgs_y262_macroblock_type_b_picture_table[ i_idx ].i_length ); + if( !ps_y262->b_frame_pred_frame_dct && 1 /* PICTURE_CODING_STRUCTURE_FRAME */ ) + { + if( !( ui_mb_type & MACROBLOCK_INTRA ) ) + { + y262_bitstream_write( ps_bitstream, i_motion_type, 2 ); + } + if( ui_mb_type & ( MACROBLOCK_INTRA | MACROBLOCK_PATTERN ) ) + { + y262_bitstream_write( ps_bitstream, ( ps_mb->i_macroblock_type & MACROBLOCK_INTERLACED ) ? 
1 : 0, 1 ); + } + i_num_mv = i_motion_type == FRAME_MOTION_TYPE_FRAME ? 1 : 2; + } + else + { + i_num_mv = 1; + } + } + + if( ui_mb_type & MACROBLOCK_QUANT ) + { + y262_bitstream_write( ps_bitstream, ps_mb->i_quantizer, 5 ); + ps_slice->i_quantizer = ps_mb->i_quantizer; + } + + if( ui_mb_type & MACROBLOCK_MOTION_FORWARD ) + { + if( i_num_mv == 1 ) + { + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_x - ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 0 ] ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_y - ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 1 ] ); + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_x; + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_y; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_x; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_y; + } + else + { + y262_bitstream_write( ps_bitstream, ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_field, 1 ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_x - ( ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 0 ] ) ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_y - ( ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 1 ] >> 1 ) ); + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_FORWARD ].i_x; + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_FORWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ 
PICTURE_CODING_FORWARD ].i_y << 1; + y262_bitstream_write( ps_bitstream, ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_FORWARD ].i_field, 1 ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 0 ], ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_FORWARD ].i_x - ( ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 0 ] ) ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_FORWARD ][ 1 ], ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_FORWARD ].i_y - ( ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 1 ] >> 1 ) ); + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 0 ] = ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_FORWARD ].i_x; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_FORWARD ][ 1 ] = ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_FORWARD ].i_y << 1; + } + } + if( ui_mb_type & MACROBLOCK_MOTION_BACKWARD ) + { + if( i_num_mv == 1 ) + { + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_x - ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 0 ] ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_y - ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 1 ] ); + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_x; + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_y; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_x; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_y; + } + else + { + y262_bitstream_write( ps_bitstream, ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_field, 1 ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ 
PICTURE_CODING_BACKWARD ][ 0 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_x - ( ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 0 ] ) ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ], ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_y - ( ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 1 ] >> 1 ) ); + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 0 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_x; + ps_slice->rgi_pmv[ 0 ][ PICTURE_CODING_BACKWARD ][ 1 ] = ps_mb->rgs_motion[ 0 ][ PICTURE_CODING_BACKWARD ].i_y << 1; + y262_bitstream_write( ps_bitstream, ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_BACKWARD ].i_field, 1 ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 0 ], ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_BACKWARD ].i_x - ( ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 0 ] ) ); + y262_write_motion_vector_delta( ps_y262, ps_slice, ps_y262->rgi_fcode[ PICTURE_CODING_BACKWARD ][ 1 ], ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_BACKWARD ].i_y - ( ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 1 ] >> 1 ) ); + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 0 ] = ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_BACKWARD ].i_x; + ps_slice->rgi_pmv[ 1 ][ PICTURE_CODING_BACKWARD ][ 1 ] = ps_mb->rgs_motion[ 1 ][ PICTURE_CODING_BACKWARD ].i_y << 1; + } + } + + if( ui_mb_type & MACROBLOCK_INTRA ) + { + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + y262_write_intra_block( ps_y262, ps_slice, 0, i_idx ); + } + switch( ps_y262->i_sequence_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + y262_write_intra_block( ps_y262, ps_slice, 1, 0 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 0 ); + break; + case Y262_CHROMA_FORMAT_422: + y262_write_intra_block( ps_y262, ps_slice, 1, 0 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 0 ); + y262_write_intra_block( ps_y262, ps_slice, 1, 1 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 1 ); + 
break; + case Y262_CHROMA_FORMAT_444: + y262_write_intra_block( ps_y262, ps_slice, 1, 0 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 0 ); + y262_write_intra_block( ps_y262, ps_slice, 1, 2 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 2 ); + y262_write_intra_block( ps_y262, ps_slice, 1, 1 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 1 ); + y262_write_intra_block( ps_y262, ps_slice, 1, 3 ); + y262_write_intra_block( ps_y262, ps_slice, 2, 3 ); + break; + } + } + else if( ui_mb_type & MACROBLOCK_PATTERN ) + { + uint32_t ui_pattern, ui_chroma_extra, ui_chroma_extra_len; + + ui_pattern = ps_mb->rgb_cbp[ 0 ][ 0 ] << 5; + ui_pattern |= ps_mb->rgb_cbp[ 0 ][ 1 ] << 4; + ui_pattern |= ps_mb->rgb_cbp[ 0 ][ 2 ] << 3; + ui_pattern |= ps_mb->rgb_cbp[ 0 ][ 3 ] << 2; + switch( ps_y262->i_sequence_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + ui_pattern |= ps_mb->rgb_cbp[ 1 ][ 0 ] << 1; + ui_pattern |= ps_mb->rgb_cbp[ 2 ][ 0 ] << 0; + ui_chroma_extra = ui_chroma_extra_len = 0; + break; + case Y262_CHROMA_FORMAT_422: + ui_pattern |= ps_mb->rgb_cbp[ 1 ][ 0 ] << 1; + ui_pattern |= ps_mb->rgb_cbp[ 2 ][ 0 ] << 0; + ui_chroma_extra = 0; + ui_chroma_extra_len = 2; + ui_chroma_extra |= ps_mb->rgb_cbp[ 1 ][ 1 ] << 1; + ui_chroma_extra |= ps_mb->rgb_cbp[ 2 ][ 1 ] << 0; + break; + case Y262_CHROMA_FORMAT_444: + ui_pattern |= ps_mb->rgb_cbp[ 1 ][ 0 ] << 1; + ui_pattern |= ps_mb->rgb_cbp[ 2 ][ 0 ] << 0; + ui_chroma_extra = 0; + ui_chroma_extra_len = 6; + ui_chroma_extra |= ps_mb->rgb_cbp[ 1 ][ 2 ] << 5; + ui_chroma_extra |= ps_mb->rgb_cbp[ 2 ][ 2 ] << 4; + ui_chroma_extra |= ps_mb->rgb_cbp[ 1 ][ 1 ] << 3; + ui_chroma_extra |= ps_mb->rgb_cbp[ 2 ][ 1 ] << 2; + ui_chroma_extra |= ps_mb->rgb_cbp[ 1 ][ 3 ] << 1; + ui_chroma_extra |= ps_mb->rgb_cbp[ 2 ][ 3 ] << 0; + break; + } + for( i_idx = 0; rgs_y262_coded_block_pattern_table[ i_idx ].i_code != VLC_SENTINEL; i_idx++ ) + { + if( rgi_y262_coded_block_pattern_lookup_table[ i_idx ] == ui_pattern ) + { + break; + } + } + if( 
rgs_y262_coded_block_pattern_table[ i_idx ].i_code == VLC_SENTINEL ) + { + assert( FALSE ); + } + y262_bitstream_write( ps_bitstream, rgs_y262_coded_block_pattern_table[ i_idx ].i_code, rgs_y262_coded_block_pattern_table[ i_idx ].i_length ); + if( ui_chroma_extra_len > 0 ) + { + y262_bitstream_write( ps_bitstream, ui_chroma_extra, ui_chroma_extra_len ); + } + + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + if( ps_mb->rgb_cbp[ 0 ][ i_idx ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 0, i_idx ); + } + } + switch( ps_y262->i_sequence_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + if( ps_mb->rgb_cbp[ 1 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 0 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 0 ); + } + break; + case Y262_CHROMA_FORMAT_422: + if( ps_mb->rgb_cbp[ 1 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 0 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 0 ); + } + if( ps_mb->rgb_cbp[ 1 ][ 1 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 1 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 1 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 1 ); + } + break; + case Y262_CHROMA_FORMAT_444: + if( ps_mb->rgb_cbp[ 1 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 0 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 0 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 0 ); + } + if( ps_mb->rgb_cbp[ 1 ][ 2 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 2 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 2 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 2 ); + } + if( ps_mb->rgb_cbp[ 1 ][ 1 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 1 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 1 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 1 ); + } + if( ps_mb->rgb_cbp[ 1 ][ 3 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 1, 3 ); + } + if( ps_mb->rgb_cbp[ 2 ][ 3 ] ) + { + y262_write_inter_block( ps_y262, ps_slice, 2, 3 ); + } + break; + 
} + } + + if( ui_mb_type & MACROBLOCK_INTRA || !( ui_mb_type & ( MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD ) ) ) + { + y262_slice_reset_predictors_inter( ps_y262, ps_slice ); + } + if( !( ui_mb_type & MACROBLOCK_INTRA ) ) + { + y262_slice_reset_predictors_intra( ps_y262, ps_slice ); + } + ps_slice->i_last_mb_motion_flags = ( ui_mb_type & ( MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD ) ); + + + return ( int32_t )( ( ( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream ) * 8 ) + ps_bitstream->i_codeword_fill - i_bits_start ); +} + +int32_t y262_get_inter_block_bits( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_plane_idx, int32_t i_blk_idx ) +{ + int32_t i_bits_start, i_bits; + y262_bitstream_t s_saved_bitstream, *ps_bitstream; + y262_macroblock_t *ps_mb; + + ps_mb = &ps_slice->s_macroblock; + s_saved_bitstream = ps_y262->s_bitstream; + ps_bitstream = &ps_y262->s_bitstream; + + i_bits_start = ( int32_t )( ( ( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream ) * 8 ) + ps_bitstream->i_codeword_fill ); + + y262_write_inter_block( ps_y262, ps_slice, i_plane_idx, i_blk_idx ); + + i_bits = ( int32_t )( ( ( ( ps_bitstream->pui8_codeword_ptr - ps_bitstream->pui8_bitstream ) * 8 ) + ps_bitstream->i_codeword_fill ) - i_bits_start ); + + ps_y262->s_bitstream = s_saved_bitstream; + + return i_bits; +} + +int32_t y262_get_mbmode_cost( y262_t *ps_y262, y262_slice_t *ps_slice, int32_t i_mbmode ) +{ + int32_t i_saved_quantizer, i_bits, i_ssd, i_saved_skip_run, i_saved_mb_motion; + int32_t rgi_saved_pmv[ 8 ]; + int32_t rgi_saved_intra_dc[ 3 ]; + y262_bitstream_t s_saved_bitstream; + y262_macroblock_t *ps_mb; + + ps_mb = &ps_slice->s_macroblock; + + i_saved_quantizer = ps_slice->i_quantizer; + rgi_saved_pmv[ 0 ] = ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ]; + rgi_saved_pmv[ 1 ] = ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ]; + rgi_saved_pmv[ 2 ] = ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ]; + rgi_saved_pmv[ 3 ] = ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ]; + 
rgi_saved_pmv[ 4 ] = ps_slice->rgi_pmv[ 1 ][ 0 ][ 0 ]; + rgi_saved_pmv[ 5 ] = ps_slice->rgi_pmv[ 1 ][ 0 ][ 1 ]; + rgi_saved_pmv[ 6 ] = ps_slice->rgi_pmv[ 1 ][ 1 ][ 0 ]; + rgi_saved_pmv[ 7 ] = ps_slice->rgi_pmv[ 1 ][ 1 ][ 1 ]; + rgi_saved_intra_dc[ 0 ] = ps_slice->rgi_dc_dct_pred[ 0 ]; + rgi_saved_intra_dc[ 1 ] = ps_slice->rgi_dc_dct_pred[ 1 ]; + rgi_saved_intra_dc[ 2 ] = ps_slice->rgi_dc_dct_pred[ 2 ]; + s_saved_bitstream = ps_y262->s_bitstream; + i_saved_skip_run = ps_slice->i_skip_run; + i_saved_mb_motion = ps_slice->i_last_mb_motion_flags; + + if( i_mbmode == Y262_MBMODE_INTRA || i_mbmode == Y262_MBMODE_INTRA_IL ) + { + y262_encode_macroblock_intra( ps_y262, ps_slice, i_mbmode == Y262_MBMODE_INTRA_IL ); + } + else + { + y262_encode_macroblock_inter( ps_y262, ps_slice, i_mbmode ); + } + i_ssd = ps_y262->s_funcs.f_ssd_16x16( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride ); + if( ps_y262->i_psyrd_strength > 0 ) + { + int32_t i_src_cplx, i_rec_cplx, i_cost; + bool_t b_interlaced; + static const uint8_t rgui8_zero[ 16 ] = { 0, }; + + b_interlaced = i_mbmode == Y262_MBMODE_INTRA_IL || i_mbmode == Y262_MBMODE_FW_IL || i_mbmode == Y262_MBMODE_BW_IL || i_mbmode == Y262_MBMODE_BI_IL; + if( !b_interlaced ) + { + i_src_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x16 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, ( uint8_t *)rgui8_zero, 0 ); + i_src_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x16 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_rec_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x16 ]( ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride, ( uint8_t *)rgui8_zero, 0 ); + i_rec_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x16 ]( ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_cost = abs( i_src_cplx - i_rec_cplx ); + } + else + { + i_src_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_src_luma, 
ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ); + i_src_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_src_luma, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_rec_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ); + i_rec_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_dst_luma, ps_mb->i_dst_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_cost = abs( i_src_cplx - i_rec_cplx ); + i_src_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_src_luma + ps_mb->i_src_luma_stride, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ); + i_src_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_src_luma + ps_mb->i_src_luma_stride, ps_mb->i_src_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_rec_cplx = ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_dst_luma + ps_mb->i_dst_luma_stride, ps_mb->i_dst_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ); + i_rec_cplx -= ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ]( ps_mb->pui8_dst_luma + ps_mb->i_dst_luma_stride, ps_mb->i_dst_luma_stride * 2, ( uint8_t *)rgui8_zero, 0 ) >> 2; + i_cost += abs( i_src_cplx - i_rec_cplx ); + } + i_cost = ( i_cost * ps_y262->i_psyrd_strength * ps_mb->i_lambda_sqr ) >> 8; + i_ssd += i_cost; + } + i_ssd += ps_y262->s_funcs.f_ssd_8x8( ps_mb->pui8_src_cb, ps_mb->i_src_chroma_stride, ps_mb->pui8_dst_cb, ps_mb->i_dst_chroma_stride ); + i_ssd += ps_y262->s_funcs.f_ssd_8x8( ps_mb->pui8_src_cr, ps_mb->i_src_chroma_stride, ps_mb->pui8_dst_cr, ps_mb->i_dst_chroma_stride ); + if( i_mbmode != Y262_MBMODE_SKIP ) + { + i_bits = y262_write_macroblock( ps_y262, ps_slice ); + } + else + { + i_bits = 1; + } + + ps_slice->i_quantizer = i_saved_quantizer; + ps_slice->rgi_pmv[ 0 ][ 0 ][ 0 ] = rgi_saved_pmv[ 0 ]; + ps_slice->rgi_pmv[ 0 ][ 0 ][ 1 ] = rgi_saved_pmv[ 1 ]; + ps_slice->rgi_pmv[ 0 ][ 1 ][ 0 ] = rgi_saved_pmv[ 2 ]; + 
ps_slice->rgi_pmv[ 0 ][ 1 ][ 1 ] = rgi_saved_pmv[ 3 ]; + ps_slice->rgi_pmv[ 1 ][ 0 ][ 0 ] = rgi_saved_pmv[ 4 ]; + ps_slice->rgi_pmv[ 1 ][ 0 ][ 1 ] = rgi_saved_pmv[ 5 ]; + ps_slice->rgi_pmv[ 1 ][ 1 ][ 0 ] = rgi_saved_pmv[ 6 ]; + ps_slice->rgi_pmv[ 1 ][ 1 ][ 1 ] = rgi_saved_pmv[ 7 ]; + ps_slice->rgi_dc_dct_pred[ 0 ] = rgi_saved_intra_dc[ 0 ]; + ps_slice->rgi_dc_dct_pred[ 1 ] = rgi_saved_intra_dc[ 1 ]; + ps_slice->rgi_dc_dct_pred[ 2 ] = rgi_saved_intra_dc[ 2 ]; + ps_y262->s_bitstream = s_saved_bitstream; + ps_slice->i_skip_run = i_saved_skip_run; + ps_slice->i_last_mb_motion_flags = i_saved_mb_motion; + + return i_ssd + ( ( i_bits * ( ps_mb->i_lambda ) ) >> Y262_LAMBDA_BITS ); +} + + +int32_t y262_get_quantizer_from_quantizer_f8( y262_t *ps_y262, int32_t i_quantizer_f8, int32_t *pi_lambda_quantizer_f8 ) +{ + int32_t i_scale, i_idx, i_best_delta, i_best_idx, i_linear_int_quantizer; + + if( ps_y262->b_qscale_type ) + { + i_scale = ( 128 << 8 ) / 32; + } + else + { + i_scale = ( 64 << 8 ) / 32; + } + + i_quantizer_f8 = ( ( i_quantizer_f8 * i_scale ) + 128 ) >> 8; + *pi_lambda_quantizer_f8 = i_quantizer_f8; + + i_linear_int_quantizer = ( i_quantizer_f8 + 128 ) >> 8; + + i_best_delta = 256; + i_best_idx = 1; + + for( i_idx = 1; i_idx < 32; i_idx++ ) + { + int32_t i_delta; + + i_delta = abs( i_linear_int_quantizer - rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ i_idx ] ); + if( i_best_delta > i_delta ) + { + i_best_delta = i_delta; + i_best_idx = i_idx; + } + } + + return i_best_idx; +} + +void y262_encode_slice( y262_t *ps_y262, y262_slice_t *ps_slice ) +{ + int32_t i_mb_idx, i_bits; + bool_t b_skip; + + y262_slice_reset_predictors_intra( ps_y262, ps_slice ); + y262_slice_reset_predictors_inter( ps_y262, ps_slice ); + ps_slice->i_skip_run = 0; + + for( i_mb_idx = ps_slice->i_start_mb_addr; i_mb_idx <= ps_slice->i_end_mb_addr; i_mb_idx++ ) + { + if( i_mb_idx == 71 ) + { + i_mb_idx = i_mb_idx; + } + + ps_slice->b_allow_skip = i_mb_idx != 
ps_slice->i_start_mb_addr && i_mb_idx != ps_slice->i_end_mb_addr; + ps_slice->b_allow_skip = ps_slice->b_allow_skip && ( ( ps_slice->i_last_mb_motion_flags & ( MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD ) ) || ( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P ) ); + + y262_init_macroblock( ps_y262, ps_slice, i_mb_idx ); + + + if( ps_slice->s_macroblock.i_mb_x == 160 && ps_slice->s_macroblock.i_mb_y == 48 ) + { + ps_slice = ps_slice; + } + /* dumped quantizer here */ + { + float f_lambda; + int32_t i_quantizer_f8, i_lambda_quantizer_f8; + y262_macroblock_t *ps_mb; + + ps_mb = &ps_slice->s_macroblock; + + i_quantizer_f8 = y262_ratectrl_get_slice_mb_quantizer( ps_y262, &ps_y262->s_slice_encoder_ratectrl, i_mb_idx ); + + /* mbtree */ + i_quantizer_f8 = ( ( i_quantizer_f8 ) * ( ( ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_quantizer_scale ) ) ) / ( 1 << 12 ); + + /* variance aq */ + if( ps_y262->b_variance_aq ) + { + i_quantizer_f8 = ( i_quantizer_f8 * ps_y262->ps_input_picture->ps_lookahead[ i_mb_idx ].i_quantizer_aq_scale ) / ( 1 << 12 ); + } + + ps_mb->i_quantizer = y262_get_quantizer_from_quantizer_f8( ps_y262, i_quantizer_f8, &i_lambda_quantizer_f8 ); + ps_mb->i_quantizer = MIN( 31, MAX( 1, ps_mb->i_quantizer ) ); + i_lambda_quantizer_f8 = MIN( 128 << 8, MAX( 1 << 8, i_lambda_quantizer_f8 ) ); + ps_mb->i_scaled_quantizer = rgi8_y262_quantiser_scale_table[ ps_y262->b_qscale_type ][ ps_mb->i_quantizer ]; + f_lambda = ( ( ps_mb->i_scaled_quantizer * ps_mb->i_scaled_quantizer ) * ( 0.4f * 0.4f ) ); + /*ps_mb->i_lambda = ( int32_t ) ( ( ( ( i_lambda_quantizer_f8 * i_lambda_quantizer_f8 ) + ( 1 << 15 ) ) >> 16 ) * ( 0.4 * 0.4 ) );*/ + ps_mb->i_lambda = ( int32_t ) ( floorf( MAX( 1.0f, f_lambda + 0.5f ) * ( 1 << Y262_LAMBDA_BITS ) ) ); + ps_mb->i_lambda_sqr = ( int32_t ) floorf( MAX( 1.0f, sqrtf( f_lambda ) + 0.5f ) ); + + } + + y262_encode_macroblock( ps_y262, ps_slice, i_mb_idx, &b_skip ); + + + if( !b_skip ) + { + i_bits = y262_write_macroblock( 
ps_y262, ps_slice ); + } + else + { + assert( ps_slice->b_allow_skip ); + + if( ps_slice->i_picture_type == PICTURE_CODING_TYPE_P ) + { + y262_slice_reset_predictors_inter( ps_y262, ps_slice ); + } + y262_slice_reset_predictors_intra( ps_y262, ps_slice ); + + ps_slice->i_skip_run++; + + i_bits = 1; + } + y262_ratectrl_update_slice_mb( ps_y262, &ps_y262->s_slice_encoder_ratectrl, i_mb_idx, i_bits ); + } +} + +void y262_write_slice( y262_t *ps_y262, int32_t i_picture_type, int32_t i_slice_row, y262_slice_t *ps_slice ) +{ + y262_bitstream_t *ps_bitstream; + + ps_bitstream = &ps_y262->s_bitstream; + + y262_bitstream_write( ps_bitstream, 1, 24 ); + + ps_slice->i_quantizer_f8 = ps_y262->s_ratectrl.i_quantizer; + ps_slice->s_slice_header.i_quantizer_scale_code = MAX( 1, MIN( 31, ( ps_slice->i_quantizer_f8 >> 8 ) ) ); + ps_slice->s_slice_header.b_intra_slice_flag = FALSE; + ps_slice->s_slice_header.b_intra_slice = ps_slice->s_slice_header.b_intra_slice_flag; + ps_slice->i_picture_type = i_picture_type; + ps_slice->i_mb_addr = i_slice_row * ( ps_y262->i_sequence_width >> 4 ); + ps_slice->i_start_mb_addr = ps_slice->i_mb_addr; + ps_slice->i_end_mb_addr = ps_slice->i_start_mb_addr + ( ps_y262->i_sequence_width >> 4 ) - 1; + ps_slice->i_quantizer = ps_slice->s_slice_header.i_quantizer_scale_code; + + if( ps_y262->i_sequence_height > 2800 ) + { + int32_t i_slice_vertical_msb; + + i_slice_vertical_msb = ( i_slice_row >> 7 ) & 3; + + y262_bitstream_write( ps_bitstream, STARTCODE_SLICE_START + i_slice_row & 0x7f, 8 ); + y262_bitstream_write( ps_bitstream, i_slice_vertical_msb, 3 ); + } + else + { + y262_bitstream_write( ps_bitstream, STARTCODE_SLICE_START + i_slice_row, 8 ); + } + + y262_bitstream_write( ps_bitstream, ps_slice->s_slice_header.i_quantizer_scale_code, 5 ); + y262_bitstream_write( ps_bitstream, ps_slice->s_slice_header.b_intra_slice_flag, 1 ); + + if( ps_slice->s_slice_header.b_intra_slice_flag ) + { + y262_bitstream_write( ps_bitstream, 
ps_slice->s_slice_header.b_intra_slice, 1 ); + y262_bitstream_write( ps_bitstream, 0x7f, 7 ); + y262_bitstream_write( ps_bitstream, 0, 1 ); /* no more extra slice data */ + } + + y262_encode_slice( ps_y262, ps_slice ); +} + + +int32_t y262_encode_unit( y262_t *ps_y262, int32_t i_unit_startcode, int32_t i_unit_extension_startcode, int32_t i_picture_type, int32_t i_slice_row ) +{ + y262_bitstream_reset( &ps_y262->s_bitstream ); + if( i_unit_startcode == STARTCODE_SEQUENCE_HEADER ) + { + y262_write_sequence_header( ps_y262 ); + } + else if( i_unit_startcode == STARTCODE_EXTENSION && i_unit_extension_startcode == H262_SEQUENCE_EXTENSION_ID ) + { + y262_write_sequence_extension( ps_y262 ); + } + else if( i_unit_startcode == STARTCODE_EXTENSION && i_unit_extension_startcode == H262_SEQUENCE_DISPLAY_EXTENSION_ID ) + { + y262_write_sequence_display_extension( ps_y262 ); + } + else if( i_unit_startcode == STARTCODE_GROUP ) + { + y262_write_group_of_pictures_header( ps_y262 ); + } + else if( i_unit_startcode == STARTCODE_PICTURE ) + { + y262_write_picture_header( ps_y262, i_picture_type ); + } + else if( i_unit_startcode == STARTCODE_EXTENSION && i_unit_extension_startcode == H262_PICTURE_CODING_EXTENSION_ID ) + { + y262_write_picture_coding_extension( ps_y262 ); + } + else if( i_unit_startcode == STARTCODE_SLICE_START ) + { + /* deprecated */ + } + else if( i_unit_startcode == STARTCODE_USER_DATA ) + { + y262_write_user_data( ps_y262, i_slice_row ); + } + else if( i_unit_startcode == STARTCODE_STUFFING ) + { + y262_write_zero_stuffing( ps_y262, i_slice_row ); + } + + { + uint8_t *pui8_bs; + uint32_t ui_len; + y262_result_t s_res; + + y262_bitstream_flush( &ps_y262->s_bitstream, &pui8_bs, &ui_len ); + + if( ps_y262->s_funcs.pf_result_callback ) + { + s_res.bitstream_unit.i_type = Y262_RESULT_BITSTREAM; + s_res.bitstream_unit.i_don = ps_y262->ps_input_picture->i_don; + s_res.bitstream_unit.i_pon = ps_y262->ps_input_picture->i_pon; + s_res.bitstream_unit.pui8_unit = pui8_bs; + 
s_res.bitstream_unit.i_unit_length = ui_len; + s_res.bitstream_unit.i_unit_type = i_unit_startcode; + ps_y262->s_funcs.pf_result_callback( ps_y262->p_cb_handle, Y262_RESULT_BITSTREAM, &s_res ); + } + return ui_len * 8; + } +} + +/* called by slice thread or directly */ +void y262_encode_unit_slice( y262_t *ps_y262, int32_t i_picture_type, int32_t i_slice_row ) +{ + y262_slice_t s_slice; + y262_write_slice( ps_y262, i_picture_type, i_slice_row, &s_slice ); +} + +void y262_start_and_encode_unit_slices( y262_t *ps_y262, int32_t i_slice_encoder_idx, int32_t i_picture_type, int32_t i_start_mb_row, int32_t i_end_mb_row ) +{ + int32_t i_idx, i_slices_start_mb, i_slices_end_mb; + + i_slices_start_mb = i_start_mb_row * ( ps_y262->i_sequence_width >> 4 ); + i_slices_end_mb = ( ( i_end_mb_row + 1 ) * ( ps_y262->i_sequence_width >> 4 ) ) - 1; + + y262_ratectrl_start_slice_encoder( ps_y262, &ps_y262->rgps_slice_encoders[ i_slice_encoder_idx ]->s_slice_encoder_ratectrl, i_slices_start_mb, i_slices_end_mb ); + + if( !ps_y262->b_multithreading ) + { + y262_bitstream_reset( &ps_y262->rgps_slice_encoders[ i_slice_encoder_idx ]->s_bitstream ); + for( i_idx = i_start_mb_row; i_idx <= i_end_mb_row; i_idx++ ) + { + y262_encode_unit_slice( ps_y262->rgps_slice_encoders[ i_slice_encoder_idx ], i_picture_type, i_idx ); + y262_bitstream_bytealign( &ps_y262->rgps_slice_encoders[ i_slice_encoder_idx ]->s_bitstream ); + } + } + else + { + y262_slice_thread_t *ps_slice_thread; + + ps_slice_thread = &ps_y262->rgs_slice_threads[ i_slice_encoder_idx ]; + ps_slice_thread->i_command = Y262_SLICE_THREAD_CMD_ENCODE; + ps_slice_thread->i_picture_type = i_picture_type; + ps_slice_thread->i_first_slice_row = i_start_mb_row; + ps_slice_thread->i_last_slice_row = i_end_mb_row; + + y262_event_set_g( ps_y262, ps_slice_thread->p_start_event ); + } +} + + +int32_t y262_finish_encode_unit_slices( y262_t *ps_y262, int32_t i_slice_encoder_idx ) +{ + y262_t *ps_y262_i; + uint8_t *pui8_bs; + uint32_t ui_len; + + if( 
ps_y262->b_multithreading ) + { + y262_slice_thread_t *ps_slice_thread; + ps_slice_thread = &ps_y262->rgs_slice_threads[ i_slice_encoder_idx ]; + y262_event_wait_g( ps_y262, ps_slice_thread->p_finished_event ); + } + + ps_y262_i = ps_y262->rgps_slice_encoders[ i_slice_encoder_idx ]; + + y262_ratectrl_end_slice_encoder( ps_y262, &ps_y262_i->s_slice_encoder_ratectrl ); + + y262_bitstream_flush( &ps_y262_i->s_bitstream, &pui8_bs, &ui_len ); + + return ui_len * 8; +} + + +int32_t y262_encode_slices( y262_t *ps_y262, int32_t i_picture_type ) +{ + int32_t i_idx, i_num_mb_rows, i_num_used_slice_encoders, i_enc_start_row, i_enc_end_row; + y262_bitstream_t s_save_bs; + int32_t i_accumulated_bits; + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); /* need to lock this to shut up helgrind */ + } + for( i_idx = 0; i_idx < ps_y262->i_num_slice_encoders; i_idx++ ) + { + y262_t *ps_y262_i; + ps_y262_i = ps_y262->rgps_slice_encoders[ i_idx ]; + s_save_bs = ps_y262_i->s_bitstream; + *ps_y262_i = *ps_y262; + ps_y262_i->s_bitstream = s_save_bs; + } + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); /* need to lock this to shut up helgrind */ + } + + i_accumulated_bits = 0; + i_num_mb_rows = ( ps_y262->i_sequence_height >> 4 ); + i_num_used_slice_encoders = MIN( ps_y262->i_num_slice_encoders, i_num_mb_rows ); + for( i_idx = 0; i_idx < i_num_used_slice_encoders; i_idx++ ) + { + i_enc_start_row = ( i_num_mb_rows * i_idx ) / i_num_used_slice_encoders; + i_enc_end_row = ( ( i_num_mb_rows * ( i_idx + 1 ) ) / i_num_used_slice_encoders ) - 1; + y262_start_and_encode_unit_slices( ps_y262, i_idx, i_picture_type, i_enc_start_row, i_enc_end_row ); + } + for( i_idx = 0; i_idx < i_num_used_slice_encoders; i_idx++ ) + { + i_accumulated_bits += y262_finish_encode_unit_slices( ps_y262, i_idx ); + } + return i_accumulated_bits; +} + + +int32_t y262_output_slices( y262_t *ps_y262 ) +{ + y262_t *ps_y262_i; + 
int32_t i_num_mb_rows, i_idx, i_num_used_slice_encoders, i_accumulated_bits; + uint8_t *pui8_bs; + uint32_t ui_len; + y262_result_t s_res; + + i_num_mb_rows = ( ps_y262->i_sequence_height >> 4 ); + i_num_used_slice_encoders = MIN( ps_y262->i_num_slice_encoders, i_num_mb_rows ); + + i_accumulated_bits = 0; + for( i_idx = 0; i_idx < i_num_used_slice_encoders; i_idx++ ) + { + ps_y262_i = ps_y262->rgps_slice_encoders[ i_idx ]; + + y262_bitstream_get( &ps_y262_i->s_bitstream, &pui8_bs, &ui_len ); + + if( ps_y262->s_funcs.pf_result_callback ) + { + s_res.bitstream_unit.i_type = Y262_RESULT_BITSTREAM; + s_res.bitstream_unit.i_don = ps_y262->ps_input_picture->i_don; + s_res.bitstream_unit.i_pon = ps_y262->ps_input_picture->i_pon; + s_res.bitstream_unit.pui8_unit = pui8_bs; + s_res.bitstream_unit.i_unit_length = ui_len; + s_res.bitstream_unit.i_unit_type = STARTCODE_SLICE_START; + ps_y262->s_funcs.pf_result_callback( ps_y262->p_cb_handle, Y262_RESULT_BITSTREAM, &s_res ); + } + + i_accumulated_bits += ui_len * 8; + } + return i_accumulated_bits; +} + + +void y262_get_frame_psnr( y262_t *ps_y262, y262_picture_t *ps_original, y262_reference_picture_t *ps_recon, double *pd_psnr ) +{ + int32_t i_x, i_y, i_num_pel_x, i_num_pel_y, i_pel_stride, i_idx; + int64_t ui64_ssd; + uint8_t *pui8_ref, *pui8_recon; + + for( i_idx = 0; i_idx < 3; i_idx++ ) + { + if( i_idx == 0 ) + { + pui8_ref = ps_original->pui8_luma; + pui8_recon = ps_recon->pui8_luma; + i_num_pel_x = ps_y262->i_sequence_width; + i_num_pel_y = ps_y262->i_sequence_height; + i_pel_stride = ps_y262->i_sequence_width; + } + else + { + if( i_idx == 1 ) + { + pui8_ref = ps_original->pui8_cb; + pui8_recon = ps_recon->pui8_cb; + } + else + { + pui8_ref = ps_original->pui8_cr; + pui8_recon = ps_recon->pui8_cr; + } + i_num_pel_x = ps_y262->i_sequence_width / 2; + i_num_pel_y = ps_y262->i_sequence_height / 2; + i_pel_stride = ps_y262->i_sequence_width / 2; + } + ui64_ssd = 0; + for( i_y = 0; i_y < i_num_pel_y; i_y += 8 ) + { + for( 
i_x = 0; i_x < i_num_pel_x; i_x += 8 ) + { + ui64_ssd += ps_y262->s_funcs.f_ssd_8x8( pui8_ref + i_x, i_pel_stride, pui8_recon + i_x, i_pel_stride ); + } + pui8_ref += i_pel_stride * 8; + pui8_recon += i_pel_stride * 8; + } + + if( 0 == ui64_ssd ) + { + pd_psnr[ i_idx ] = 100.0; + } + else + { + pd_psnr[ i_idx ] = 10 * log10( 255.0 * 255.0 * ( i_num_pel_x * i_num_pel_y ) / ( double )( ui64_ssd ) ); + } + } +} + + +int32_t y262_encode_possible_user_data_unit( y262_t *ps_y262, int32_t i_where ) +{ + int32_t i_idx, i_accumulated_bits; + + i_accumulated_bits = 0; + + for( i_idx = 0; i_idx < ps_y262->ps_input_picture->i_num_user_data; i_idx++ ) + { + if( i_where == ps_y262->ps_input_picture->rgps_user_data[ i_idx ]->i_where ) + { + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_USER_DATA, 0, 0, i_idx ); + } + } + + return i_accumulated_bits; +} + + +void y262_encode_picture( y262_t *ps_y262, y262_picture_t *ps_picture, int32_t i_picture_type, int32_t i_pon ) +{ + int32_t i_idx, i_accumulated_bits, i_number_of_encodes, i_stuffing_bits; + bool_t b_reencode_if_possible; + + ps_y262->ps_input_picture = ps_picture; + + i_accumulated_bits = 0; + + if( i_picture_type == PICTURE_CODING_TYPE_I ) + { + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_SEQUENCE_HEADER, 0, 0, 0 ); + if( !ps_y262->b_sequence_mpeg1 ) + { + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_EXTENSION, H262_SEQUENCE_EXTENSION_ID, 0, 0 ); + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_EXTENSION, H262_SEQUENCE_DISPLAY_EXTENSION_ID, 0, 0 ); + } + i_accumulated_bits += y262_encode_possible_user_data_unit( ps_y262, Y262_USER_DATA_AFTER_SEQUENCE ); + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_GROUP, 0, 0, 0 ); + i_accumulated_bits += y262_encode_possible_user_data_unit( ps_y262, Y262_USER_DATA_AFTER_GOP ); + } + + if( i_picture_type != PICTURE_CODING_TYPE_B ) + { + ps_y262->b_next_reference_picture_toggle = !ps_y262->b_next_reference_picture_toggle; + 
ps_y262->ps_refpic_forward = &ps_y262->rgs_frame_buffer[ !ps_y262->b_next_reference_picture_toggle ]; + ps_y262->ps_refpic_dst = &ps_y262->rgs_frame_buffer[ ps_y262->b_next_reference_picture_toggle ]; + } + else + { + ps_y262->ps_refpic_forward = &ps_y262->rgs_frame_buffer[ !ps_y262->b_next_reference_picture_toggle ]; + ps_y262->ps_refpic_backward = &ps_y262->rgs_frame_buffer[ ps_y262->b_next_reference_picture_toggle ]; + ps_y262->ps_refpic_dst = &ps_y262->rgs_frame_buffer[ 2 ]; + } + y262_lookahead_mbtree( ps_y262, ps_picture ); + y262_lookahead_fill_ratectrl_vars( ps_y262, ps_picture ); + y262_ratectrl_start_picture( ps_y262, i_accumulated_bits ); + + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_PICTURE, 0, i_picture_type, 0 ); + if( !ps_y262->b_sequence_mpeg1 ) + { + i_accumulated_bits += y262_encode_unit( ps_y262, STARTCODE_EXTENSION, H262_PICTURE_CODING_EXTENSION_ID, PICTURE_CODING_TYPE_I, 0 ); + } + i_accumulated_bits += y262_encode_possible_user_data_unit( ps_y262, Y262_USER_DATA_BEFORE_SLICES ); + + + if( i_pon == 1 ) + { + i_pon = i_pon; + } + + i_number_of_encodes = 0; + do + { + int32_t i_picture_bits; + + i_picture_bits = i_accumulated_bits; + i_picture_bits += y262_encode_slices( ps_y262, i_picture_type ); + + for( i_idx = 0; i_idx < 64; i_idx++ ) + { + /*printf("%d: %d\n", i_idx - 32, rgi_quantizer_delta[ i_idx ] );*/ + } + + b_reencode_if_possible = FALSE; + if( i_number_of_encodes < 3 ) + { + b_reencode_if_possible = y262_ratectrl_check_for_reencode( ps_y262, i_picture_bits ); + if( b_reencode_if_possible ) + { + /*fprintf( stderr, "reenc\n" );*/ + } + } + + i_number_of_encodes++; + } while( b_reencode_if_possible ); + + i_accumulated_bits += y262_output_slices( ps_y262 ); + + y262_ratectrl_end_picture( ps_y262, i_accumulated_bits ); + + if( ps_y262->b_sequence_cbr ) + { + int32_t i_commit_bits; + i_stuffing_bits = y262_ratectrl_stuffing_bits_needed( ps_y262 ); + if( i_stuffing_bits > 0 ) + { + i_commit_bits = y262_encode_unit( 
ps_y262, STARTCODE_STUFFING, 0, i_picture_type, ( i_stuffing_bits + 7 ) / 8 ); + y262_ratectrl_commit_stuffing_bits( ps_y262, i_commit_bits ); + } + } + + if( ps_y262->s_funcs.pf_result_callback ) + { + y262_result_t s_res; + + s_res.recon.i_type = Y262_RESULT_RECON; + s_res.recon.i_pon = ps_y262->ps_input_picture->i_pon; + s_res.recon.i_don = ps_y262->ps_input_picture->i_don; + y262_get_frame_psnr( ps_y262, ps_picture, ps_y262->ps_refpic_dst, &s_res.recon.f64_psnr[ 0 ] ); + s_res.recon.pui8_luma = ps_y262->ps_refpic_dst->pui8_luma; + s_res.recon.pui8_cb = ps_y262->ps_refpic_dst->pui8_cb; + s_res.recon.pui8_cr = ps_y262->ps_refpic_dst->pui8_cr; + s_res.recon.i_frame_type = i_picture_type; + + ps_y262->s_funcs.pf_result_callback( ps_y262->p_cb_handle, Y262_RESULT_RECON, &s_res ); + } +} + + + diff --git a/src/y262/y262.h b/src/y262/y262.h new file mode 100644 index 0000000..309aef9 --- /dev/null +++ b/src/y262/y262.h @@ -0,0 +1,74 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS 1 +#define ALIGNED(x) __declspec(align(x)) +#else +#define ALIGNED(x) __attribute__((aligned(x))) +#endif + + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "y262api.h" + +#include "types.h" +#include "tables.h" +#include "bitstream.h" +#include "aboveslicelevel.h" +#include "lookahead.h" +#include "transform.h" +#include "pixelop.h" +#include "me.h" +#include "transform_x86.h" +#include "pixelop_x86.h" +#include "ratectrl.h" +#include "threads.h" + +void y262_init_motion_compensation( y262_t *ps_y262 ); +void y262_error( y262_t *ps_y262, int32_t i_error_code, int8_t* pi8_format, ... ); +void y262_encode_picture( y262_t *ps_y262, y262_picture_t *ps_picture, int32_t i_picture_type, int32_t i_pon ); + + +#define MIN( x, y ) ( ( x ) < ( y ) ? ( x ) : ( y ) ) +#define MAX( x, y ) ( ( x ) > ( y ) ? ( x ) : ( y ) ) + diff --git a/src/y262/y262api.c b/src/y262/y262api.c new file mode 100644 index 0000000..be4ef48 --- /dev/null +++ b/src/y262/y262api.c @@ -0,0 +1,1031 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
/*
 * Allocates s_size bytes whose start address is aligned to 16 bytes.
 *
 * The block obtained from malloc() is s_size + 16 + sizeof( void * ) bytes
 * large. The returned address is the first 16 byte aligned address that leaves
 * room for one void * in front of it; that slot holds the pointer malloc()
 * returned so that y262_dealloc() can release it.
 *
 * Returns NULL when malloc() fails or when adding the bookkeeping overhead to
 * s_size would overflow size_t. Memory must be released with y262_dealloc().
 */
void *y262_alloc( size_t s_size )
{
	const size_t s_overhead = 16 + sizeof( void * );
	size_t s_base, s_user;
	unsigned char *pui8_user;
	void *p_allocated;

	/* reject sizes for which s_size + s_overhead would wrap around */
	if( s_size > ( ( size_t )-1 ) - s_overhead )
	{
		return NULL;
	}

	p_allocated = malloc( s_size + s_overhead );

	if( p_allocated == NULL )
	{
		return NULL;
	}

	/* round up past the pointer slot; the user area then starts at most
	   sizeof( void * ) + 15 bytes into the block and always fits */
	s_base = ( size_t )p_allocated;
	s_user = ( s_base + sizeof( void * ) + 15 ) & ~( size_t )15;
	pui8_user = ( unsigned char * )p_allocated + ( s_user - s_base );

	( ( void ** )pui8_user )[ -1 ] = p_allocated;

	return pui8_user;
}

/*
 * Releases memory obtained from y262_alloc(). A NULL pointer is ignored.
 */
void y262_dealloc( void *p_ptr )
{
	if( p_ptr )
	{
		void *p_memory;

		/* the pointer returned by malloc() is stored directly in front of the user area */
		p_memory = *( ( ( void ** )( p_ptr ) ) - 1 );
		free( p_memory );
	}
}
+bool_t y262_validate_level( y262_t *ps_y262, int32_t i_level, bool_t b_forderive ) +{ + int32_t i_idx, i_luma_sample_rate; + int32_t rgi_max_hfcode[ 4 ] = { 7, 8, 9, 9 }; + int32_t rgi_max_vfcode[ 4 ] = { 4, 5, 5, 5 }; + int32_t rgi_max_frcode[ 4 ] = { 5, 5, 8, 8 }; + int32_t rgi_max_pic_width[ 4 ] = { 352, 720, 1440, 1920 }; + int32_t rgi_max_pic_height[ 4 ] = { 288, 576, 1152, 1152 }; + int32_t rgi_fr[ 16 ] = { 100, 24, 24, 25, 30, 30, 50, 60, 60, 100, 100, 100, 100, 100, 100, 100 }; + int32_t rgi_max_fr[ 4 ] = { 30, 30, 60, 60 }; + int32_t rgi_max_luma_sample_rate[ 4 ] = { 3041280, 10368000, 47001600, 62668800 }; + int32_t rgi_max_bitrate[ 4 ] = { 4000000, 15000000, 60000000, 80000000 }; + int32_t rgi_max_buffer_size[ 4 ] = { 475136, 1835008, 7340032, 9781248 }; + + i_idx = -1; + switch( i_level ) + { + case Y262_LEVEL_LOW: + i_idx = 0; + break; + case Y262_LEVEL_MAIN: + i_idx = 1; + break; + case Y262_LEVEL_HIGH1440: + i_idx = 2; + break; + case Y262_LEVEL_HIGH: + i_idx = 3; + break; + } + + if( i_idx == -1 ) + { + return FALSE; + } + if( ps_y262->rgi_fcode[ 0 ][ 0 ] < 1 || ps_y262->rgi_fcode[ 0 ][ 0 ] > rgi_max_hfcode[ i_idx ] || + ps_y262->rgi_fcode[ 1 ][ 0 ] < 1 || ps_y262->rgi_fcode[ 1 ][ 0 ] > rgi_max_hfcode[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"horizontal fcode exceeds level limit" ); + } + return FALSE; + } + if( ps_y262->rgi_fcode[ 0 ][ 1 ] < 1 || ps_y262->rgi_fcode[ 0 ][ 1 ] > rgi_max_vfcode[ i_idx ] || + ps_y262->rgi_fcode[ 1 ][ 1 ] < 1 || ps_y262->rgi_fcode[ 1 ][ 1 ] > rgi_max_vfcode[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"vertical fcode exceeds level limit" ); + } + return FALSE; + } + if( ps_y262->i_sequence_frame_rate_code < 1 || ps_y262->i_sequence_frame_rate_code > 
rgi_max_frcode[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"frame rate exceeds level limit" ); + } + return FALSE; + } + if( ps_y262->i_sequence_width > rgi_max_pic_width[ i_idx ] || + ps_y262->i_sequence_height > rgi_max_pic_height[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"picture size exceeds level limit" ); + } + return FALSE; + } + i_luma_sample_rate = ( int32_t )( ( ( ( int64_t )( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) ) * ps_y262->i_sequence_derived_timescale ) / ps_y262->i_sequence_derived_picture_duration ); + if( i_luma_sample_rate > rgi_max_luma_sample_rate[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"luma sample rate exceeds level limit" ); + } + return FALSE; + } + if( ps_y262->s_ratectrl.i_vbvrate > rgi_max_bitrate[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"maximum bitrate exceeds level limit" ); + } + return FALSE; + } + if( ps_y262->s_ratectrl.i_vbv_size > rgi_max_buffer_size[ i_idx ] ) + { + if( !b_forderive && ps_y262->s_funcs.pf_error_callback ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_PROFILELEVEL, ( int8_t *)"video buffer size exceeds level limit" ); + } + return FALSE; + } + return TRUE; +} + +int32_t y262_initialize( void *p_y262, y262_configuration_t *ps_config ) +{ + int32_t i_idx, i_mv_range, i_mvs, i_fcode_m1, i_wrap; + y262_t *ps_y262; + + if( p_y262 == NULL ) + { + return Y262_INIT_ERROR_CONTEXT; + } + + ps_y262 = ( y262_t * )p_y262; + + ps_y262->p_cb_handle = 
ps_config->p_cb_handle; + ps_y262->s_funcs.pf_error_callback = ps_config->pf_error_callback; + ps_y262->s_funcs.pf_result_callback = ps_config->pf_result_callback; + ps_y262->s_funcs.pf_rcsample_callback = ps_config->pf_rcsample_callback; + + ps_y262->b_multithreading = !!ps_config->i_multithreading; + ps_y262->i_num_slice_encoders = MIN( MAX_NUM_SLICE_ENCODERS, MAX( 1, ps_config->i_num_threads ) ); + ps_y262->i_num_lookahead_encoders = MIN( MAX_NUM_SLICE_ENCODERS, MAX( 1, ps_config->i_num_threads ) ); + + if( ps_config->i_coded_width < 16 || ps_config->i_coded_width > 4096 || + ps_config->i_coded_height < 16 || ps_config->i_coded_height > 4096 ) + { + return Y262_INIT_ERROR_CODED_SIZE; + } + + if( ( ps_config->i_coded_width & 0xf ) != 0 ) + { + return Y262_INIT_ERROR_CODED_SIZE; + } + if( ( ps_config->i_coded_height & 0xf ) != 0 ) + { + return Y262_INIT_ERROR_CODED_SIZE; + } + if( ps_config->i_coded_chroma_format < Y262_CHROMA_FORMAT_420 || + ps_config->i_coded_chroma_format > Y262_CHROMA_FORMAT_444 ) + { + return Y262_INIT_ERROR_CHROMA_FORMAT; + } + if( ps_config->i_display_width > ps_config->i_coded_width ) + { + return Y262_INIT_ERROR_DISPLAY_SIZE; + } + if( ps_config->i_display_height > ps_config->i_coded_height ) + { + return Y262_INIT_ERROR_DISPLAY_SIZE; + } + if( ps_config->i_frame_rate_code < 1 || ps_config->i_frame_rate_code > 8 ) + { + return Y262_INIT_ERROR_FRAMERATE; + } + if( ps_config->i_pulldown_frame_rate_code != 0 ) + { + if( ps_config->i_pulldown_frame_rate_code < 1 || ps_config->i_pulldown_frame_rate_code > 8 ) + { + return Y262_INIT_ERROR_PFRAMERATE; + } + } + if( ps_config->i_aspect_ratio_information < 1 || ps_config->i_aspect_ratio_information > 4 && !ps_config->b_mpeg1 ) + { + return Y262_INIT_ERROR_ASPECT; + } + else if( ps_config->i_aspect_ratio_information < 1 || ps_config->i_aspect_ratio_information > 14 && ps_config->b_mpeg1 ) + { + return Y262_INIT_ERROR_ASPECT; + } + if( ps_config->i_videoformat < 0 || ps_config->i_videoformat > 4 ) + 
{ + return Y262_INIT_ERROR_VIDEO_FORMAT; + } + if( ps_config->i_profile != Y262_PROFILE_DERIVE && + ps_config->i_profile != Y262_PROFILE_SIMPLE && + ps_config->i_profile != Y262_PROFILE_MAIN ) + { + return Y262_INIT_ERROR_PROFILE; + } + if( ps_config->i_level != Y262_LEVEL_DERIVE && + ps_config->i_level != Y262_LEVEL_LOW && + ps_config->i_level != Y262_LEVEL_MAIN && + ps_config->i_level != Y262_LEVEL_HIGH1440 && + ps_config->i_level != Y262_LEVEL_HIGH ) + { + return Y262_INIT_ERROR_LEVEL; + } + + if( ps_config->i_lookahead_pictures < 10 || ps_config->i_lookahead_pictures > 50 ) + { + return Y262_INIT_ERROR_LOOKAHEADPICS; + } + if( ps_config->i_keyframe_ref_distance < 0 ) + { + return Y262_INIT_ERROR_KEYFRAME_DIST; + } + if( ps_config->i_bframes < 0 || ps_config->i_bframes > 4 ) + { + return Y262_INIT_ERROR_BFRAMES_COUNT; + } + if( ps_config->i_quality_for_speed < -100 || ps_config->i_quality_for_speed > 100 ) + { + return Y262_INIT_ERROR_QUALITY_SPEED; + } + if( ps_config->i_psyrd_strength > 512 ) + { + return Y262_INIT_ERROR_PSYRD_STR; + } + if( ps_config->i_num_threads < 1 || ps_config->i_num_threads > 8 ) + { + return Y262_INIT_ERROR_THREADS; + } + + if( ps_config->i_rcmode < BITRATE_CONTROL_CQ || ps_config->i_rcmode > BITRATE_CONTROL_PASS2 ) + { + return Y262_INIT_ERROR_RCMODE; + } + if( ps_config->i_bitrate < 20 && ps_config->i_rcmode != BITRATE_CONTROL_CQ ) + { + return Y262_INIT_ERROR_BITRATE; + } + if( ps_config->i_vbv_rate < 20 ) + { + return Y262_INIT_ERROR_VBVRATE; + } + if( ps_config->i_vbv_size < 20 ) + { + return Y262_INIT_ERROR_VBVSIZE; + } + if( ps_config->i_quantizer < 1 || ps_config->i_quantizer > 31 ) + { + return Y262_INIT_ERROR_QUANTIZER; + } + ps_y262->s_ratectrl.i_mode = ps_config->i_rcmode; + ps_y262->s_ratectrl.i_bitrate = ps_config->i_bitrate * 1000; + ps_y262->s_ratectrl.i_vbvrate = ps_config->i_vbv_rate * 1000; + ps_y262->s_ratectrl.i_vbv_size = ps_config->i_vbv_size * 1000; + ps_y262->b_sequence_cbr = ps_config->b_cbr_padding; + 
ps_y262->i_quantizer = ps_config->i_quantizer; + + + ps_y262->b_sequence_mpeg1 = ps_config->b_mpeg1; + if( ps_y262->b_sequence_mpeg1 ) + { + ps_y262->b_progressive_sequence = TRUE; + ps_y262->i_intra_dc_precision = 0; + } + else + { + ps_y262->b_progressive_sequence = FALSE; + ps_y262->i_intra_dc_precision = 1; + } + ps_y262->b_frame_pred_frame_dct = !ps_config->b_interlaced; /* interlaced */ + ps_y262->b_qscale_type = !!ps_config->b_qscale_type; + ps_y262->b_intra_vlc_format = 0; + ps_y262->i_sequence_width = ps_config->i_coded_width; + ps_y262->i_sequence_height = ps_config->i_coded_height; + switch( ps_config->i_coded_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + ps_y262->i_sequence_chroma_width = ps_y262->i_sequence_width >> 1; + ps_y262->i_sequence_chroma_height = ps_y262->i_sequence_height >> 1; + break; + case Y262_CHROMA_FORMAT_422: + ps_y262->i_sequence_chroma_width = ps_y262->i_sequence_width >> 1; + ps_y262->i_sequence_chroma_height = ps_y262->i_sequence_height; + break; + case Y262_CHROMA_FORMAT_444: + ps_y262->i_sequence_chroma_width = ps_y262->i_sequence_width; + ps_y262->i_sequence_chroma_height = ps_y262->i_sequence_height; + break; + } + ps_y262->i_sequence_chroma_format = ps_config->i_coded_chroma_format; + ps_y262->i_sequence_display_width = ps_config->i_display_width; + ps_y262->i_sequence_display_height = ps_config->i_display_height; + ps_y262->i_sequence_video_format = ps_config->i_videoformat; + ps_y262->i_sequence_frame_rate_code = ps_config->i_frame_rate_code; + ps_y262->i_sequence_pulldown_frame_rate_code = ps_config->i_pulldown_frame_rate_code ? 
ps_config->i_pulldown_frame_rate_code : ps_config->i_frame_rate_code; + ps_y262->i_sequence_frame_rate_extension_n = 0; + ps_y262->i_sequence_frame_rate_extension_d = 0; + ps_y262->i_sequence_aspect_ratio_information = ps_config->i_aspect_ratio_information; + + if( ps_y262->b_sequence_mpeg1 ) + { + if( !ps_y262->b_progressive_sequence ) + { + return Y262_INIT_ERROR_MPEG1_CONSTRAINT; + } + if( ps_y262->i_intra_dc_precision != 0 ) + { + return Y262_INIT_ERROR_MPEG1_CONSTRAINT; + } + if( ps_y262->i_sequence_chroma_format != Y262_CHROMA_FORMAT_420 ) + { + return Y262_INIT_ERROR_MPEG1_CHROMA_FORMAT; + } + if( ps_y262->i_sequence_frame_rate_extension_n != 0 || + ps_y262->i_sequence_frame_rate_extension_d != 0 ) + { + return Y262_INIT_ERROR_MPEG1_CONSTRAINT; + } + if( !ps_y262->b_frame_pred_frame_dct ) + { + return Y262_INIT_ERROR_MPEG1_INTERLACED; + } + if( ps_y262->b_qscale_type ) + { + return Y262_INIT_ERROR_MPEG1_QSCALE; + } + if( ps_y262->b_intra_vlc_format ) + { + return Y262_INIT_ERROR_MPEG1_CONSTRAINT; + } + if( ps_config->rgi_fcode[ 0 ] < 1 || ps_config->rgi_fcode[ 0 ] > 7 ) + { + return Y262_INIT_ERROR_MPEG1_FCODE; + } + } + + ps_y262->i_sequence_derived_picture_duration = rgi_y262_framerate_code_duration[ ps_y262->i_sequence_frame_rate_code ]; + ps_y262->i_sequence_derived_picture_duration *= ( 1 + ps_y262->i_sequence_frame_rate_extension_d ); + ps_y262->i_sequence_derived_picture_duration *= 2; /* for repeat first field */ + ps_y262->i_sequence_derived_timescale = rgi_y262_framerate_code_timescale[ ps_y262->i_sequence_frame_rate_code ]; + ps_y262->i_sequence_derived_timescale *= ( 1 + ps_y262->i_sequence_frame_rate_extension_n ); + ps_y262->i_sequence_derived_timescale *= 2; + + ps_y262->i_sequence_derived_pulldown_picture_duration = rgi_y262_framerate_code_duration[ ps_y262->i_sequence_pulldown_frame_rate_code ]; + ps_y262->i_sequence_derived_pulldown_picture_duration *= ( 1 + ps_y262->i_sequence_frame_rate_extension_d ); + 
ps_y262->i_sequence_derived_pulldown_picture_duration *= 2; + ps_y262->i_sequence_derived_pulldown_timescale = rgi_y262_framerate_code_timescale[ ps_y262->i_sequence_pulldown_frame_rate_code ]; + ps_y262->i_sequence_derived_pulldown_timescale *= ( 1 + ps_y262->i_sequence_frame_rate_extension_n ); + ps_y262->i_sequence_derived_pulldown_timescale *= 2; + + ps_y262->i_num_lookahead_pictures = MAX( 1, ps_config->i_lookahead_pictures ); /* lookahead */ + ps_y262->i_current_input_pon = 0; + ps_y262->i_current_input_field = ps_config->b_top_field_first ? 0 : 1; + ps_y262->i_leading_lookahead_don = 0; + ps_y262->i_current_lookahead_don = 0; + ps_y262->i_current_encoder_don = 0; + ps_y262->i_current_eof_pon = -1; + ps_y262->i_current_eof_don = -1; + ps_y262->i_sequence_num_bframes = ps_config->i_bframes; /* bframes */ + ps_y262->i_sequence_keyframe_distance = ps_config->i_keyframe_ref_distance; /* 0 = I B B I, 1 = I B B P B B I ... */ + ps_y262->b_closed_gop = ps_config->b_closed_gop; + ps_y262->i_keyframe_countdown = 0; + ps_y262->i_last_keyframe_temporal_reference = 0; + if( ps_y262->b_sequence_mpeg1 ) + { + ps_y262->rgi_fcode[ 0 ][ 0 ] = ps_config->rgi_fcode[ 0 ]; + ps_y262->rgi_fcode[ 0 ][ 1 ] = ps_config->rgi_fcode[ 1 ]; + ps_y262->rgi_fcode[ 1 ][ 0 ] = ps_y262->rgi_fcode[ 0 ][ 0 ]; + ps_y262->rgi_fcode[ 1 ][ 1 ] = ps_y262->rgi_fcode[ 0 ][ 1 ]; + } + else + { + ps_y262->rgi_fcode[ 0 ][ 0 ] = ps_config->rgi_fcode[ 0 ]; + ps_y262->rgi_fcode[ 0 ][ 1 ] = ps_config->rgi_fcode[ 0 ]; + ps_y262->rgi_fcode[ 1 ][ 0 ] = ps_config->rgi_fcode[ 0 ]; + ps_y262->rgi_fcode[ 1 ][ 1 ] = ps_config->rgi_fcode[ 0 ]; + + } + ps_y262->i_max_buffered_input_pictures = ps_y262->i_num_lookahead_pictures + ( ps_y262->i_sequence_num_bframes * 2 ) + 2; /* not + 1 ? 
*/ + + ps_y262->b_variance_aq = ps_config->b_variance_aq; + ps_y262->i_psyrd_strength = ps_config->i_psyrd_strength; + ps_y262->i_quality_for_speed = ps_config->i_quality_for_speed; + + + memcpy( ps_y262->rgui8_intra_quantiser_matrix, rgui8_y262_default_intra_matrix, sizeof( rgui8_y262_default_intra_matrix ) ); + memcpy( ps_y262->rgui8_non_intra_quantiser_matrix, rgui8_y262_default_non_intra_matrix, sizeof( rgui8_y262_default_non_intra_matrix ) ); + + if( ps_config->b_non_default_intra_matrix ) + { + memcpy( ps_y262->rgui8_intra_quantiser_matrix, ps_config->rgui8_non_default_intra_matrix, sizeof( ps_y262->rgui8_intra_quantiser_matrix ) ); + } + if( ps_config->b_non_default_inter_matrix ) + { + memcpy( ps_y262->rgui8_non_intra_quantiser_matrix, ps_config->rgui8_non_default_inter_matrix, sizeof( ps_y262->rgui8_non_intra_quantiser_matrix ) ); + } + + memset( ps_y262->rgui16_intra_quantizer_matrices, 0, sizeof( ps_y262->rgui16_intra_quantizer_matrices ) ); + for( i_idx = 1; i_idx < 122; i_idx++ ) + { + int32_t i_qmat_idx; + for( i_qmat_idx = 0; i_qmat_idx < 64; i_qmat_idx++ ) + { + int32_t i_qm, i_qt, i_scale; + i_qm = ps_y262->rgui8_intra_quantiser_matrix[ i_qmat_idx ]; + i_qt = i_qm * i_idx; + i_scale = MIN( ( 1 << 16 ) - 1, ( ( 1 << 20 ) + ( i_qt / 2 ) ) / i_qt ); + ps_y262->rgui16_intra_quantizer_matrices[ i_idx ][ i_qmat_idx ] = i_scale; + if( i_scale > 0 ) + { + ps_y262->rgui16_intra_quantizer_matrices_bias[ i_idx ][ i_qmat_idx ] = 0x6000 / i_scale; + ps_y262->rgui16_intra_quantizer_matrices_trellis_bias[ i_idx ][ i_qmat_idx ] = 0x8000 / i_scale; + } + else + { + ps_y262->rgui16_intra_quantizer_matrices_bias[ i_idx ][ i_qmat_idx ] = 0; + ps_y262->rgui16_intra_quantizer_matrices_trellis_bias[ i_idx ][ i_qmat_idx ] = 0; + } + } + } + + memset( ps_y262->rgui16_non_intra_quantizer_matrices, 0, sizeof( ps_y262->rgui16_non_intra_quantizer_matrices ) ); + for( i_idx = 1; i_idx < 122; i_idx++ ) + { + int32_t i_qmat_idx; + for( i_qmat_idx = 0; i_qmat_idx < 64; 
i_qmat_idx++ ) + { + int32_t i_qm, i_qt, i_scale; + i_qm = ps_y262->rgui8_non_intra_quantiser_matrix[ i_qmat_idx ]; + i_qt = i_qm * i_idx; + i_scale = MIN( ( 1 << 16 ) - 1, ( ( 1 << 20 ) + ( i_qt / 2 ) ) / i_qt ); + ps_y262->rgui16_non_intra_quantizer_matrices[ i_idx ][ i_qmat_idx ] = i_scale; + } + } + + + ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x16 ] = y262_sad_16x16; + ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ] = y262_sad_16x8; + ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x16 ] = y262_satd_16x16; + ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ] = y262_satd_16x8; + + ps_y262->s_funcs.f_ssd_16x16 = y262_ssd_16x16; + ps_y262->s_funcs.f_ssd_8x8 = y262_ssd_8x8; + + ps_y262->s_funcs.f_add_8x8 = y262_add_8x8; + ps_y262->s_funcs.f_sub_8x8 = y262_sub_8x8; + + ps_y262->s_funcs.f_variance_16x16 = y262_variance_16x16; + ps_y262->s_funcs.f_variance_8x8 = y262_variance_8x8; + + ps_y262->s_funcs.f_quant8x8_intra_fw = y262_quant8x8_intra_fw_mpeg2; + ps_y262->s_funcs.f_quant8x8_inter_fw = y262_quant8x8_inter_fw_mpeg2; + + ps_y262->s_funcs.f_fdct_8x8 = y262_fdct_c; + ps_y262->s_funcs.f_idct_8x8 = y262_idct_c; + if( 1 ) + { + ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x16 ] = y262_sad_16x16_sse2; + ps_y262->s_funcs.rgf_sad[ BLOCK_TYPE_16x8 ] = y262_sad_16x8_sse2; + ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x16 ] = y262_satd_16x16_sse2; + ps_y262->s_funcs.rgf_satd[ BLOCK_TYPE_16x8 ] = y262_satd_16x8_sse2; + + ps_y262->s_funcs.f_ssd_16x16 = y262_ssd_16x16_sse2; + ps_y262->s_funcs.f_ssd_8x8 = y262_ssd_8x8_sse2; + ps_y262->s_funcs.f_add_8x8 = y262_add_8x8_sse2; + ps_y262->s_funcs.f_sub_8x8 = y262_sub_8x8_sse2; + ps_y262->s_funcs.f_quant8x8_intra_fw = y262_quant8x8_intra_fw_sse2; + ps_y262->s_funcs.f_quant8x8_inter_fw = y262_quant8x8_inter_fw_sse2; + + ps_y262->s_funcs.f_fdct_8x8 = y262_fdct_sse2; + ps_y262->s_funcs.f_idct_8x8 = y262_idct_sse2; + } + + + memset( ps_y262->rgi_y262_motion_bits_x, 0, sizeof( ps_y262->rgi_y262_motion_bits_x ) ); + memset( ps_y262->rgi_y262_motion_bits_y, 1, sizeof( 
ps_y262->rgi_y262_motion_bits_y ) ); + + i_fcode_m1 = ps_y262->rgi_fcode[ 0 ][ 0 ] - 1; + ps_y262->rgi_y262_motion_bits_x[ 2048 ] = 1; + i_wrap = ( 16 << ( i_fcode_m1 + 1 ) ) + 1; + for( i_idx = 0; i_idx < 16; i_idx++ ) + { + i_mvs = rgi_y262_motion_bits[ i_idx + 1 ] + i_fcode_m1; + for( i_mv_range = ( i_idx << i_fcode_m1 ) + 1; i_mv_range <= ( ( i_idx + 1 ) << i_fcode_m1 ); i_mv_range++ ) + { + ps_y262->rgi_y262_motion_bits_x[ 2048 + i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_x[ 2048 - i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_x[ 2048 + i_wrap - i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_x[ 2048 - i_wrap + i_mv_range ] = i_mvs; + } + } + + i_fcode_m1 = ps_y262->rgi_fcode[ 0 ][ 1 ] - 1; + ps_y262->rgi_y262_motion_bits_y[ 2048 ] = 1; + i_wrap = ( 16 << ( i_fcode_m1 + 1 ) ) + 1; + for( i_idx = 0; i_idx < 16; i_idx++ ) + { + i_mvs = rgi_y262_motion_bits[ i_idx + 1 ] + i_fcode_m1; + for( i_mv_range = ( i_idx << i_fcode_m1 ) + 1; i_mv_range <= ( ( i_idx + 1 ) << i_fcode_m1 ); i_mv_range++ ) + { + ps_y262->rgi_y262_motion_bits_y[ 2048 + i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_y[ 2048 - i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_y[ 2048 + i_wrap - i_mv_range ] = i_mvs; + ps_y262->rgi_y262_motion_bits_y[ 2048 - i_wrap + i_mv_range ] = i_mvs; + } + } + + + ps_y262->b_next_reference_picture_toggle = FALSE; + for( i_idx = 0; i_idx < 3; i_idx++ ) + { + ps_y262->rgs_frame_buffer[ i_idx ].pui8_luma = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_width * ps_y262->i_sequence_height ); + ps_y262->rgs_frame_buffer[ i_idx ].i_stride_luma = ps_y262->i_sequence_width; + ps_y262->rgs_frame_buffer[ i_idx ].pui8_cb = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + ps_y262->rgs_frame_buffer[ i_idx ].pui8_cr = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + 
ps_y262->rgs_frame_buffer[ i_idx ].i_stride_chroma = ps_y262->i_sequence_chroma_width; + } + + for( i_idx = 0; i_idx < ps_y262->i_max_buffered_input_pictures; i_idx++ ) + { + int32_t i_lookahead_size; + int32_t i_user_idx; + ps_y262->rgs_buffered_input_pictures[ i_idx ].b_used = FALSE; + ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_luma = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_width * ps_y262->i_sequence_height ); + ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_cb = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_cr = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + i_lookahead_size = ( ps_y262->i_sequence_width * ps_y262->i_sequence_height ) >> 8; + ps_y262->rgs_buffered_input_pictures[ i_idx ].ps_lookahead = ( y262_lookahead_mb_t * ) y262_alloc( sizeof( y262_lookahead_mb_t ) * i_lookahead_size ); + for( i_user_idx = 0; i_user_idx < Y262_MAX_NUM_USER_DATA; i_user_idx++ ) + { + ps_y262->rgs_buffered_input_pictures[ i_idx ].rgps_user_data[ i_user_idx ] = ( y262_user_data_t * ) y262_alloc( sizeof( y262_user_data_t ) ); + } + } + + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + int32_t i_lookahead_size_x = ps_y262->i_sequence_width >> 4; + int32_t i_lookahead_size_y = ps_y262->i_sequence_height >> 4; + ps_y262->rgpi_mbtree_references[ i_idx ] = ( int32_t *) y262_alloc( i_lookahead_size_x * i_lookahead_size_y * sizeof( int32_t ) ); + } + + y262_bitstream_init( &ps_y262->s_bitstream, 10000000 ); + + y262_init_motion_compensation( ps_y262 ); + y262_ratectrl_init( ps_y262 ); + + if( ps_config->i_profile == Y262_PROFILE_DERIVE ) + { + ps_y262->i_derived_profile = ps_y262->i_sequence_num_bframes > 0 ? 
Y262_PROFILE_MAIN : Y262_PROFILE_SIMPLE; + } + else + { + if( ps_config->i_profile == Y262_PROFILE_SIMPLE ) + { + if( ps_y262->i_sequence_num_bframes > 0 ) + { + return Y262_INIT_ERROR_PROFILE_BFRAMES_COUNT; + } + } + else if( ps_config->i_profile == Y262_PROFILE_MAIN ) + { + } + else + { + return Y262_INIT_ERROR_PROFILE; + } + ps_y262->i_derived_profile = ps_config->i_profile; + } + if( ps_config->i_level == Y262_LEVEL_DERIVE ) + { + int32_t i_check; + int32_t rgi_levels[ 4 ] = { Y262_LEVEL_LOW, Y262_LEVEL_MAIN, Y262_LEVEL_HIGH1440, Y262_LEVEL_HIGH }; + + for( i_check = 0; i_check < 4; i_check++ ) + { + if( y262_validate_level( ps_y262, rgi_levels[ i_check ], i_check < 3 ? TRUE : FALSE ) ) + { + ps_y262->i_derived_level = rgi_levels[ i_check ]; + break; + } + } + if( i_check == 4 ) + { + return Y262_INIT_ERROR_LEVEL_LIMITS; + } + } + else + { + if( !y262_validate_level( ps_y262, ps_config->i_level, FALSE ) ) + { + return Y262_INIT_ERROR_LEVEL_LIMITS; + } + ps_y262->i_derived_level = ps_config->i_level; + } + + if( ps_y262->b_multithreading ) + { + ps_y262->b_multithreading = y262_can_do_threads(); + } + + if( ps_y262->b_multithreading ) + { + ps_y262->p_resource_mutex = y262_create_mutex( ps_y262 ); + if( !ps_y262->p_resource_mutex ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to create mutex for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + } + + for( i_idx = 0; i_idx < ps_y262->i_num_slice_encoders; i_idx++ ) + { + ps_y262->rgps_slice_encoders[ i_idx ] = ( y262_t * ) y262_alloc( sizeof( y262_t ) ); + y262_bitstream_init( &ps_y262->rgps_slice_encoders[ i_idx ]->s_bitstream, 10000000 ); + if( ps_y262->b_multithreading ) + { + ps_y262->rgs_slice_threads[ i_idx ].i_slice_encoder_idx = i_idx; + ps_y262->rgs_slice_threads[ i_idx ].ps_y262 = ps_y262; + ps_y262->rgs_slice_threads[ i_idx ].p_start_event = y262_create_event( ps_y262 ); + ps_y262->rgs_slice_threads[ i_idx 
].p_finished_event = y262_create_event( ps_y262 ); + if( ps_y262->rgs_slice_threads[ i_idx ].p_start_event == NULL || ps_y262->rgs_slice_threads[ i_idx ].p_finished_event == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to create events for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + ps_y262->rgs_slice_threads[ i_idx ].p_thread = y262_create_thread( ps_y262, y262_slice_thread, &ps_y262->rgs_slice_threads[ i_idx ] ); + if( ps_y262->rgs_slice_threads[ i_idx ].p_thread == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to spawn slice thread for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + } + } + + ps_y262->b_lookahead_running = FALSE; + if( ps_y262->b_multithreading ) + { + ps_y262->s_lookahead_thread.p_start_event = y262_create_event( ps_y262 ); + ps_y262->s_lookahead_thread.p_finished_event = y262_create_event( ps_y262 ); + if( ps_y262->s_lookahead_thread.p_start_event == NULL || ps_y262->s_lookahead_thread.p_finished_event == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to create events for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + ps_y262->s_lookahead_thread.p_thread = y262_create_thread( ps_y262, y262_main_lookahead_thread, ps_y262 ); + if( ps_y262->s_lookahead_thread.p_thread == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to spawn slice thread for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + + for( i_idx = 0; i_idx < ps_y262->i_num_lookahead_encoders; i_idx++ ) + { + ps_y262->rgs_lookahead_threads[ i_idx ].i_slice_encoder_idx = i_idx; + ps_y262->rgs_lookahead_threads[ i_idx ].ps_y262 = ps_y262; + ps_y262->rgs_lookahead_threads[ i_idx ].p_start_event = y262_create_event( ps_y262 ); + 
ps_y262->rgs_lookahead_threads[ i_idx ].p_finished_event = y262_create_event( ps_y262 ); + if( ps_y262->rgs_lookahead_threads[ i_idx ].p_start_event == NULL || ps_y262->rgs_lookahead_threads[ i_idx ].p_finished_event == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to create events for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + ps_y262->rgs_lookahead_threads[ i_idx ].p_thread = y262_create_thread( ps_y262, y262_lookahead_thread, &ps_y262->rgs_lookahead_threads[ i_idx ] ); + if( ps_y262->rgs_lookahead_threads[ i_idx ].p_thread == NULL ) + { + ps_y262->s_funcs.pf_error_callback( ps_y262->p_cb_handle, Y262_ERROR_THREADING, ( int8_t * )"failure to spawn slice thread for multithreading" ); + return Y262_INIT_ERROR_RESOURCE_INTERNAL; + } + } + } + + return Y262_INIT_SUCCESS; +} + +void y262_deinitialize( void *p_y262 ) +{ + int32_t i_idx; + y262_t *ps_y262; + + if( p_y262 == NULL ) + { + return; + } + + ps_y262 = ( y262_t * )p_y262; + + y262_bitstream_deinit( &ps_y262->s_bitstream ); + + for( i_idx = 0; i_idx < 3; i_idx++ ) + { + y262_dealloc( ps_y262->rgs_frame_buffer[ i_idx ].pui8_luma ); + y262_dealloc( ps_y262->rgs_frame_buffer[ i_idx ].pui8_cb ); + y262_dealloc( ps_y262->rgs_frame_buffer[ i_idx ].pui8_cr ); + } + + for( i_idx = 0; i_idx < ps_y262->i_max_buffered_input_pictures; i_idx++ ) + { + int32_t i_user_idx; + y262_dealloc( ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_luma ); + y262_dealloc( ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_cb ); + y262_dealloc( ps_y262->rgs_buffered_input_pictures[ i_idx ].pui8_cr ); + y262_dealloc( ps_y262->rgs_buffered_input_pictures[ i_idx ].ps_lookahead ); + for( i_user_idx = 0; i_user_idx < Y262_MAX_NUM_USER_DATA; i_user_idx++ ) + { + y262_dealloc( ps_y262->rgs_buffered_input_pictures[ i_idx ].rgps_user_data[ i_user_idx ] ); + } + } + + for( i_idx = 0; i_idx < 4; i_idx++ ) + { + y262_dealloc( 
ps_y262->rgpi_mbtree_references[ i_idx ] ); + } + + y262_ratectrl_deinit( ps_y262 ); + + if( ps_y262->b_multithreading ) + { + for( i_idx = 0; i_idx < ps_y262->i_num_slice_encoders; i_idx++ ) + { + y262_slice_thread_t *ps_slice_thread; + ps_slice_thread = &ps_y262->rgs_slice_threads[ i_idx ]; + ps_slice_thread->i_command = Y262_SLICE_THREAD_CMD_EXIT; + y262_event_set_g( ps_y262, ps_slice_thread->p_start_event ); + y262_event_wait_g( ps_y262, ps_slice_thread->p_finished_event ); + + y262_join_thread( ps_y262, ps_slice_thread->p_thread ); + y262_destroy_event( ps_y262, ps_slice_thread->p_start_event ); + y262_destroy_event( ps_y262, ps_slice_thread->p_finished_event ); + } + for( i_idx = 0; i_idx < ps_y262->i_num_lookahead_encoders; i_idx++ ) + { + y262_slice_thread_t *ps_slice_thread; + ps_slice_thread = &ps_y262->rgs_lookahead_threads[ i_idx ]; + ps_slice_thread->i_command = Y262_SLICE_THREAD_CMD_EXIT; + y262_event_set_g( ps_y262, ps_slice_thread->p_start_event ); + y262_event_wait_g( ps_y262, ps_slice_thread->p_finished_event ); + + y262_join_thread( ps_y262, ps_slice_thread->p_thread ); + y262_destroy_event( ps_y262, ps_slice_thread->p_start_event ); + y262_destroy_event( ps_y262, ps_slice_thread->p_finished_event ); + } + ps_y262->s_lookahead_thread.i_command = Y262_SLICE_THREAD_CMD_EXIT; + y262_event_set_g( ps_y262, ps_y262->s_lookahead_thread.p_start_event ); + y262_event_wait_g( ps_y262, ps_y262->s_lookahead_thread.p_finished_event ); + + y262_join_thread( ps_y262, ps_y262->s_lookahead_thread.p_thread ); + y262_destroy_event( ps_y262, ps_y262->s_lookahead_thread.p_start_event ); + y262_destroy_event( ps_y262, ps_y262->s_lookahead_thread.p_finished_event ); + + y262_destroy_mutex( ps_y262, ps_y262->p_resource_mutex ); + } + + for( i_idx = 0; i_idx < ps_y262->i_num_slice_encoders; i_idx++ ) + { + y262_bitstream_deinit( &ps_y262->rgps_slice_encoders[ i_idx ]->s_bitstream ); + y262_dealloc( ps_y262->rgps_slice_encoders[ i_idx ] ); + } + + y262_dealloc( ps_y262 ); 
+} + + +int32_t y262_push_input_picture( void *p_y262, y262_input_picture_t *ps_picture, int32_t i_pon ) +{ + int32_t i_buf_idx, i_idx; + y262_t *ps_y262; + + if( p_y262 == NULL ) + { + return Y262_PUSH_INPUT_ERR_ARG; + } + + ps_y262 = ( y262_t *) p_y262; + if( ps_y262->i_current_eof_pon >= 0 && ps_picture != NULL ) + { + return Y262_PUSH_INPUT_ERR_STATE; + } + + if( ps_picture ) + { + i_buf_idx = y262_get_free_input_frame_idx( ps_y262 ); + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_used = TRUE; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].i_don = -1; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].i_pon = ps_y262->i_current_input_pon++; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].i_frame_type = -1; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_progressive_frame = ps_picture->i_frame_structure != Y262_INPUT_PICTURE_FRAME_INTERLACED; + if( ps_picture->i_frame_structure != Y262_INPUT_PICTURE_FRAME_INTERLACED ) + { + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_repeat_first_field = ps_picture->i_frame_structure == Y262_INPUT_PICTURE_FRAME_PROGRESSIVE_REPEAT; + } + else + { + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_repeat_first_field = FALSE; + } + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_top_field_first = ( ps_y262->i_current_input_field & 1 ) ? 
FALSE : TRUE; + if( ps_picture->i_frame_structure == Y262_INPUT_PICTURE_FRAME_PROGRESSIVE_REPEAT ) + { + ps_y262->i_current_input_field += 3; + } + else + { + ps_y262->i_current_input_field += 2; + } + + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + + + memcpy( ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].pui8_luma, ps_picture->pui8_luma, sizeof( uint8_t ) * ps_y262->i_sequence_width * ps_y262->i_sequence_height ); + memcpy( ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].pui8_cb, ps_picture->pui8_cb, sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + memcpy( ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].pui8_cr, ps_picture->pui8_cr, sizeof( uint8_t ) * ps_y262->i_sequence_chroma_width * ps_y262->i_sequence_chroma_height ); + + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].b_force_new_gop = ps_picture->b_start_new_gop; + + /* the input picture and the buffered copy both provide only Y262_MAX_NUM_USER_DATA payload slots, excess entries are ignored instead of indexing past the arrays */ + /* NOTE(review): i_len is copied unchecked, presumably it must not exceed Y262_MAX_USER_DATA_SIZE - confirm against y262_user_data_t */ + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].i_num_user_data = 0; + for( i_idx = 0; i_idx < ps_picture->i_num_user_data && i_idx < Y262_MAX_NUM_USER_DATA; i_idx++ ) + { + int32_t i_len = ps_picture->rgi_user_data_len[ i_idx ]; + memcpy( &ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].rgps_user_data[ i_idx ]->rgui8_user_data[ 0 ], ps_picture->rgpui8_user_data[ i_idx ], i_len * sizeof( uint8_t ) ); + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].rgps_user_data[ i_idx ]->i_len = i_len; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].rgps_user_data[ i_idx ]->i_where = ps_picture->rgi_user_data_where[ i_idx ]; + ps_y262->rgs_buffered_input_pictures[ i_buf_idx ].i_num_user_data++; + } + } + else + { + /* NULL picture: remember the picture order number at which the input ended */ + ps_y262->i_current_eof_pon = ps_y262->i_current_input_pon; + } + + if( ps_y262->b_lookahead_running && !ps_y262->b_flushing ) + { + if( ( ps_y262->i_current_input_pon > ps_y262->i_current_lookahead_don + ps_y262->i_sequence_num_bframes ) || 
+ ( ps_y262->i_current_eof_pon >= 0 && ps_y262->i_current_eof_pon <= ps_y262->i_current_lookahead_don + ps_y262->i_sequence_num_bframes ) ) + { + y262_finish_lookahead( ps_y262 ); + ps_y262->b_lookahead_running = FALSE; + assert( ps_y262->i_current_lookahead_don == ps_y262->i_leading_lookahead_don ); + } + } + + if( !ps_y262->b_flushing && ( + ( ps_y262->i_current_input_pon == 1 ) || + ( ps_y262->i_current_input_pon > ps_y262->i_current_lookahead_don + ps_y262->i_sequence_num_bframes ) || + ( ( ps_y262->i_current_eof_pon >= 0 && ps_y262->i_current_eof_pon <= ps_y262->i_current_lookahead_don + ps_y262->i_sequence_num_bframes && + ps_y262->i_leading_lookahead_don < ps_y262->i_current_input_pon ) ) ) ) + { + if( !ps_y262->b_lookahead_running ) + { + y262_setup_lookahead_next_and_start_lookahead( ps_y262 ); + + ps_y262->b_lookahead_running = TRUE; + ps_y262->i_current_lookahead_don += ps_y262->i_lookahead_next_ref - ps_y262->i_lookahead_next_pon + 1; + } + else + { + assert( FALSE ); + } + } + + if( ps_y262->i_current_eof_pon == ps_y262->i_current_lookahead_don ) + { + if( ps_y262->b_lookahead_running ) + { + y262_finish_lookahead( ps_y262 ); + } + ps_y262->b_lookahead_running = FALSE; + ps_y262->i_current_eof_don = ps_y262->i_current_lookahead_don; + ps_y262->b_flushing = TRUE; + } + + if( ( ps_y262->i_current_lookahead_don - ps_y262->i_current_encoder_don > ps_y262->i_num_lookahead_pictures + ( 1 + ps_y262->i_sequence_num_bframes ) ) ) + { + y262_picture_t *ps_pic; + + ps_pic = &ps_y262->rgs_buffered_input_pictures[ y262_get_input_frame_don( ps_y262, ps_y262->i_current_encoder_don++ ) ]; + y262_encode_picture( ps_y262, ps_pic, ps_pic->i_frame_type, ps_pic->i_pon ); + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + ps_pic->b_used = FALSE; + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + } + else if( ps_y262->b_flushing ) + { + if( ps_y262->i_current_encoder_don < 
ps_y262->i_current_lookahead_don ) + { + y262_picture_t *ps_pic; + + ps_pic = &ps_y262->rgs_buffered_input_pictures[ y262_get_input_frame_don( ps_y262, ps_y262->i_current_encoder_don++ ) ]; + y262_encode_picture( ps_y262, ps_pic, ps_pic->i_frame_type, ps_pic->i_pon ); + + if( ps_y262->b_multithreading ) + { + y262_mutex_lock( ps_y262, ps_y262->p_resource_mutex ); + } + ps_pic->b_used = FALSE; + if( ps_y262->b_multithreading ) + { + y262_mutex_unlock( ps_y262, ps_y262->p_resource_mutex ); + } + } + if( ps_y262->i_current_encoder_don < ps_y262->i_current_lookahead_don ) + { + return Y262_PUSH_INPUT_CONTINUE; + } + else + { + return Y262_PUSH_INPUT_FLUSHED; + } + } + else + { + int32_t i; + i = 0; + } + + return Y262_PUSH_INPUT_CONTINUE; +} + diff --git a/src/y262/y262api.h b/src/y262/y262api.h new file mode 100644 index 0000000..8a3ed76 --- /dev/null +++ b/src/y262/y262api.h @@ -0,0 +1,301 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +/* unable to allocate memory */ +#define Y262_ERROR_MEMORY 1 +/* vbv buffer failure */ +#define Y262_ERROR_BUFFER 2 +/* profile/level limit error */ +#define Y262_ERROR_PROFILELEVEL 3 +/* thread resource error */ +#define Y262_ERROR_THREADING 4 +/* +once the encoder is ( partly ) initialized it will report errors through an error callback function +*/ +typedef void ( *pf_error_callback_t ) ( void *p_handle, int32_t i_error_code, int8_t *pi8_error_text ); + + +/* structure for result callback function: */ +typedef union +{ + struct + { + int32_t i_type; /* Y262_RESULT_BITSTREAM */ + int32_t i_don; /* decode order number */ + int32_t i_pon; /* picture ( display ) order number */ + int32_t i_unit_type; /* startcode of bitstream unit */ + uint8_t *pui8_unit; /* pointer to bitstream data */ + int32_t i_unit_length; /* bitstream data length */ + } bitstream_unit; + struct + { + int32_t i_type; /* Y262_RESULT_RECON */ + int32_t i_don; /* decode order number */ + int32_t i_pon; /* picture order number */ + int32_t i_frame_type; + double f64_psnr[ 3 ]; /* psnr of the 3 planes to the src */ + uint8_t *pui8_luma; /* ptr to recon luma plane, i_coded_width * i_coded_height size */ + uint8_t *pui8_cb; /* ptr to recon cb plane, ( i_coded_width / 2 ) * ( i_coded_height / 2 ) size */ + uint8_t *pui8_cr; /* ptr to recon cr plane, ( i_coded_width / 2 ) * ( i_coded_height / 2 ) size */ + } recon; + struct + { + int32_t i_don; /* decode order number */ + uint8_t 
*pui8_data; /* rc sample data */ + int32_t i_data_length; /* rc sample data length */ + } rc_sample; +} y262_result_t; + +#define Y262_RESULT_BITSTREAM 1 +#define Y262_RESULT_RECON 2 +#define Y262_RESULT_RC_SAMPLE 3 +/* +when the encoder has an encoding result ( bitstream unit, recon picture or bitrate control sample for multipass ) +it will output the result through a callback function. i_result_type is one of the Y262_RESULT_* defines above +*/ +typedef void ( *pf_result_callback_t ) ( void *p_handle, int32_t i_result_type, y262_result_t *ps_result ); + + +/* +if the rate control mode is set to 2nd pass the encoder will read bitrate control samples from a previous pass +through a callback function. the callback should return the bitrate control sample from the previous pass +with the same decode order number ( i_don ) by copying it to pui8_rcv_data. i_data_length is the expected size of +the rc sample data and the size of the pui8_rcv_data array. +the callback should return the size of the rc sample ( equal to i_data_length ) if such an rc sample is available. +if the previous pass did not put out an rc sample with the requested decode order number ( the sequence was shorter ), +the callback function should return 0, signalling end of sequence. + +the behaviour of the encoder is undefined if more frames are pushed into the encoder for encoding than rc samples were +supplied through the callback function. 
+*/ +typedef int32_t ( *pf_rcsample_callback_t ) ( void *p_handle, int32_t i_don, uint8_t *pui8_rcv_data, int32_t i_data_length ); + +typedef struct { + int32_t i_multithreading; /* != 0 means enable multithreading */ + int32_t i_num_threads; /* number of threads, >= 1 , <= 8 */ + + int32_t i_display_width; /* display width, for example 1920 */ + int32_t i_display_height; /* display height, for example 1080 */ + int32_t i_coded_width; /* coded width, has to be multiple of 16, for example 1920 */ + int32_t i_coded_height; /* coded height, has to be multiple of 16, for example 1088 */ +#define Y262_CHROMA_FORMAT_420 1 +#define Y262_CHROMA_FORMAT_422 2 +#define Y262_CHROMA_FORMAT_444 3 + int32_t i_coded_chroma_format; + +#define Y262_PROFILE_DERIVE 256 +#define Y262_PROFILE_SIMPLE 5 +#define Y262_PROFILE_MAIN 4 + int32_t i_profile; /* profile, one of the profile defines */ + +#define Y262_LEVEL_DERIVE 256 +#define Y262_LEVEL_LOW 10 +#define Y262_LEVEL_MAIN 8 +#define Y262_LEVEL_HIGH1440 6 +#define Y262_LEVEL_HIGH 4 + int32_t i_level; /* level, one of the level defines */ + +#define Y262_VIDEOFORMAT_PAL 0 +#define Y262_VIDEOFORMAT_NTSC 1 +#define Y262_VIDEOFORMAT_SECAM 2 +#define Y262_VIDEOFORMAT_709 3 +#define Y262_VIDEOFORMAT_UNKNOWN 4 + int32_t i_videoformat; /* video format, one of the videoformat defines */ + + int32_t i_frame_rate_code; /* framerate code, see mpeg2 spec or readme.txt for valid values */ + int32_t i_pulldown_frame_rate_code; /* framerate code to pull input up to ( field repeat has to be done by application ) */ + int32_t i_aspect_ratio_information; /* aspect ratio code, see mpeg2 spec or readme txt for valid values */ + +#define BITRATE_CONTROL_CQ 0 +#define BITRATE_CONTROL_PASS1 1 +#define BITRATE_CONTROL_PASS2 2 + int32_t i_rcmode; /* bitrate control mode, one of the bitrate control defines */ + int32_t i_bitrate; /* average bitrate, in kbps */ + int32_t i_vbv_rate; /* video buffer rate, in kbps, >= bitrate */ + int32_t i_vbv_size; /* video 
buffer size, in kbit */ + int32_t i_quantizer; /* quantizer for CQ mode, >= 1, <= 31 */ + + int32_t b_interlaced; /* enable interlaced modes */ + int32_t b_top_field_first; /* field order of the first picture, due to field repeating this might change per picture */ + int32_t i_lookahead_pictures; /* number of lookahead pictures, >= 10, <= 50 */ + int32_t i_bframes; /* number of b frames between reference frames, >= 0, <= 4 */ + int32_t i_keyframe_ref_distance; /* number of reference frames between keyframes */ + int32_t b_closed_gop; /* if this is != 0 then B frames directly following an I frame in a new GOP in decode order only reference the I frame */ + int32_t rgi_fcode[ 2 ]; /* fcode for motion vector range, max range is profile dependent, mpeg2: horizontal, vertical, mpeg1: all, ignored */ + + int32_t b_non_default_intra_matrix; /* enable custom intra matrix stored directly below */ + uint8_t rgui8_non_default_intra_matrix[ 64 ]; /* custom intra matrix */ + + int32_t b_non_default_inter_matrix; /* enable custom inter matrix stored directly below */ + uint8_t rgui8_non_default_inter_matrix[ 64 ]; /* custom inter matrix */ + + void *p_cb_handle; /* user pointer callbacks get called with */ + pf_error_callback_t pf_error_callback; /* user supplied error callback function */ + pf_result_callback_t pf_result_callback; /* user supplied result callback function */ + pf_rcsample_callback_t pf_rcsample_callback; /* user supplied rc sample read callback function */ + + int32_t b_variance_aq; /* if this is != 0 then variance based AQ is enabled */ + int32_t i_psyrd_strength; /* >= 0, <= 512, experimental */ + int32_t i_quality_for_speed; /* speed/quality tradeoff, negative gives more speed, positive more quality, >= -100, <= 100 */ + + int32_t b_qscale_type; /* if this is != 0 then non linear qscale will be used */ + int32_t b_mpeg1; + int32_t b_cbr_padding; /* guarantee vbvrate rate, insert zero byte stuffing if needed */ + +} y262_configuration_t; + +typedef struct { + 
uint8_t *pui8_luma; /* input picture luma plane, i_coded_width * i_coded_height size */ + uint8_t *pui8_cb; /* input picture cb plane, ( i_coded_width >> 1 ) * ( i_coded_height >> 1 ) size */ + uint8_t *pui8_cr; /* input picture cr plane, ( i_coded_width >> 1 ) * ( i_coded_height >> 1 ) size */ +#define Y262_INPUT_PICTURE_FRAME_PROGRESSIVE 0 +#define Y262_INPUT_PICTURE_FRAME_PROGRESSIVE_REPEAT 1 +#define Y262_INPUT_PICTURE_FRAME_INTERLACED 2 + /* PROGRESSIVE means a progressive frame */ + /* PROGRESSIVE_REPEAT means a progressive frame of which one field is to be repeated for pulldown */ + /* INTERLACED means the frame actually consists of two interleaved fields */ + int32_t i_frame_structure; /* one of the Y262_INPUT_PICTURE_FRAME_* defines */ + + int32_t b_start_new_gop; /* start a new gop at this picture ( force I ) */ + + int32_t i_num_user_data; /* number of user data payloads to be inserted into the stream at this frame, <= Y262_MAX_NUM_USER_DATA */ +#define Y262_MAX_NUM_USER_DATA 4 + uint8_t *rgpui8_user_data[ Y262_MAX_NUM_USER_DATA ]; /* pointers to the user data payloads */ +#define Y262_MAX_USER_DATA_SIZE ( 1 << 17 ) + int32_t rgi_user_data_len[ Y262_MAX_NUM_USER_DATA ]; /* user data payload lengths */ +#define Y262_USER_DATA_AFTER_SEQUENCE 0 +#define Y262_USER_DATA_AFTER_GOP 1 +#define Y262_USER_DATA_BEFORE_SLICES 2 + int32_t rgi_user_data_where[ Y262_MAX_NUM_USER_DATA ]; /* user data location, one of the Y262_USER_DATA_* defines */ +} y262_input_picture_t; + + +/* +these functions should be used to allocate and deallocate the planes of a y262_input_picture_t +*/ + +void *y262_alloc( size_t s_size ); +void y262_dealloc( void *p_ptr ); + +/* +creates an encoder context and initializes the configuration ps_config with some default values. +returns the encoder context. 
+*/ +void *y262_create( y262_configuration_t *ps_config ); + +#define Y262_INIT_SUCCESS 0 + +/* coded size not multiple of 16 or < 16, > 4096 */ +#define Y262_INIT_ERROR_CODED_SIZE -1 +/* display size > coded size */ +#define Y262_INIT_ERROR_DISPLAY_SIZE -2 +/* fr code out of range */ +#define Y262_INIT_ERROR_FRAMERATE -3 +/* pulldown fr code out of range */ +#define Y262_INIT_ERROR_PFRAMERATE -4 +/* rc mode out of range */ +#define Y262_INIT_ERROR_RCMODE -5 +/* bitrate < 20kbit */ +#define Y262_INIT_ERROR_BITRATE -6 +/* vbv rate < 20kbit */ +#define Y262_INIT_ERROR_VBVRATE -7 +/* vbv size < 20kbit */ +#define Y262_INIT_ERROR_VBVSIZE -8 +/* quantizer out of range */ +#define Y262_INIT_ERROR_QUANTIZER -9 +/* lookahead pic < 10 or > 50 */ +#define Y262_INIT_ERROR_LOOKAHEADPICS -10 +/* keyframe to ref distance < 0 */ +#define Y262_INIT_ERROR_KEYFRAME_DIST -11 +/* b frame count < 0 or > 4 */ +#define Y262_INIT_ERROR_BFRAMES_COUNT -12 +/* quality for speed < -100 or > 100 */ +#define Y262_INIT_ERROR_QUALITY_SPEED -13 +/* psyrd strength > 512 */ +#define Y262_INIT_ERROR_PSYRD_STR -14 +/* aspect ratio code out of range */ +#define Y262_INIT_ERROR_ASPECT -15 +/* thread count < 1 or > 8 */ +#define Y262_INIT_ERROR_THREADS -16 +/* video format code out of range */ +#define Y262_INIT_ERROR_VIDEO_FORMAT -17 +/* invalid profile */ +#define Y262_INIT_ERROR_PROFILE -18 +/* invalid level */ +#define Y262_INIT_ERROR_LEVEL -19 +/* invalid chroma format */ +#define Y262_INIT_ERROR_CHROMA_FORMAT -20 +/* generic mpeg1 constraint failure */ +#define Y262_INIT_ERROR_MPEG1_CONSTRAINT -21 +/* invalid chroma format for mpeg1 */ +#define Y262_INIT_ERROR_MPEG1_CHROMA_FORMAT -22 +/* mpeg1 does not support interlaced */ +#define Y262_INIT_ERROR_MPEG1_INTERLACED -23 +/* qscale type setting not supported for mpeg1 */ +#define Y262_INIT_ERROR_MPEG1_QSCALE -24 +/* invalid fcode for mpeg1 ( only config.rgi_fcode[ 0 ] is used ) */ +#define Y262_INIT_ERROR_MPEG1_FCODE -25 +/* invalid b frame count for profile 
*/ +#define Y262_INIT_ERROR_PROFILE_BFRAMES_COUNT -26 +/* invalid config for level maximums */ +#define Y262_INIT_ERROR_LEVEL_LIMITS -27 +/* internal resource error */ +#define Y262_INIT_ERROR_RESOURCE_INTERNAL -28 +/* invalid y262 context */ +#define Y262_INIT_ERROR_CONTEXT -29 + +/* +initializes the encoder context p_y262 with the encoding configuration ps_config. +returns Y262_INIT_SUCCESS or one of the Y262_INIT_* error codes above. +*/ +int32_t y262_initialize( void *p_y262, y262_configuration_t *ps_config ); + + +#define Y262_PUSH_INPUT_CONTINUE 0 +#define Y262_PUSH_INPUT_FLUSHED 1 +#define Y262_PUSH_INPUT_ERR_ARG -1 +#define Y262_PUSH_INPUT_ERR_STATE -2 + +/* +pushes the input picture ps_picture into the encoder context p_y262 for encoding. +if the input picture ps_picture is NULL then this signals the encoder that no more +pictures are to be encoded and that the picture queue is to be flushed. + +returns Y262_PUSH_INPUT_CONTINUE if the function can be called again ( with NULL as ps_picture if flushing ). +returns Y262_PUSH_INPUT_FLUSHED if flushing is done. +returns Y262_PUSH_INPUT_ERR_ARG if p_y262 or ps_picture is invalid. +returns Y262_PUSH_INPUT_ERR_STATE if the function was called with ps_picture as NULL before but then got supplied a non NULL ps_picture. 
+*/ +int32_t y262_push_input_picture( void *p_y262, y262_input_picture_t *ps_picture, int32_t i_pon ); + +/* +destroys the encoder context p_y262 and frees resources +*/ +void y262_deinitialize( void *p_y262 ); diff --git a/src/y262app/CMakeLists.txt b/src/y262app/CMakeLists.txt new file mode 100644 index 0000000..604df94 --- /dev/null +++ b/src/y262app/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.1) +project(y262app) + +if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(ARCH "_x64") +else() + set(ARCH "_x86") +endif() + +set( SRC_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/main.c +) + +add_executable(y262app ${SRC_FILES}) +target_link_libraries(y262app liby262) +set_target_properties(y262app PROPERTIES + OUTPUT_NAME "y262$<$:d>${ARCH}" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin" + POSITION_INDEPENDENT_CODE ON +) diff --git a/src/y262app/main.c b/src/y262app/main.c new file mode 100644 index 0000000..59c8ce1 --- /dev/null +++ b/src/y262app/main.c @@ -0,0 +1,926 @@ +/* +Copyright (c) 2013, Ralf Willenbacher +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#define _CRT_SECURE_NO_WARNINGS 1 + +#include +#include +#include +#include + +#ifndef TRUE +#define TRUE 1 +#endif + +#if defined( WIN32 ) || defined( WIN64 ) +#include +#include +#include +#endif + +#define DUMP_MEM_LEAKS 0 + +#if DUMP_MEM_LEAKS +#include +#endif + +#include "y262api.h" + + +FILE *f_rec, *f_out, *f_mpass_out = NULL, *f_mpass_in = NULL; +int64_t i64_bytes_num_out = 0; +int32_t i_accumulated_bytes; + +typedef struct { + int32_t i_don; + int32_t i_len; +} y262app_rcsamplehdr_t; + + +double d_num_mean_psnr; +double rgd_mean_psnr_accum[ 3 ]; + +int32_t i_next_recon_output_pon = 0; +int32_t i_buffered_output_picture_pon = -1; +int32_t i_pad_x, i_pad_y, i_width = 720, i_height = 576, i_frcode = 1; +int32_t i_chroma_width, i_chroma_height, i_coded_chroma_width, i_coded_chroma_height; + +static const int32_t rgi_framerate_code_duration[ 16 ] = { + 3600, 1001, 1000, 3600, 1001, 3000, 1800, 1001, 1500, 3600, 3600, 3600, 3600, 3600, 3600, 3600 +}; + +static const int32_t rgi_framerate_code_timescale[ 16 ] = { + 90000, 24000, 24000, 90000, 30000, 90000, 90000, 60000, 90000, 90000, 90000, 90000, 90000, 90000, 90000, 90000 +}; + +double d_picture_duration, d_timescale; + +y262_input_picture_t s_buffered_output_picture; + +y262_configuration_t s_config; + +void y262app_error_cb( void *p_handle, int32_t i_code, int8_t *pi8_msg ) +{ + fprintf( stderr, "error %d: '%s'\n" , i_code, pi8_msg ); +} + +void y262app_result_cb( void *p_handle, int32_t 
i_result_type, y262_result_t *ps_result ) +{ + if( i_result_type == Y262_RESULT_BITSTREAM ) + { + /*fprintf( stderr, "result %d ( p %d d %d )\n", i_result_type, ps_result->bitstream_unit.i_pon, ps_result->bitstream_unit.i_don );*/ + i64_bytes_num_out += ps_result->bitstream_unit.i_unit_length; + i_accumulated_bytes += ps_result->bitstream_unit.i_unit_length; + if( f_out != NULL ) + { + fwrite( ps_result->bitstream_unit.pui8_unit, sizeof( uint8_t ), ps_result->bitstream_unit.i_unit_length, f_out ); + } + } + else if( i_result_type == Y262_RESULT_RECON ) + { + /*fprintf( stderr, "result %d ( p %d d %d )\n", i_result_type, ps_result->recon.i_pon, ps_result->recon.i_don );*/ + rgd_mean_psnr_accum[ 0 ] += ps_result->recon.f64_psnr[ 0 ]; + rgd_mean_psnr_accum[ 1 ] += ps_result->recon.f64_psnr[ 1 ]; + rgd_mean_psnr_accum[ 2 ] += ps_result->recon.f64_psnr[ 2 ]; + d_num_mean_psnr += 1.0; + + fprintf( stderr, "%04d: T:%c Y=%.2f CB=%.2f CR=%.2f sz=%d\n", ps_result->recon.i_pon, + ps_result->recon.i_frame_type == 1 ? 'I' : ps_result->recon.i_frame_type == 2 ? 
'P' : 'B', + ps_result->recon.f64_psnr[ 0 ], ps_result->recon.f64_psnr[ 1 ], ps_result->recon.f64_psnr[ 2 ], i_accumulated_bytes ); + fflush( stderr ); + i_accumulated_bytes = 0; + + if( f_rec ) + { + if( ps_result->recon.i_pon == i_next_recon_output_pon ) + { + fwrite( ps_result->recon.pui8_luma, s_config.i_coded_width * s_config.i_coded_height, 1, f_rec ); + fwrite( ps_result->recon.pui8_cb, i_coded_chroma_width * i_coded_chroma_height, 1, f_rec ); + fwrite( ps_result->recon.pui8_cr, i_coded_chroma_width * i_coded_chroma_height, 1, f_rec ); + i_next_recon_output_pon++; + } + else + { + memcpy( s_buffered_output_picture.pui8_luma, ps_result->recon.pui8_luma, s_config.i_coded_width * s_config.i_coded_height ); + memcpy( s_buffered_output_picture.pui8_cb, ps_result->recon.pui8_cb, i_coded_chroma_width * i_coded_chroma_height ); + memcpy( s_buffered_output_picture.pui8_cr, ps_result->recon.pui8_cr, i_coded_chroma_width * i_coded_chroma_height ); + i_buffered_output_picture_pon = ps_result->recon.i_pon; + } + if( i_buffered_output_picture_pon == i_next_recon_output_pon ) + { + fwrite( s_buffered_output_picture.pui8_luma, s_config.i_coded_width * s_config.i_coded_height, 1, f_rec ); + fwrite( s_buffered_output_picture.pui8_cb, i_coded_chroma_width * i_coded_chroma_height, 1, f_rec ); + fwrite( s_buffered_output_picture.pui8_cr, i_coded_chroma_width * i_coded_chroma_height, 1, f_rec ); + i_next_recon_output_pon++; + i_buffered_output_picture_pon = -1; + } + } + } + else if( i_result_type == Y262_RESULT_RC_SAMPLE && f_mpass_out != NULL ) + { + y262app_rcsamplehdr_t s_sample; + /*fprintf( stderr, "result %d ( d %d )\n", i_result_type, ps_result->rc_sample.i_don );*/ + s_sample.i_don = ps_result->rc_sample.i_don; + s_sample.i_len = ps_result->rc_sample.i_data_length; + fwrite( &s_sample, sizeof( s_sample ), 1, f_mpass_out ); + fwrite( ps_result->rc_sample.pui8_data, s_sample.i_len, 1, f_mpass_out ); + } +} + + +int32_t y262app_rcsample_cb( void *p_handle, int32_t i_don, 
uint8_t *pui8_data, int32_t i_data_length ) +{ + if( f_mpass_in ) + { + int32_t i_seeked = 0; + int32_t i_ret; + y262app_rcsamplehdr_t s_sample; + + while( 1 ) + { + i_ret = fread( &s_sample, sizeof( s_sample ), 1, f_mpass_in ); + if( i_ret == 1 ) + { + if( i_data_length == s_sample.i_len ) + { + fread( pui8_data, s_sample.i_len, 1, f_mpass_in ); + } + else + { + break; /* version missmatch ? */ + } + if( s_sample.i_don == i_don && i_data_length == s_sample.i_len ) + { + return s_sample.i_len; + } + else if( s_sample.i_don > i_don ) + { + if( !i_seeked ) + { + fseek( f_mpass_in, SEEK_SET, 0 ); + i_seeked = 1; + } + else + { + break; + } + } + } + else + { + if( !i_seeked ) + { + fseek( f_mpass_in, SEEK_SET, 0 ); + i_seeked = 1; + } + else + { + break; + } + } + } + } + return 0; +} + + +void usage( ) +{ + fprintf( stderr, "y262 usage:\n"); + fprintf( stderr, "\ty262app -in <420yuv> -size -out \n\n"); + fprintf( stderr, "\t-frames : number of frames to encode, 0 for all\n"); + fprintf( stderr, "\t-threads : threading enabled and number of concurrent slices\n" ); + fprintf( stderr, "\t-profile : simple or main profile\n"); + fprintf( stderr, "\t-level : low main high1440 or high level\n"); + fprintf( stderr, "\t-chromaf : chroma format, 420, 422 or 444\n"); + fprintf( stderr, "\t-rec : write reconstructed frames to \n"); + fprintf( stderr, "\t-rcmode : 0 = CQ, 1 = 1st pass, 2 = subsequent pass\n"); + fprintf( stderr, "\t-mpin : stats file of previous pass\n"); + fprintf( stderr, "\t-mpout : output stats file of current pass\n"); + fprintf( stderr, "\t-bitrate : average bitrate\n"); + fprintf( stderr, "\t-vbvrate : maximum bitrate\n"); + fprintf( stderr, "\t-vbv : video buffer size\n"); + fprintf( stderr, "\t-quant : quantizer for CQ\n"); + fprintf( stderr, "\t-interlaced : enable field macroblock modes\n"); + fprintf( stderr, "\t-bff : first input frame is bottom field first\n"); + fprintf( stderr, "\t-pulldown_frcode :frame rate code to pull input up to\n"); + 
/*
 * Reads a quantization matrix from a text file.
 *
 * The file has to contain exactly 64 decimal numbers in the range 8-255,
 * separated by spaces, tabs or line breaks. Any other character is an error.
 *
 * pui8_filename: path of the matrix file
 * pui8_mat:      receives the 64 values in the order they appear in the file
 *
 * returns 0 on success and -1 on any error, after printing the reason to
 * stderr. pui8_mat may be partially written when -1 is returned.
 */
int32_t read_mat( uint8_t *pui8_filename, uint8_t *pui8_mat )
{
    FILE *f_in;
    char rgc_number[ 0x200 ];
    int32_t i_num_values = 0, i_num_digits = 0, i_ret = -1;

    if( pui8_filename == NULL || pui8_mat == NULL )
    {
        fprintf( stderr, "error reading quant matrix, no file name or no matrix given\n" );
        return -1;
    }

    /* plain "r": the 't' flag is not a standard fopen mode, text mode is the default */
    f_in = fopen( ( const char * )pui8_filename, "r" );
    if( f_in == NULL )
    {
        fprintf( stderr, "error reading quant matrix, cannot open file '%s'\n", ( const char * )pui8_filename );
        return -1;
    }

    while( 1 )
    {
        int i_char = fgetc( f_in );

        if( i_char >= '0' && i_char <= '9' )
        {
            /* the last slot stays reserved for the terminating 0 written when the number is flushed */
            if( i_num_digits >= ( int32_t )sizeof( rgc_number ) - 1 )
            {
                fprintf( stderr, "error reading quant matrix %s, please just check whats in the file\n", ( const char * )pui8_filename );
                goto err;
            }
            rgc_number[ i_num_digits++ ] = ( char )i_char;
            continue;
        }

        if( i_char != EOF && i_char != '\n' && i_char != '\t' && i_char != '\r' && i_char != ' ' )
        {
            fprintf( stderr, "error reading quant matrix, invalid char '%c'\n", i_char );
            goto err;
        }

        if( i_num_digits > 0 )
        {
            long i_value;

            if( i_num_values >= 64 )
            {
                fprintf( stderr, "error reading quant matrix %s, more than 64 values\n", ( const char * )pui8_filename );
                goto err;
            }

            /* flush number; strtol saturates on overlong digit strings where atoi would be undefined */
            rgc_number[ i_num_digits ] = 0;
            i_value = strtol( rgc_number, NULL, 10 );
            if( i_value < 8 || i_value > 255 )
            {
                fprintf( stderr, "error reading quant matrix %s, value %ld is outside 8-255 range\n", ( const char * )pui8_filename, i_value );
                goto err;
            }
            pui8_mat[ i_num_values++ ] = ( uint8_t )i_value;
            i_num_digits = 0;
        }

        if( i_char == EOF )
        {
            break;
        }
    }

    if( i_num_values == 64 )
    {
        i_ret = 0;
    }
    else
    {
        fprintf( stderr, "error reading quant matrix %s, file does not contain 64 numbers\n", ( const char * )pui8_filename );
    }

err:
    fclose( f_in );
    return i_ret;
}
16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}; + + y262_input_picture_t s_picture; + + i_frames = 0; + + p_y262 = y262_create( &s_config ); + + s_config.b_top_field_first = 1; + s_config.i_pulldown_frame_rate_code = 0; + s_config.b_qscale_type = 1; + + for( i_idx = 1; i_idx < i_argc; i_idx++ ) + { + if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-in" ) == 0 ) + { + pi8_infile = rgpi8_argv[ ++i_idx ]; + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-frames" ) == 0 ) + { + i_frames = atoi( ( char * ) rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-size" ) == 0 ) + { + i_width = atoi( ( char * ) rgpi8_argv[ ++i_idx ] ); + i_height = atoi( ( char * ) rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-threads" ) == 0 ) + { + s_config.i_multithreading = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + s_config.i_num_threads = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-profile" ) == 0 ) + { + i_idx++; + if( strcmp( ( char *)rgpi8_argv[ i_idx ], "simple" ) == 0 ) + { + s_config.i_profile = Y262_PROFILE_SIMPLE; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "main" ) == 0 ) + { + s_config.i_profile = Y262_PROFILE_MAIN; + } + else + { + fprintf( stderr, "unknown profile specified on commandline\n"); + } + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-level" ) == 0 ) + { + i_idx++; + if( strcmp( ( char *)rgpi8_argv[ i_idx ], "low" ) == 0 ) + { + s_config.i_level = Y262_LEVEL_LOW; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "main" ) == 0 ) + { + s_config.i_level = Y262_LEVEL_MAIN; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "high1440" ) == 0 ) + { + s_config.i_level = Y262_LEVEL_HIGH1440; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "high" ) == 0 ) + { + s_config.i_level = 
Y262_LEVEL_HIGH; + } + else + { + fprintf( stderr, "unknown level specified on commandline\n" ); + return -1; + } + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-chromaf" ) == 0 ) + { + i_idx++; + if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "420" ) == 0 ) + { + s_config.i_coded_chroma_format = Y262_CHROMA_FORMAT_420; + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "422" ) == 0 ) + { + s_config.i_coded_chroma_format = Y262_CHROMA_FORMAT_422; + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "444" ) == 0 ) + { + s_config.i_coded_chroma_format = Y262_CHROMA_FORMAT_444; + } + else + { + fprintf( stderr, "unknown chroma format specified on commandline\n" ); + return -1; + } + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-out" ) == 0 ) + { + pi8_outfile = rgpi8_argv[ ++i_idx ]; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-rec" ) == 0 ) + { + pi8_reconfile = rgpi8_argv[ ++i_idx ]; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-mpin" ) == 0 ) + { + pi8_mp_in = rgpi8_argv[ ++i_idx ]; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-mpout" ) == 0 ) + { + pi8_mp_out = rgpi8_argv[ ++i_idx ]; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-bitrate" ) == 0 ) + { + s_config.i_bitrate = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-vbvrate" ) == 0 ) + { + s_config.i_vbv_rate = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-vbv" ) == 0 ) + { + s_config.i_vbv_size = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-rcmode" ) == 0 ) + { + s_config.i_rcmode = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-quant" ) == 0 ) + { + s_config.i_quantizer = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-interlaced" ) == 0 ) + { + s_config.b_interlaced = 1; + } + else if( strcmp( ( char *)rgpi8_argv[ 
i_idx ], "-bff" ) == 0 ) + { + s_config.b_top_field_first = 0; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-pulldown_frcode" ) == 0 ) + { + s_config.i_pulldown_frame_rate_code = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-quality" ) == 0 ) + { + s_config.i_quality_for_speed = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-frcode" ) == 0 ) + { + s_config.i_frame_rate_code = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-arinfo" ) == 0 ) + { + s_config.i_aspect_ratio_information = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-qscale0" ) == 0 ) + { + s_config.b_qscale_type = 0; + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-nump" ) == 0 ) + { + s_config.i_keyframe_ref_distance = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-numb" ) == 0 ) + { + s_config.i_bframes = atoi( ( char * ) rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-closedgop" ) == 0 ) + { + s_config.b_closed_gop = TRUE; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-noaq" ) == 0 ) + { + s_config.b_variance_aq = 0; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-psyrd" ) == 0 ) + { + s_config.i_psyrd_strength = atoi( ( char *)rgpi8_argv[ ++i_idx ] ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-avamat6" ) == 0 ) + { + s_config.b_non_default_intra_matrix = 1; + memcpy( &s_config.rgui8_non_default_intra_matrix, rgui8_avamat6_intra, sizeof( rgui8_avamat6_intra ) ); + s_config.b_non_default_inter_matrix = 1; + memcpy( &s_config.rgui8_non_default_inter_matrix, rgui8_avamat6_inter, sizeof( rgui8_avamat6_inter ) ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-flatmat" ) == 0 ) + { + s_config.b_non_default_intra_matrix = 1; + memcpy( &s_config.rgui8_non_default_intra_matrix, 
rgui8_flatmat_intra, sizeof( rgui8_flatmat_intra ) ); + s_config.b_non_default_inter_matrix = 1; + memcpy( &s_config.rgui8_non_default_inter_matrix, rgui8_flatmat_inter, sizeof( rgui8_flatmat_inter ) ); + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-intramat" ) == 0 ) + { + s_config.b_non_default_intra_matrix = 1; + if( read_mat( ( uint8_t * )rgpi8_argv[ ++i_idx ], &s_config.rgui8_non_default_intra_matrix[ 0 ] ) < 0 ) + { + return -1; + } + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-intermat" ) == 0 ) + { + s_config.b_non_default_inter_matrix = 1; + if( read_mat( ( uint8_t * )rgpi8_argv[ ++i_idx ], &s_config.rgui8_non_default_inter_matrix[ 0 ] ) < 0 ) + { + return -1; + } + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "-videoformat" ) == 0 ) + { + i_idx++; + if( strcmp( ( char *)rgpi8_argv[ i_idx ], "pal" ) == 0 ) + { + s_config.i_videoformat = Y262_VIDEOFORMAT_PAL; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "secam" ) == 0 ) + { + s_config.i_videoformat = Y262_VIDEOFORMAT_SECAM; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "ntsc" ) == 0 ) + { + s_config.i_videoformat = Y262_VIDEOFORMAT_NTSC; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "709" ) == 0 ) + { + s_config.i_videoformat = Y262_VIDEOFORMAT_709; + } + else if( strcmp( ( char *)rgpi8_argv[ i_idx ], "unknown" ) == 0 ) + { + s_config.i_videoformat = Y262_VIDEOFORMAT_UNKNOWN; + } + else + { + fprintf( stderr, "unknown video format specified on commandline\n"); + return -1; + } + } + else if( strcmp( ( char* ) rgpi8_argv[ i_idx ], "-mpeg1" ) == 0 ) + { + s_config.b_mpeg1 = 1; + } + else if( strcmp( ( char * ) rgpi8_argv[ i_idx ], "-cbr" ) == 0 ) + { + s_config.b_cbr_padding = 1; + } + else + { + fprintf( stderr, "unknown commandline argument '%s'\n", rgpi8_argv[ i_idx ] ); + return -1; + } + } + + if( i_argc < 2 || !pi8_infile ) + { + usage(); + return -1; + } + + + s_config.pf_error_callback = y262app_error_cb; + s_config.pf_result_callback = y262app_result_cb; + 
s_config.pf_rcsample_callback = y262app_rcsample_cb; + + if( pi8_infile ) + { + if( strcmp( ( char * ) pi8_infile, "-" ) == 0 ) + { +#ifdef WIN32 + _setmode( _fileno( stdin ), _O_BINARY ); +#else + stdin = freopen( NULL, "rb", stdin ); +#endif + f_in = stdin; + } + else + { + f_in = fopen( ( char *)pi8_infile, "rb" ); + } + if( !f_in ) + { + fprintf( stderr, "could not open input file\n"); + return -1; + } + } + else + { + fprintf( stderr, "need -in commandline argument\n"); + } + + if( pi8_outfile ) + { + f_out = fopen( ( char *)pi8_outfile, "wb" ); + if( !f_out ) + { + return -1; + } + } + else + { + f_out = NULL; + fprintf( stderr, "no -out commandline argument found, no output\n"); + } + + if( pi8_reconfile ) + { + f_rec = fopen( ( char *)pi8_reconfile, "wb" ); + if( !f_rec ) + { + return -1; + } + } + else + { + f_rec = NULL; + fprintf( stderr, "no -rec commandline argument found, no recon output\n"); + } + + if( pi8_mp_in ) + { + f_mpass_in = fopen( ( char *)pi8_mp_in, "rb" ); + if( !f_mpass_in ) + { + fprintf( stderr, "could not open mpass in file %s\n", ( char *)pi8_mp_in ); + return -1; + } + } + else + { + f_mpass_in = NULL; + if( s_config.i_rcmode == 2 ) + { + fprintf( stderr, "no multipass input file ( -mpin ) but -rcmode is 2\n"); + return -1; + } + } + + if( pi8_mp_out ) + { + f_mpass_out = fopen( ( char *)pi8_mp_out, "wb" ); + if( !f_mpass_out ) + { + fprintf( stderr, "could not open mpass out file %s\n", ( char *)pi8_mp_out ); + return -1; + } + } + else + { + f_mpass_out = NULL; + } + + if( s_config.i_pulldown_frame_rate_code == 0 ) + { + s_config.i_pulldown_frame_rate_code = s_config.i_frame_rate_code; + } + if( s_config.i_pulldown_frame_rate_code < s_config.i_frame_rate_code ) + { + fprintf( stderr, "error: pulldown frame rate lower than frame rate\n"); + return -1; + } + + i_frcode = s_config.i_frame_rate_code; + i_input_picture_duration = rgi_framerate_code_duration[ i_frcode ]; + i_input_timescale = rgi_framerate_code_timescale[ i_frcode ]; + 
+ i_frcode = s_config.i_pulldown_frame_rate_code; + d_picture_duration = i_sequence_picture_duration = rgi_framerate_code_duration[ i_frcode ]; + d_timescale = i_sequence_timescale = rgi_framerate_code_timescale[ i_frcode ]; + + + s_config.i_display_width = i_width; + s_config.i_display_height = i_height; + i_pad_x = ( ( ( i_width + 15 ) / 16 ) * 16 ) - i_width; + i_pad_y = ( ( ( i_height + 15 ) / 16 ) * 16 ) - i_height; + + s_config.i_coded_width = i_width + i_pad_x; + s_config.i_coded_height = i_height + i_pad_y; + switch( s_config.i_coded_chroma_format ) + { + case Y262_CHROMA_FORMAT_420: + i_coded_chroma_width = s_config.i_coded_width >> 1; + i_coded_chroma_height = s_config.i_coded_height >> 1; + i_chroma_width = i_width >> 1; + i_chroma_height = i_height >> 1; + break; + case Y262_CHROMA_FORMAT_422: + i_coded_chroma_width = s_config.i_coded_width >> 1; + i_coded_chroma_height = s_config.i_coded_height; + i_chroma_width = i_width >> 1; + i_chroma_height = i_height; + break; + case Y262_CHROMA_FORMAT_444: + i_coded_chroma_width = s_config.i_coded_width; + i_coded_chroma_height = s_config.i_coded_height; + i_chroma_width = i_width; + i_chroma_height = i_height; + break; + } + + s_picture.pui8_luma = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * s_config.i_coded_width * s_config.i_coded_height ); + s_picture.pui8_cb = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + s_picture.pui8_cr = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + + i_buffered_output_picture_pon = -1; + s_buffered_output_picture.pui8_luma = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * s_config.i_coded_width * s_config.i_coded_height ); + s_buffered_output_picture.pui8_cb = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + s_buffered_output_picture.pui8_cr = ( uint8_t * ) y262_alloc( sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + + 
memset( s_picture.pui8_luma, 0, sizeof( uint8_t ) * s_config.i_coded_width * s_config.i_coded_height ); + memset( s_picture.pui8_cb, 0, sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + memset( s_picture.pui8_cr, 0, sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + + memset( s_buffered_output_picture.pui8_luma, 0, sizeof( uint8_t ) * s_config.i_coded_width * s_config.i_coded_height ); + memset( s_buffered_output_picture.pui8_cb, 0, sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + memset( s_buffered_output_picture.pui8_cr, 0, sizeof( uint8_t ) * i_coded_chroma_width * i_coded_chroma_height ); + + i_ret = y262_initialize( p_y262, &s_config ); + if( i_ret != Y262_INIT_SUCCESS ) + { + fprintf( stderr, "y262 init failure ( errc: %d ), exiting\n", i_ret ); + return 1; + } + else + { + fprintf( stderr, "y262 init ok, %dx%d @ %f fps\n", i_width, i_height, d_timescale / d_picture_duration ); + } + + i64_input_frac_ticks = 0; + + i_field_count = i_idx = 0; + while( 1 ) + { + uint8_t rgui8_user_data[ 100 ]; + + if( i_pad_x != 0 ) + { + int32_t i_y; + + for( i_y = 0; i_y < i_height; i_y++ ) + { + i_ret = fread( s_picture.pui8_luma + i_y * s_config.i_coded_width, sizeof( uint8_t ) * i_width, 1, f_in ); + } + for( i_y = 0; i_y < i_chroma_height; i_y++ ) + { + i_ret = fread( s_picture.pui8_cb + i_y * i_coded_chroma_width, sizeof( uint8_t ) * i_chroma_width, 1, f_in ); + } + for( i_y = 0; i_y < i_chroma_height; i_y++ ) + { + i_ret = fread( s_picture.pui8_cr + i_y * i_coded_chroma_width, sizeof( uint8_t ) * i_chroma_width, 1, f_in ); + } + } + else + { + i_ret = fread( s_picture.pui8_luma, sizeof( uint8_t ) * s_config.i_coded_width * i_height, 1, f_in ); + i_ret = fread( s_picture.pui8_cb, sizeof( uint8_t ) * i_coded_chroma_width * i_chroma_height, 1, f_in ); + i_ret = fread( s_picture.pui8_cr, sizeof( uint8_t ) * i_coded_chroma_width * i_chroma_height, 1, f_in ); + } + if( i_pad_y != 0 ) + { + int32_t i_y; + for( i_y = 
i_height; i_y < s_config.i_coded_height; i_y++ ) + { + memset( s_picture.pui8_luma + i_y * s_config.i_coded_width, 0, sizeof( uint8_t ) * s_config.i_coded_width ); + } + for( i_y = i_chroma_height; i_y < i_coded_chroma_height; i_y++ ) + { + memset( s_picture.pui8_cb + i_y * i_coded_chroma_width, 128, sizeof( uint8_t ) * i_coded_chroma_width ); + memset( s_picture.pui8_cr + i_y * i_coded_chroma_width, 128, sizeof( uint8_t ) * i_coded_chroma_width ); + } + } + if( i_ret <= 0 ) + { + break; + } + + if( s_config.i_frame_rate_code != s_config.i_pulldown_frame_rate_code ) + { + int64_t i64_tinput_picture_duration, i64_tsequence_picture_duration; + + i64_tinput_picture_duration = ( i_input_picture_duration * i_sequence_timescale ); + i64_tsequence_picture_duration = ( i_sequence_picture_duration * i_input_timescale ); + i64_input_frac_picture_duration = i64_tsequence_picture_duration - i64_tinput_picture_duration; + i64_tinput_picture_duration *= 2; + i64_tsequence_picture_duration *= 2; + i64_input_frac_picture_duration *= 2; + + i64_input_frac_ticks += i64_tsequence_picture_duration; + + if( i64_input_frac_ticks < i64_tinput_picture_duration ) + { + i64_input_frac_ticks += i64_tsequence_picture_duration / 2; + if( i64_input_frac_ticks < i64_tinput_picture_duration ) + { + fprintf( stderr, "error: pulldown frame rate too high, ran out of repeatable fields\n"); + return -1; + } + else + { + i64_input_frac_ticks -= i64_tinput_picture_duration; + } + s_picture.i_frame_structure = Y262_INPUT_PICTURE_FRAME_PROGRESSIVE_REPEAT; + i_field_count += 3; + } + else + { + i64_input_frac_ticks -= i64_tinput_picture_duration; + s_picture.i_frame_structure = Y262_INPUT_PICTURE_FRAME_PROGRESSIVE; + i_field_count += 2; + } + } + else + { + if( s_config.b_interlaced ) + { + s_picture.i_frame_structure = Y262_INPUT_PICTURE_FRAME_INTERLACED; + } + else + { + s_picture.i_frame_structure = Y262_INPUT_PICTURE_FRAME_PROGRESSIVE; + } + i_field_count += 2; + } + + s_picture.i_num_user_data = 0; + 
s_picture.b_start_new_gop = 0; + + /* + if( i_idx == 50 ) + { + s_picture.b_start_new_gop = 1; + s_picture.i_num_user_data = 1; + s_picture.rgi_user_data_len[ 0 ] = 100; + memset( rgui8_user_data, 0xef, 100 ); + s_picture.rgpui8_user_data[ 0 ] = rgui8_user_data; + s_picture.rgi_user_data_where[ 0 ] = Y262_USER_DATA_BEFORE_SLICES; + } + */ + + if( y262_push_input_picture( p_y262, &s_picture, i_idx ) != Y262_PUSH_INPUT_CONTINUE ) + { + fprintf( stderr, "push input picture: some error\n"); + break; + } + i_idx++; + if( i_idx == i_frames && i_frames > 0 ) + { + break; + } + } + + do + { + i_ret = y262_push_input_picture( p_y262, NULL, i_idx ); + if( i_ret == Y262_PUSH_INPUT_FLUSHED ) + { + break; + } + } while( i_ret == Y262_PUSH_INPUT_CONTINUE ); + + if( i_ret != Y262_PUSH_INPUT_FLUSHED ) + { + fprintf( stderr, "push input picture NULL: some error while flushing\n" ); + } + + y262_deinitialize( p_y262 ); + + y262_dealloc( s_picture.pui8_luma ); + y262_dealloc( s_picture.pui8_cb ); + y262_dealloc( s_picture.pui8_cr ); + y262_dealloc( s_buffered_output_picture.pui8_luma ); + y262_dealloc( s_buffered_output_picture.pui8_cb ); + y262_dealloc( s_buffered_output_picture.pui8_cr ); + + fprintf( stderr, "over %d frames:\n", i_idx ); + fprintf( stderr, "mean psnr: Y=%.2f CB=%.2f CR=%.2f\n", rgd_mean_psnr_accum[ 0 ] / d_num_mean_psnr, rgd_mean_psnr_accum[ 1 ] / d_num_mean_psnr, rgd_mean_psnr_accum[ 2 ] / d_num_mean_psnr ); + fprintf( stderr, "%.0f bytes out, %.2f kbit/sec @ %.4f fps\n", ( double )i64_bytes_num_out, ( ( ( ( ( double )i64_bytes_num_out ) * 8 ) / ( double )i_field_count ) * ( d_timescale / ( d_picture_duration / 2.0 ) ) ) / 1000.0, d_timescale / d_picture_duration ); + fflush( stderr ); + +#if DUMP_MEM_LEAKS + _CrtDumpMemoryLeaks( ); +#endif + + if( strcmp( ( char * ) pi8_infile, "-" ) != 0 ) + { + fclose( f_in ); + } + + return 0; +} + + + + +