Update to 7.3.0 (2195)

This commit is contained in:
DrKLO 2020-12-23 11:48:30 +04:00
parent 5a47056c7b
commit d52b2c921a
4526 changed files with 73002 additions and 104030 deletions

View file

@ -2,7 +2,7 @@ FROM gradle:6.5.0-jdk8
ENV ANDROID_SDK_URL https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip ENV ANDROID_SDK_URL https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip
ENV ANDROID_API_LEVEL android-30 ENV ANDROID_API_LEVEL android-30
ENV ANDROID_BUILD_TOOLS_VERSION 30.0.2 ENV ANDROID_BUILD_TOOLS_VERSION 30.0.3
ENV ANDROID_HOME /usr/local/android-sdk-linux ENV ANDROID_HOME /usr/local/android-sdk-linux
ENV ANDROID_NDK_VERSION 21.1.6352462 ENV ANDROID_NDK_VERSION 21.1.6352462
ENV ANDROID_VERSION 30 ENV ANDROID_VERSION 30

View file

@ -12,24 +12,25 @@ configurations {
configurations.all { configurations.all {
exclude group: 'com.google.firebase', module: 'firebase-core' exclude group: 'com.google.firebase', module: 'firebase-core'
exclude group: 'androidx.recyclerview', module: 'recyclerview'
} }
dependencies { dependencies {
implementation 'androidx.core:core:1.3.2' implementation 'androidx.core:core:1.3.2'
implementation 'androidx.palette:palette:1.0.0' implementation 'androidx.palette:palette:1.0.0'
implementation 'androidx.exifinterface:exifinterface:1.3.1' implementation 'androidx.exifinterface:exifinterface:1.3.2'
implementation 'androidx.dynamicanimation:dynamicanimation:1.0.0' implementation 'androidx.dynamicanimation:dynamicanimation:1.0.0'
implementation 'androidx.multidex:multidex:2.0.1' implementation 'androidx.multidex:multidex:2.0.1'
implementation "androidx.sharetarget:sharetarget:1.0.0" implementation "androidx.sharetarget:sharetarget:1.0.0"
compileOnly 'org.checkerframework:checker-qual:2.5.2' compileOnly 'org.checkerframework:checker-qual:2.5.2'
compileOnly 'org.checkerframework:checker-compat-qual:2.5.0' compileOnly 'org.checkerframework:checker-compat-qual:2.5.0'
implementation 'com.google.firebase:firebase-messaging:20.3.0' implementation 'com.google.firebase:firebase-messaging:21.0.1'
implementation 'com.google.firebase:firebase-config:19.2.0' implementation 'com.google.firebase:firebase-config:20.0.2'
implementation 'com.google.firebase:firebase-datatransport:17.0.8' implementation 'com.google.firebase:firebase-datatransport:17.0.10'
implementation 'com.google.firebase:firebase-appindexing:19.1.0' implementation 'com.google.firebase:firebase-appindexing:19.1.0'
implementation 'com.google.android.gms:play-services-maps:17.0.0' implementation 'com.google.android.gms:play-services-maps:17.0.0'
implementation 'com.google.android.gms:play-services-auth:18.1.0' implementation 'com.google.android.gms:play-services-auth:19.0.0'
implementation 'com.google.android.gms:play-services-vision:16.2.0' implementation 'com.google.android.gms:play-services-vision:16.2.0'
implementation 'com.google.android.gms:play-services-wearable:17.0.0' implementation 'com.google.android.gms:play-services-wearable:17.0.0'
implementation 'com.google.android.gms:play-services-location:17.1.0' implementation 'com.google.android.gms:play-services-location:17.1.0'
@ -40,12 +41,12 @@ dependencies {
implementation 'com.stripe:stripe-android:2.0.2' implementation 'com.stripe:stripe-android:2.0.2'
implementation files('libs/libgsaverification-client.aar') implementation files('libs/libgsaverification-client.aar')
coreLibraryDesugaring 'com.android.tools:desugar_jdk_libs:1.0.10' coreLibraryDesugaring 'com.android.tools:desugar_jdk_libs:1.1.1'
} }
android { android {
compileSdkVersion 30 compileSdkVersion 30
buildToolsVersion '30.0.2' buildToolsVersion '30.0.3'
ndkVersion "21.1.6352462" ndkVersion "21.1.6352462"
defaultConfig.applicationId = "org.telegram.messenger" defaultConfig.applicationId = "org.telegram.messenger"
@ -97,9 +98,11 @@ android {
jniDebuggable true jniDebuggable true
signingConfig signingConfigs.debug signingConfig signingConfigs.debug
applicationIdSuffix ".beta" applicationIdSuffix ".beta"
minifyEnabled true minifyEnabled false
shrinkResources false
multiDexEnabled true multiDexEnabled true
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
ndk.debugSymbolLevel = 'FULL'
} }
/*debugAsan { /*debugAsan {
@ -139,6 +142,7 @@ android {
minifyEnabled true minifyEnabled true
multiDexEnabled true multiDexEnabled true
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
ndk.debugSymbolLevel = 'FULL'
} }
release { release {
@ -149,6 +153,7 @@ android {
shrinkResources false shrinkResources false
multiDexEnabled true multiDexEnabled true
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
ndk.debugSymbolLevel = 'FULL'
} }
} }
@ -285,7 +290,7 @@ android {
} }
} }
defaultConfig.versionCode = 2139 defaultConfig.versionCode = 2195
applicationVariants.all { variant -> applicationVariants.all { variant ->
variant.outputs.all { output -> variant.outputs.all { output ->
@ -303,8 +308,8 @@ android {
defaultConfig { defaultConfig {
minSdkVersion 16 minSdkVersion 16
targetSdkVersion 28 targetSdkVersion 29
versionName "7.2.1" versionName "7.3.0"
vectorDrawables.generatedDensities = ['mdpi', 'hdpi', 'xhdpi', 'xxhdpi'] vectorDrawables.generatedDensities = ['mdpi', 'hdpi', 'xhdpi', 'xxhdpi']

View file

@ -26,6 +26,7 @@
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true" android:largeHeap="true"
android:supportsRtl="false" android:supportsRtl="false"
android:requestLegacyExternalStorage="true"
tools:replace="android:supportsRtl"> tools:replace="android:supportsRtl">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" /> <meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />

View file

@ -30,6 +30,7 @@
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true" android:largeHeap="true"
android:supportsRtl="false" android:supportsRtl="false"
android:requestLegacyExternalStorage="true"
tools:replace="android:supportsRtl"> tools:replace="android:supportsRtl">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" /> <meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />

View file

@ -27,6 +27,7 @@
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true" android:largeHeap="true"
android:supportsRtl="false" android:supportsRtl="false"
android:requestLegacyExternalStorage="true"
tools:replace="android:supportsRtl"> tools:replace="android:supportsRtl">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" /> <meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />

View file

@ -30,6 +30,7 @@
android:hardwareAccelerated="@bool/useHardwareAcceleration" android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true" android:largeHeap="true"
android:supportsRtl="false" android:supportsRtl="false"
android:requestLegacyExternalStorage="true"
tools:replace="android:supportsRtl"> tools:replace="android:supportsRtl">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" /> <meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />

View file

@ -395,7 +395,7 @@ target_compile_definitions(sqlite PUBLIC
#voip #voip
include(${CMAKE_HOME_DIRECTORY}/voip/CMakeLists.txt) include(${CMAKE_HOME_DIRECTORY}/voip/CMakeLists.txt)
set(NATIVE_LIB "tmessages.34") set(NATIVE_LIB "tmessages.35")
#tmessages #tmessages
add_library(${NATIVE_LIB} SHARED add_library(${NATIVE_LIB} SHARED
@ -642,7 +642,8 @@ target_sources(${NATIVE_LIB} PRIVATE
third_party/libyuv/source/scale_neon64.cc third_party/libyuv/source/scale_neon64.cc
third_party/libyuv/source/scale_win.cc third_party/libyuv/source/scale_win.cc
third_party/libyuv/source/scale.cc third_party/libyuv/source/scale.cc
third_party/libyuv/source/video_common.cc) third_party/libyuv/source/video_common.cc
third_party/libyuv/source/scale_uv.cc)
target_include_directories(${NATIVE_LIB} PUBLIC target_include_directories(${NATIVE_LIB} PUBLIC
opus/include opus/include

View file

@ -290,7 +290,7 @@ extern "C" JNIEXPORT void JNICALL Java_org_telegram_ui_Components_AnimatedFileDr
info->src = new char[len + 1]; info->src = new char[len + 1];
memcpy(info->src, srcString, len); memcpy(info->src, srcString, len);
info->src[len] = '\0'; info->src[len] = '\0';
if (srcString != 0) { if (srcString != nullptr) {
env->ReleaseStringUTFChars(src, srcString); env->ReleaseStringUTFChars(src, srcString);
} }
@ -364,7 +364,7 @@ extern "C" JNIEXPORT void JNICALL Java_org_telegram_ui_Components_AnimatedFileDr
dataArr[PARAM_NUM_WIDTH] = info->video_stream->codecpar->width; dataArr[PARAM_NUM_WIDTH] = info->video_stream->codecpar->width;
dataArr[PARAM_NUM_HEIGHT] = info->video_stream->codecpar->height; dataArr[PARAM_NUM_HEIGHT] = info->video_stream->codecpar->height;
AVDictionaryEntry *rotate_tag = av_dict_get(info->video_stream->metadata, "rotate", NULL, 0); AVDictionaryEntry *rotate_tag = av_dict_get(info->video_stream->metadata, "rotate", NULL, 0);
if (rotate_tag && *rotate_tag->value && strcmp(rotate_tag->value, "0")) { if (rotate_tag && *rotate_tag->value && strcmp(rotate_tag->value, "0") != 0) {
char *tail; char *tail;
dataArr[PARAM_NUM_ROTATION] = (jint) av_strtod(rotate_tag->value, &tail); dataArr[PARAM_NUM_ROTATION] = (jint) av_strtod(rotate_tag->value, &tail);
if (*tail) { if (*tail) {
@ -373,7 +373,7 @@ extern "C" JNIEXPORT void JNICALL Java_org_telegram_ui_Components_AnimatedFileDr
} else { } else {
dataArr[PARAM_NUM_ROTATION] = 0; dataArr[PARAM_NUM_ROTATION] = 0;
} }
if (info->video_stream->codecpar->codec_id == AV_CODEC_ID_H264) { if (info->video_stream->codecpar->codec_id == AV_CODEC_ID_H264 || info->video_stream->codecpar->codec_id == AV_CODEC_ID_HEVC) {
dataArr[PARAM_NUM_FRAMERATE] = (jint) av_q2d(info->video_stream->avg_frame_rate); dataArr[PARAM_NUM_FRAMERATE] = (jint) av_q2d(info->video_stream->avg_frame_rate);
} else { } else {
dataArr[PARAM_NUM_FRAMERATE] = (jint) av_q2d(info->video_stream->r_frame_rate); dataArr[PARAM_NUM_FRAMERATE] = (jint) av_q2d(info->video_stream->r_frame_rate);
@ -605,16 +605,19 @@ static inline void writeFrameToBitmap(JNIEnv *env, VideoInfo *info, jintArray da
jint *dataArr = env->GetIntArrayElements(data, 0); jint *dataArr = env->GetIntArrayElements(data, 0);
int32_t wantedWidth; int32_t wantedWidth;
int32_t wantedHeight; int32_t wantedHeight;
AndroidBitmapInfo bitmapInfo;
AndroidBitmap_getInfo(env, bitmap, &bitmapInfo);
int32_t bitmapWidth = bitmapInfo.width;
int32_t bitmapHeight = bitmapInfo.height;
if (dataArr != nullptr) { if (dataArr != nullptr) {
wantedWidth = dataArr[0]; wantedWidth = dataArr[0];
wantedHeight = dataArr[1]; wantedHeight = dataArr[1];
dataArr[3] = (jint) (1000 * info->frame->best_effort_timestamp * av_q2d(info->video_stream->time_base)); dataArr[3] = (jint) (1000 * info->frame->best_effort_timestamp * av_q2d(info->video_stream->time_base));
env->ReleaseIntArrayElements(data, dataArr, 0); env->ReleaseIntArrayElements(data, dataArr, 0);
} else { } else {
AndroidBitmapInfo bitmapInfo; wantedWidth = bitmapWidth;
AndroidBitmap_getInfo(env, bitmap, &bitmapInfo); wantedHeight = bitmapHeight;
wantedWidth = bitmapInfo.width;
wantedHeight = bitmapInfo.height;
} }
void *pixels; void *pixels;
@ -622,17 +625,17 @@ static inline void writeFrameToBitmap(JNIEnv *env, VideoInfo *info, jintArray da
if (wantedWidth == info->frame->width && wantedHeight == info->frame->height || wantedWidth == info->frame->height && wantedHeight == info->frame->width) { if (wantedWidth == info->frame->width && wantedHeight == info->frame->height || wantedWidth == info->frame->height && wantedHeight == info->frame->width) {
if (info->sws_ctx == nullptr) { if (info->sws_ctx == nullptr) {
if (info->frame->format > AV_PIX_FMT_NONE && info->frame->format < AV_PIX_FMT_NB) { if (info->frame->format > AV_PIX_FMT_NONE && info->frame->format < AV_PIX_FMT_NB) {
info->sws_ctx = sws_getContext(info->frame->width, info->frame->height, (AVPixelFormat) info->frame->format, info->frame->width, info->frame->height, AV_PIX_FMT_RGBA, SWS_BILINEAR, NULL, NULL, NULL); info->sws_ctx = sws_getContext(info->frame->width, info->frame->height, (AVPixelFormat) info->frame->format, bitmapWidth, bitmapHeight, AV_PIX_FMT_RGBA, SWS_BILINEAR, NULL, NULL, NULL);
} else if (info->video_dec_ctx->pix_fmt > AV_PIX_FMT_NONE && info->video_dec_ctx->pix_fmt < AV_PIX_FMT_NB) { } else if (info->video_dec_ctx->pix_fmt > AV_PIX_FMT_NONE && info->video_dec_ctx->pix_fmt < AV_PIX_FMT_NB) {
info->sws_ctx = sws_getContext(info->video_dec_ctx->width, info->video_dec_ctx->height, info->video_dec_ctx->pix_fmt, info->video_dec_ctx->width, info->video_dec_ctx->height, AV_PIX_FMT_RGBA, SWS_BILINEAR, NULL, NULL, NULL); info->sws_ctx = sws_getContext(info->video_dec_ctx->width, info->video_dec_ctx->height, info->video_dec_ctx->pix_fmt, bitmapWidth, bitmapHeight, AV_PIX_FMT_RGBA, SWS_BILINEAR, NULL, NULL, NULL);
} }
} }
if (info->sws_ctx == nullptr || ((intptr_t) pixels) % 16 != 0) { if (info->sws_ctx == nullptr || ((intptr_t) pixels) % 16 != 0) {
if (info->frame->format == AV_PIX_FMT_YUV420P || info->frame->format == AV_PIX_FMT_YUVJ420P) { if (info->frame->format == AV_PIX_FMT_YUV420P || info->frame->format == AV_PIX_FMT_YUVJ420P) {
if (info->frame->colorspace == AVColorSpace::AVCOL_SPC_BT709) { if (info->frame->colorspace == AVColorSpace::AVCOL_SPC_BT709) {
libyuv::H420ToARGB(info->frame->data[0], info->frame->linesize[0], info->frame->data[2], info->frame->linesize[2], info->frame->data[1], info->frame->linesize[1], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height); libyuv::H420ToARGB(info->frame->data[0], info->frame->linesize[0], info->frame->data[2], info->frame->linesize[2], info->frame->data[1], info->frame->linesize[1], (uint8_t *) pixels, bitmapWidth * 4, bitmapWidth, bitmapHeight);
} else { } else {
libyuv::I420ToARGB(info->frame->data[0], info->frame->linesize[0], info->frame->data[2], info->frame->linesize[2], info->frame->data[1], info->frame->linesize[1], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height); libyuv::I420ToARGB(info->frame->data[0], info->frame->linesize[0], info->frame->data[2], info->frame->linesize[2], info->frame->data[1], info->frame->linesize[1], (uint8_t *) pixels, bitmapWidth * 4, bitmapWidth, bitmapHeight);
} }
} else if (info->frame->format == AV_PIX_FMT_BGRA) { } else if (info->frame->format == AV_PIX_FMT_BGRA) {
libyuv::ABGRToARGB(info->frame->data[0], info->frame->linesize[0], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height); libyuv::ABGRToARGB(info->frame->data[0], info->frame->linesize[0], (uint8_t *) pixels, info->frame->width * 4, info->frame->width, info->frame->height);

View file

@ -206,7 +206,7 @@ inline bool factorizeValue(uint64_t what, uint32_t &p, uint32_t &q) {
inline bool check_prime(BIGNUM *p) { inline bool check_prime(BIGNUM *p) {
int result = 0; int result = 0;
if (!BN_primality_test(&result, p, BN_prime_checks, bnContext, 0, NULL)) { if (!BN_primality_test(&result, p, 64, bnContext, 0, NULL)) {
if (LOGS_ENABLED) DEBUG_E("OpenSSL error at BN_primality_test"); if (LOGS_ENABLED) DEBUG_E("OpenSSL error at BN_primality_test");
return false; return false;
} }

View file

@ -25,6 +25,7 @@ Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com> Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com> Attila Nagy <attilanagy@google.com>
Birk Magnussen <birk.magnussen@googlemail.com> Birk Magnussen <birk.magnussen@googlemail.com>
Brian Foley <bpfoley@google.com>
Brion Vibber <bvibber@wikimedia.org> Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com> changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com> Charles 'Buck' Krasic <ckrasic@google.com>
@ -33,6 +34,7 @@ Chi Yo Tsai <chiyotsai@google.com>
chm <chm@rock-chips.com> chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org> Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com> Christian Duvivier <cduvivier@google.com>
Clement Courbet <courbet@google.com>
Daniele Castagna <dcastagna@chromium.org> Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com> Daniel Kang <ddkang@google.com>
Dan Zhu <zxdan@google.com> Dan Zhu <zxdan@google.com>
@ -91,6 +93,7 @@ John Koleszar <jkoleszar@google.com>
Johnny Klonaris <google@jawknee.com> Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com> John Stark <jhnstrk@gmail.com>
Jon Kunkee <jkunkee@microsoft.com> Jon Kunkee <jkunkee@microsoft.com>
Jorge E. Moreira <jemoreira@google.com>
Joshua Bleecher Snyder <josh@treelinelabs.com> Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com> Joshua Litt <joshualitt@google.com>
Julia Robson <juliamrobson@gmail.com> Julia Robson <juliamrobson@gmail.com>
@ -125,6 +128,7 @@ Mirko Bonadei <mbonadei@google.com>
Moriyoshi Koizumi <mozo@mozo.jp> Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com> Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com> Nathan E. Egge <negge@mozilla.com>
Neil Birkbeck <neil.birkbeck@gmail.com>
Nico Weber <thakis@chromium.org> Nico Weber <thakis@chromium.org>
Niveditha Rau <niveditha.rau@gmail.com> Niveditha Rau <niveditha.rau@gmail.com>
Parag Salasakar <img.mips1@gmail.com> Parag Salasakar <img.mips1@gmail.com>
@ -165,6 +169,7 @@ Shimon Doodkin <helpmepro1@gmail.com>
Shiyou Yin <yinshiyou-hf@loongson.cn> Shiyou Yin <yinshiyou-hf@loongson.cn>
Shubham Tandle <shubham.tandle@ittiam.com> Shubham Tandle <shubham.tandle@ittiam.com>
Shunyao Li <shunyaoli@google.com> Shunyao Li <shunyaoli@google.com>
Sreerenj Balachandran <bsreerenj@gmail.com>
Stefan Holmer <holmer@google.com> Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com> Suman Sunkara <sunkaras@google.com>
Supradeep T R <supradeep.tr@ittiam.com> Supradeep T R <supradeep.tr@ittiam.com>
@ -185,6 +190,7 @@ Vignesh Venkatasubramanian <vigneshv@google.com>
Vitaly Buka <vitalybuka@chromium.org> Vitaly Buka <vitalybuka@chromium.org>
Vlad Tsyrklevich <vtsyrklevich@chromium.org> Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Wan-Teh Chang <wtc@google.com> Wan-Teh Chang <wtc@google.com>
Wonkap Jang <wonkap@google.com>
xiwei gu <guxiwei-hf@loongson.cn> xiwei gu <guxiwei-hf@loongson.cn>
Yaowu Xu <yaowu@google.com> Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com> Yi Luo <luoyi@google.com>

View file

@ -1,3 +1,27 @@
2020-07-29 v1.9.0 "Quacking Duck"
This release adds support for NV12, a separate library for rate control, as
well as incremental improvements.
- Upgrading:
NV12 support is added to this release.
A new interface is added for VP9 rate control. The new library libvp9rc.a
must be linked by applications.
Googletest is updated to v1.10.0.
simple_encode.cc is compiled into a new library libsimple_encode.a with
CONFIG_RATE_CTRL.
- Enhancement:
Various changes to improve VP9 SVC, rate control, quality and speed to real
time encoding.
- Bug fixes:
Fix key frame update refresh simulcast flexible svc.
Fix to disable_16x16part speed feature for real time encoding.
Fix some signed integer overflows for VP9 rate control.
Fix initialization of delta_q_uv.
Fix condition in regulate_q for cyclic refresh.
Various fixes to dynamic resizing for VP9 SVC.
2019-12-09 v1.8.2 "Pekin Duck" 2019-12-09 v1.8.2 "Pekin Duck"
This release collects incremental improvements to many aspects of the library. This release collects incremental improvements to many aspects of the library.

View file

@ -1,5 +1,5 @@
URL: https://git.videolan.org/git/x264.git URL: https://git.videolan.org/git/x264.git
Version: d23d18655249944c1ca894b451e2c82c7a584c62 Version: 3e5aed95cc470f37e2db3e6506a8deb89b527720
License: ISC License: ISC
License File: LICENSE License File: LICENSE
@ -12,10 +12,8 @@ Get configuration from vpx_config.asm.
Prefix functions with vpx by default. Prefix functions with vpx by default.
Manage name mangling (prefixing with '_') manually because 'PREFIX' does not Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
exist in libvpx. exist in libvpx.
Expand PIC default to macho64 and respect CONFIG_PIC from libvpx
Set 'private_extern' visibility for macho targets.
Copy PIC 'GLOBAL' macros from x86_abi_support.asm Copy PIC 'GLOBAL' macros from x86_abi_support.asm
Use .text instead of .rodata on macho to avoid broken tables in PIC mode. Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout Use .text with no alignment for aout.
Only use 'hidden' visibility with Chromium Only use 'hidden' visibility with Chromium.
Prefix ARCH_* with VPX_. Prefix ARCH_* with VPX_.

View file

@ -91,10 +91,13 @@ int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) {
for (plane = 0; plane < 3; ++plane) { for (plane = 0; plane < 3; ++plane) {
uint8_t *ptr; uint8_t *ptr;
const int w = vpx_img_plane_width(yuv_frame, plane); int w = vpx_img_plane_width(yuv_frame, plane);
const int h = vpx_img_plane_height(yuv_frame, plane); const int h = vpx_img_plane_height(yuv_frame, plane);
int r; int r;
// Assuming that for nv12 we read all chroma data at one time
if (yuv_frame->fmt == VPX_IMG_FMT_NV12 && plane > 1) break;
// Fixing NV12 chroma width it is odd
if (yuv_frame->fmt == VPX_IMG_FMT_NV12 && plane == 1) w = (w + 1) & ~1;
/* Determine the correct plane based on the image format. The for-loop /* Determine the correct plane based on the image format. The for-loop
* always counts in Y,U,V order, but this may not match the order of * always counts in Y,U,V order, but this may not match the order of
* the data on disk. * the data on disk.

View file

@ -11,7 +11,8 @@
#include "extend.h" #include "extend.h"
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
static void copy_and_extend_plane(unsigned char *s, /* source */ static void copy_and_extend_plane(
unsigned char *s, /* source */
int sp, /* source pitch */ int sp, /* source pitch */
unsigned char *d, /* destination */ unsigned char *d, /* destination */
int dp, /* destination pitch */ int dp, /* destination pitch */
@ -20,21 +21,30 @@ static void copy_and_extend_plane(unsigned char *s, /* source */
int et, /* extend top border */ int et, /* extend top border */
int el, /* extend left border */ int el, /* extend left border */
int eb, /* extend bottom border */ int eb, /* extend bottom border */
int er) { /* extend right border */ int er, /* extend right border */
int i; int interleave_step) { /* step between pixels of the current plane */
int i, j;
unsigned char *src_ptr1, *src_ptr2; unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2; unsigned char *dest_ptr1, *dest_ptr2;
int linesize; int linesize;
if (interleave_step < 1) interleave_step = 1;
/* copy the left and right most columns out */ /* copy the left and right most columns out */
src_ptr1 = s; src_ptr1 = s;
src_ptr2 = s + w - 1; src_ptr2 = s + (w - 1) * interleave_step;
dest_ptr1 = d - el; dest_ptr1 = d - el;
dest_ptr2 = d + w; dest_ptr2 = d + w;
for (i = 0; i < h; ++i) { for (i = 0; i < h; ++i) {
memset(dest_ptr1, src_ptr1[0], el); memset(dest_ptr1, src_ptr1[0], el);
if (interleave_step == 1) {
memcpy(dest_ptr1 + el, src_ptr1, w); memcpy(dest_ptr1 + el, src_ptr1, w);
} else {
for (j = 0; j < w; j++) {
dest_ptr1[el + j] = src_ptr1[interleave_step * j];
}
}
memset(dest_ptr2, src_ptr2[0], er); memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp; src_ptr1 += sp;
src_ptr2 += sp; src_ptr2 += sp;
@ -69,9 +79,12 @@ void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
int eb = dst->border + dst->y_height - src->y_height; int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width; int er = dst->border + dst->y_width - src->y_width;
// detect nv12 colorspace
int chroma_step = src->v_buffer - src->u_buffer == 1 ? 2 : 1;
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_height, src->y_width, et, el, eb, dst->y_stride, src->y_height, src->y_width, et, el, eb,
er); er, 1);
et = dst->border >> 1; et = dst->border >> 1;
el = dst->border >> 1; el = dst->border >> 1;
@ -80,11 +93,11 @@ void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer, copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, src->uv_height, src->uv_width, et, el, dst->uv_stride, src->uv_height, src->uv_width, et, el,
eb, er); eb, er, chroma_step);
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer, copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, src->uv_height, src->uv_width, et, el, dst->uv_stride, src->uv_height, src->uv_width, et, el,
eb, er); eb, er, chroma_step);
} }
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
@ -98,6 +111,8 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
int dst_y_offset = srcy * dst->y_stride + srcx; int dst_y_offset = srcy * dst->y_stride + srcx;
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
// detect nv12 colorspace
int chroma_step = src->v_buffer - src->u_buffer == 1 ? 2 : 1;
/* If the side is not touching the bounder then don't extend. */ /* If the side is not touching the bounder then don't extend. */
if (srcy) et = 0; if (srcy) et = 0;
@ -107,7 +122,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride, srch, srcw, dst->y_buffer + dst_y_offset, dst->y_stride, srch, srcw,
et, el, eb, er); et, el, eb, er, 1);
et = (et + 1) >> 1; et = (et + 1) >> 1;
el = (el + 1) >> 1; el = (el + 1) >> 1;
@ -118,11 +133,11 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
dst->u_buffer + dst_uv_offset, dst->uv_stride, srch, dst->u_buffer + dst_uv_offset, dst->uv_stride, srch,
srcw, et, el, eb, er); srcw, et, el, eb, er, chroma_step);
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
dst->v_buffer + dst_uv_offset, dst->uv_stride, srch, dst->v_buffer + dst_uv_offset, dst->uv_stride, srch,
srcw, et, el, eb, er); srcw, et, el, eb, er, chroma_step);
} }
/* note the extension is only for the last row, for intra prediction purpose */ /* note the extension is only for the last row, for intra prediction purpose */

View file

@ -16,6 +16,8 @@
#include "vpx_ports/x86.h" #include "vpx_ports/x86.h"
#elif VPX_ARCH_PPC #elif VPX_ARCH_PPC
#include "vpx_ports/ppc.h" #include "vpx_ports/ppc.h"
#elif VPX_ARCH_MIPS
#include "vpx_ports/mips.h"
#endif #endif
#include "vp8/common/onyxc_int.h" #include "vp8/common/onyxc_int.h"
#include "vp8/common/systemdependent.h" #include "vp8/common/systemdependent.h"
@ -96,6 +98,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) {
ctx->cpu_caps = x86_simd_caps(); ctx->cpu_caps = x86_simd_caps();
#elif VPX_ARCH_PPC #elif VPX_ARCH_PPC
ctx->cpu_caps = ppc_simd_caps(); ctx->cpu_caps = ppc_simd_caps();
#elif VPX_ARCH_MIPS
ctx->cpu_caps = mips_cpu_caps();
#else #else
// generic-gnu targets. // generic-gnu targets.
ctx->cpu_caps = 0; ctx->cpu_caps = 0;

View file

@ -100,7 +100,7 @@ void vp8_dequant_idct_add_mmi(int16_t *input, int16_t *dq, unsigned char *dest,
vp8_short_idct4x4llm_mmi(input, dest, stride, dest, stride); vp8_short_idct4x4llm_mmi(input, dest, stride, dest, stride);
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gssdlc1 %[ftmp0], 0x07(%[input]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[input]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[input]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[input]) \n\t"
"sdl $0, 0x0f(%[input]) \n\t" "sdl $0, 0x0f(%[input]) \n\t"

View file

@ -13,25 +13,25 @@
#include "vpx_ports/asmdefs_mmi.h" #include "vpx_ports/asmdefs_mmi.h"
#define TRANSPOSE_4H \ #define TRANSPOSE_4H \
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
MMI_LI(%[tmp0], 0x93) \ MMI_LI(%[tmp0], 0x93) \
"mtc1 %[tmp0], %[ftmp10] \n\t" \ "mtc1 %[tmp0], %[ftmp10] \n\t" \
"punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
"punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ "por %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \
"punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \
"punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ "por %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \
"punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \
"punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ "por %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \
"punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \
"punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "por %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \
"punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \
"punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \
@ -41,15 +41,19 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr, int pred_stride, unsigned char *dst_ptr,
int dst_stride) { int dst_stride) {
double ftmp[12]; double ftmp[12];
uint32_t tmp[0]; uint64_t tmp[1];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL }; double ff_ph_04, ff_ph_4e7b, ff_ph_22a3;
DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL };
DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL };
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x0004000400040004 \n\t"
"dmtc1 %[tmp0], %[ff_ph_04] \n\t"
"dli %[tmp0], 0x4e7b4e7b4e7b4e7b \n\t"
"dmtc1 %[tmp0], %[ff_ph_4e7b] \n\t"
"dli %[tmp0], 0x22a322a322a322a3 \n\t"
"dmtc1 %[tmp0], %[ff_ph_22a3] \n\t"
MMI_LI(%[tmp0], 0x02) MMI_LI(%[tmp0], 0x02)
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
@ -186,9 +190,10 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
[ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
[pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr) [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr),
: [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3), [ff_ph_4e7b]"=&f"(ff_ph_4e7b), [ff_ph_04]"=&f"(ff_ph_04),
[ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04), [ff_ph_22a3]"=&f"(ff_ph_22a3)
: [ip]"r"(input),
[pred_stride]"r"((mips_reg)pred_stride), [pred_stride]"r"((mips_reg)pred_stride),
[dst_stride]"r"((mips_reg)dst_stride) [dst_stride]"r"((mips_reg)dst_stride)
: "memory" : "memory"
@ -198,12 +203,13 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr, void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr, int pred_stride, unsigned char *dst_ptr,
int dst_stride) { int dst_stride) {
int a1 = ((input_dc + 4) >> 3); int a0 = ((input_dc + 4) >> 3);
double ftmp[5]; double a1, ftmp[5];
int low32; int low32;
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dmtc1 %[a0], %[a1] \n\t"
"pshufh %[a1], %[a1], %[ftmp0] \n\t" "pshufh %[a1], %[a1], %[ftmp0] \n\t"
"ulw %[low32], 0x00(%[pred_ptr]) \n\t" "ulw %[low32], 0x00(%[pred_ptr]) \n\t"
"mtc1 %[low32], %[ftmp1] \n\t" "mtc1 %[low32], %[ftmp1] \n\t"
@ -244,9 +250,9 @@ void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32),
[dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr) [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr), [a1]"=&f"(a1)
: [dst_stride]"r"((mips_reg)dst_stride), : [dst_stride]"r"((mips_reg)dst_stride),
[pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1) [pred_stride]"r"((mips_reg)pred_stride), [a0]"r"(a0)
: "memory" : "memory"
); );
} }
@ -254,14 +260,15 @@ void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) { void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
int i; int i;
int16_t output[16]; int16_t output[16];
double ftmp[12]; double ff_ph_03, ftmp[12];
uint32_t tmp[1]; uint64_t tmp[1];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL };
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x0003000300030003 \n\t"
"dmtc1 %[tmp0], %[ff_ph_03] \n\t"
MMI_LI(%[tmp0], 0x03) MMI_LI(%[tmp0], 0x03)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
"gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
@ -317,8 +324,8 @@ void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
[ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]) [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [ff_ph_03]"=&f"(ff_ph_03)
: [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03) : [ip]"r"(input), [op]"r"(output)
: "memory" : "memory"
); );

View file

@ -70,9 +70,8 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
unsigned int output_height, unsigned int output_height,
unsigned int output_width, unsigned int output_width,
const int16_t *vp8_filter) { const int16_t *vp8_filter) {
uint32_t tmp[1]; uint64_t tmp[1];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; double ff_ph_40;
#if _MIPS_SIM == _ABIO32 #if _MIPS_SIM == _ABIO32
register double fzero asm("$f0"); register double fzero asm("$f0");
register double ftmp0 asm("$f2"); register double ftmp0 asm("$f2");
@ -103,18 +102,21 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp11 asm("$f12"); register double ftmp11 asm("$f12");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x0040004000400040 \n\t"
"dmtc1 %[tmp0], %[ff_ph_40] \n\t"
"ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t" "ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t"
"ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t" "ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t"
"ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t" "ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t"
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t" "ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t" "ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t" "ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
"xor %[fzero], %[fzero], %[fzero] \n\t" "pxor %[fzero], %[fzero], %[fzero] \n\t"
"li %[tmp0], 0x07 \n\t" "dli %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "dmtc1 %[tmp0], %[ftmp7] \n\t"
"li %[tmp0], 0x08 \n\t" "dli %[tmp0], 0x08 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t" "gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
@ -137,12 +139,12 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "ssrld %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "ssrld %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
@ -166,21 +168,22 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10), [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]), [ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
[src_ptr]"+&r"(src_ptr) [src_ptr]"+&r"(src_ptr), [ff_ph_40]"=&f"(ff_ph_40)
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line), : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
[vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width), [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width)
[ff_ph_40]"f"(ff_ph_40)
: "memory" : "memory"
); );
/* clang-format on */
} }
/* Horizontal filter: pixel_step is always W */ /* Horizontal filter: pixel_step is always W */
static INLINE void vp8_filter_block1dc_v6_mmi( static INLINE void vp8_filter_block1dc_v6_mmi(
uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height, uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height,
int output_pitch, unsigned int pixels_per_line, const int16_t *vp8_filter) { int output_pitch, unsigned int pixels_per_line, const int16_t *vp8_filter) {
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; double ff_ph_40;
uint32_t tmp[1]; uint64_t tmp[1];
mips_reg addr[1]; mips_reg addr[1];
#if _MIPS_SIM == _ABIO32 #if _MIPS_SIM == _ABIO32
register double fzero asm("$f0"); register double fzero asm("$f0");
register double ftmp0 asm("$f2"); register double ftmp0 asm("$f2");
@ -215,16 +218,19 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp13 asm("$f14"); register double ftmp13 asm("$f14");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x0040004000400040 \n\t"
"dmtc1 %[tmp0], %[ff_ph_40] \n\t"
"ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t" "ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t"
"ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t" "ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t"
"ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t" "ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t"
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t" "ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t" "ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t" "ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
"xor %[fzero], %[fzero], %[fzero] \n\t" "pxor %[fzero], %[fzero], %[fzero] \n\t"
"li %[tmp0], 0x07 \n\t" "dli %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp13] \n\t" "dmtc1 %[tmp0], %[ftmp13] \n\t"
/* In order to make full use of memory load delay slot, /* In order to make full use of memory load delay slot,
* Operation of memory loading and calculating has been rearranged. * Operation of memory loading and calculating has been rearranged.
@ -285,15 +291,16 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12), [ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr), [addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height) [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
[ff_ph_40]"=&f"(ff_ph_40)
: [pixels_per_line]"r"((mips_reg)pixels_per_line), : [pixels_per_line]"r"((mips_reg)pixels_per_line),
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)), [pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)), [pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
[vp8_filter]"r"(vp8_filter), [vp8_filter]"r"(vp8_filter),
[output_pitch]"r"((mips_reg)output_pitch), [output_pitch]"r"((mips_reg)output_pitch)
[ff_ph_40]"f"(ff_ph_40)
: "memory" : "memory"
); );
/* clang-format on */
} }
/* When xoffset == 0, vp8_filter= {0,0,128,0,0,0}, /* When xoffset == 0, vp8_filter= {0,0,128,0,0,0},
@ -313,8 +320,9 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
register double ftmp1 asm("$f2"); register double ftmp1 asm("$f2");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[fzero], %[fzero], %[fzero] \n\t" "pxor %[fzero], %[fzero], %[fzero] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
@ -335,6 +343,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
[output_width]"r"(output_width) [output_width]"r"(output_width)
: "memory" : "memory"
); );
/* clang-format on */
} }
static INLINE void vp8_filter_block1dc_v6_filter0_mmi( static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
@ -350,8 +359,9 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
register double ftmp1 asm("$f2"); register double ftmp1 asm("$f2");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
/* clang-format on */
__asm__ volatile ( __asm__ volatile (
"xor %[fzero], %[fzero], %[fzero] \n\t" "pxor %[fzero], %[fzero], %[fzero] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
@ -371,6 +381,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
[output_pitch]"r"((mips_reg)output_pitch) [output_pitch]"r"((mips_reg)output_pitch)
: "memory" : "memory"
); );
/* clang-format on */
} }
#define sixtapNxM(n, m) \ #define sixtapNxM(n, m) \

View file

@ -122,10 +122,10 @@
const uint8_t *psrc_m = (const uint8_t *)(psrc); \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
uint32_t val_m; \ uint32_t val_m; \
\ \
asm volatile("ulw %[val_m], %[psrc_m] \n\t" \ asm volatile("lwr %[val_m], 0(%[psrc_m]) \n\t" \
\ "lwl %[val_m], 3(%[psrc_m]) \n\t" \
: [val_m] "=r"(val_m) \ : [val_m] "=&r"(val_m) \
: [psrc_m] "m"(*psrc_m)); \ : [psrc_m] "r"(psrc_m)); \
\ \
val_m; \ val_m; \
}) })
@ -136,10 +136,10 @@
const uint8_t *psrc_m = (const uint8_t *)(psrc); \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
uint64_t val_m = 0; \ uint64_t val_m = 0; \
\ \
asm volatile("uld %[val_m], %[psrc_m] \n\t" \ asm volatile("ldr %[val_m], 0(%[psrc_m]) \n\t" \
\ "ldl %[val_m], 7(%[psrc_m]) \n\t" \
: [val_m] "=r"(val_m) \ : [val_m] "=&r"(val_m) \
: [psrc_m] "m"(*psrc_m)); \ : [psrc_m] "r"(psrc_m)); \
\ \
val_m; \ val_m; \
}) })

View file

@ -171,17 +171,20 @@ static inline int sem_destroy(sem_t *sem) {
#define sem_wait(sem) (semaphore_wait(*sem)) #define sem_wait(sem) (semaphore_wait(*sem))
#define sem_post(sem) semaphore_signal(*sem) #define sem_post(sem) semaphore_signal(*sem)
#define sem_destroy(sem) semaphore_destroy(mach_task_self(), *sem) #define sem_destroy(sem) semaphore_destroy(mach_task_self(), *sem)
#define thread_sleep(nms)
/* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec =
1000*nms;nanosleep(&ts, NULL);} */
#else #else
#include <unistd.h> #include <unistd.h>
#include <sched.h> #include <sched.h>
#define thread_sleep(nms) sched_yield(); #endif /* __APPLE__ */
/* Not Windows. Assume pthreads */
/* thread_sleep implementation: yield unless Linux/Unix. */
#if defined(__unix__) || defined(__APPLE__)
#define thread_sleep(nms)
/* {struct timespec ts;ts.tv_sec=0; /* {struct timespec ts;ts.tv_sec=0;
ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#endif #else
/* Not Windows. Assume pthreads */ #define thread_sleep(nms) sched_yield();
#endif /* __unix__ || __APPLE__ */
#endif #endif

View file

@ -14,7 +14,7 @@
SECTION .text SECTION .text
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
global sym(vp8_dequantize_b_impl_mmx) PRIVATE globalsym(vp8_dequantize_b_impl_mmx)
sym(vp8_dequantize_b_impl_mmx): sym(vp8_dequantize_b_impl_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -56,7 +56,7 @@ sym(vp8_dequantize_b_impl_mmx):
;short *dq, 1 ;short *dq, 1
;unsigned char *dest, 2 ;unsigned char *dest, 2
;int stride) 3 ;int stride) 3
global sym(vp8_dequant_idct_add_mmx) PRIVATE globalsym(vp8_dequant_idct_add_mmx)
sym(vp8_dequant_idct_add_mmx): sym(vp8_dequant_idct_add_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -35,7 +35,7 @@ SECTION .text
;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
;int pitch, unsigned char *dest,int stride) ;int pitch, unsigned char *dest,int stride)
global sym(vp8_short_idct4x4llm_mmx) PRIVATE globalsym(vp8_short_idct4x4llm_mmx)
sym(vp8_short_idct4x4llm_mmx): sym(vp8_short_idct4x4llm_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -225,7 +225,7 @@ sym(vp8_short_idct4x4llm_mmx):
;int pred_stride, ;int pred_stride,
;unsigned char *dst_ptr, ;unsigned char *dst_ptr,
;int stride) ;int stride)
global sym(vp8_dc_only_idct_add_mmx) PRIVATE globalsym(vp8_dc_only_idct_add_mmx)
sym(vp8_dc_only_idct_add_mmx): sym(vp8_dc_only_idct_add_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -21,7 +21,7 @@
SECTION .text SECTION .text
global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE globalsym(vp8_idct_dequant_0_2x_sse2)
sym(vp8_idct_dequant_0_2x_sse2): sym(vp8_idct_dequant_0_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -103,7 +103,7 @@ sym(vp8_idct_dequant_0_2x_sse2):
; unsigned char *dst - 2 ; unsigned char *dst - 2
; int dst_stride - 3 ; int dst_stride - 3
; ) ; )
global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE globalsym(vp8_idct_dequant_full_2x_sse2)
sym(vp8_idct_dequant_full_2x_sse2): sym(vp8_idct_dequant_full_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -360,7 +360,7 @@ sym(vp8_idct_dequant_full_2x_sse2):
; int dst_stride - 3 ; int dst_stride - 3
; short *dc - 4 ; short *dc - 4
; ) ; )
global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE globalsym(vp8_idct_dequant_dc_0_2x_sse2)
sym(vp8_idct_dequant_dc_0_2x_sse2): sym(vp8_idct_dequant_dc_0_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -436,7 +436,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2):
; int dst_stride - 3 ; int dst_stride - 3
; short *dc - 4 ; short *dc - 4
; ) ; )
global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE globalsym(vp8_idct_dequant_dc_full_2x_sse2)
sym(vp8_idct_dequant_dc_full_2x_sse2): sym(vp8_idct_dequant_dc_full_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -14,7 +14,7 @@
SECTION .text SECTION .text
;void vp8_short_inv_walsh4x4_sse2(short *input, short *mb_dqcoeff) ;void vp8_short_inv_walsh4x4_sse2(short *input, short *mb_dqcoeff)
global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE globalsym(vp8_short_inv_walsh4x4_sse2)
sym(vp8_short_inv_walsh4x4_sse2): sym(vp8_short_inv_walsh4x4_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -135,7 +135,7 @@ SECTION .text
; const char *limit, ; const char *limit,
; const char *thresh ; const char *thresh
;) ;)
global sym(vp8_loop_filter_bh_y_sse2) PRIVATE globalsym(vp8_loop_filter_bh_y_sse2)
sym(vp8_loop_filter_bh_y_sse2): sym(vp8_loop_filter_bh_y_sse2):
%if LIBVPX_YASM_WIN64 %if LIBVPX_YASM_WIN64
@ -277,7 +277,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
; const char *thresh ; const char *thresh
;) ;)
global sym(vp8_loop_filter_bv_y_sse2) PRIVATE globalsym(vp8_loop_filter_bv_y_sse2)
sym(vp8_loop_filter_bv_y_sse2): sym(vp8_loop_filter_bv_y_sse2):
%if LIBVPX_YASM_WIN64 %if LIBVPX_YASM_WIN64

View file

@ -288,7 +288,7 @@ SECTION .text
; const char *limit, ; const char *limit,
; const char *thresh, ; const char *thresh,
;) ;)
global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE globalsym(vp8_loop_filter_horizontal_edge_sse2)
sym(vp8_loop_filter_horizontal_edge_sse2): sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -336,7 +336,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
; const char *thresh, ; const char *thresh,
; int count ; int count
;) ;)
global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE globalsym(vp8_loop_filter_horizontal_edge_uv_sse2)
sym(vp8_loop_filter_horizontal_edge_uv_sse2): sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -563,7 +563,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
; const char *limit, ; const char *limit,
; const char *thresh, ; const char *thresh,
;) ;)
global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE globalsym(vp8_mbloop_filter_horizontal_edge_sse2)
sym(vp8_mbloop_filter_horizontal_edge_sse2): sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -609,7 +609,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
; const char *thresh, ; const char *thresh,
; unsigned char *v ; unsigned char *v
;) ;)
global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE globalsym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -930,7 +930,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
; const char *limit, ; const char *limit,
; const char *thresh, ; const char *thresh,
;) ;)
global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE globalsym(vp8_loop_filter_vertical_edge_sse2)
sym(vp8_loop_filter_vertical_edge_sse2): sym(vp8_loop_filter_vertical_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -995,7 +995,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; const char *thresh, ; const char *thresh,
; unsigned char *v ; unsigned char *v
;) ;)
global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE globalsym(vp8_loop_filter_vertical_edge_uv_sse2)
sym(vp8_loop_filter_vertical_edge_uv_sse2): sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -1144,7 +1144,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; const char *limit, ; const char *limit,
; const char *thresh, ; const char *thresh,
;) ;)
global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE globalsym(vp8_mbloop_filter_vertical_edge_sse2)
sym(vp8_mbloop_filter_vertical_edge_sse2): sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -1211,7 +1211,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
; const char *thresh, ; const char *thresh,
; unsigned char *v ; unsigned char *v
;) ;)
global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE globalsym(vp8_mbloop_filter_vertical_edge_uv_sse2)
sym(vp8_mbloop_filter_vertical_edge_uv_sse2): sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -1271,7 +1271,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
; int src_pixel_step, ; int src_pixel_step,
; const char *blimit, ; const char *blimit,
;) ;)
global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE globalsym(vp8_loop_filter_simple_horizontal_edge_sse2)
sym(vp8_loop_filter_simple_horizontal_edge_sse2): sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -1376,7 +1376,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
; int src_pixel_step, ; int src_pixel_step,
; const char *blimit, ; const char *blimit,
;) ;)
global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE globalsym(vp8_loop_filter_simple_vertical_edge_sse2)
sym(vp8_loop_filter_simple_vertical_edge_sse2): sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value. push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value. mov rbp, rsp ; set new base pointer value.

View file

@ -21,7 +21,7 @@ SECTION .text
; int dst_stride, ; int dst_stride,
; int src_weight ; int src_weight
;) ;)
global sym(vp8_filter_by_weight16x16_sse2) PRIVATE globalsym(vp8_filter_by_weight16x16_sse2)
sym(vp8_filter_by_weight16x16_sse2): sym(vp8_filter_by_weight16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -99,7 +99,7 @@ sym(vp8_filter_by_weight16x16_sse2):
; int dst_stride, ; int dst_stride,
; int src_weight ; int src_weight
;) ;)
global sym(vp8_filter_by_weight8x8_sse2) PRIVATE globalsym(vp8_filter_by_weight8x8_sse2)
sym(vp8_filter_by_weight8x8_sse2): sym(vp8_filter_by_weight8x8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -167,7 +167,7 @@ sym(vp8_filter_by_weight8x8_sse2):
; unsigned int *variance, 4 ; unsigned int *variance, 4
; unsigned int *sad, 5 ; unsigned int *sad, 5
;) ;)
global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE globalsym(vp8_variance_and_sad_16x16_sse2)
sym(vp8_variance_and_sad_16x16_sse2): sym(vp8_variance_and_sad_16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -19,7 +19,7 @@ SECTION .text
; unsigned char *dst, ; unsigned char *dst,
; int dst_stride ; int dst_stride
; ) ; )
global sym(vp8_copy_mem8x8_mmx) PRIVATE globalsym(vp8_copy_mem8x8_mmx)
sym(vp8_copy_mem8x8_mmx): sym(vp8_copy_mem8x8_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -82,7 +82,7 @@ sym(vp8_copy_mem8x8_mmx):
; unsigned char *dst, ; unsigned char *dst,
; int dst_stride ; int dst_stride
; ) ; )
global sym(vp8_copy_mem8x4_mmx) PRIVATE globalsym(vp8_copy_mem8x4_mmx)
sym(vp8_copy_mem8x4_mmx): sym(vp8_copy_mem8x4_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -19,7 +19,7 @@ SECTION .text
; unsigned char *dst, ; unsigned char *dst,
; int dst_stride ; int dst_stride
; ) ; )
global sym(vp8_copy_mem16x16_sse2) PRIVATE globalsym(vp8_copy_mem16x16_sse2)
sym(vp8_copy_mem16x16_sse2): sym(vp8_copy_mem16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -27,7 +27,7 @@ SECTION .text
; unsigned int output_width, ; unsigned int output_width,
; short * vp8_filter ; short * vp8_filter
;) ;)
global sym(vp8_filter_block1d_h6_mmx) PRIVATE globalsym(vp8_filter_block1d_h6_mmx)
sym(vp8_filter_block1d_h6_mmx): sym(vp8_filter_block1d_h6_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -124,7 +124,7 @@ sym(vp8_filter_block1d_h6_mmx):
; unsigned int output_width, ; unsigned int output_width,
; short * vp8_filter ; short * vp8_filter
;) ;)
global sym(vp8_filter_block1dc_v6_mmx) PRIVATE globalsym(vp8_filter_block1dc_v6_mmx)
sym(vp8_filter_block1dc_v6_mmx): sym(vp8_filter_block1dc_v6_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -33,7 +33,7 @@ SECTION .text
; unsigned int output_width, ; unsigned int output_width,
; short *vp8_filter ; short *vp8_filter
;) ;)
global sym(vp8_filter_block1d8_h6_sse2) PRIVATE globalsym(vp8_filter_block1d8_h6_sse2)
sym(vp8_filter_block1d8_h6_sse2): sym(vp8_filter_block1d8_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2):
; even number. This function handles 8 pixels in horizontal direction, calculating ONE ; even number. This function handles 8 pixels in horizontal direction, calculating ONE
; rows each iteration to take advantage of the 128 bits operations. ; rows each iteration to take advantage of the 128 bits operations.
;*************************************************************************************/ ;*************************************************************************************/
global sym(vp8_filter_block1d16_h6_sse2) PRIVATE globalsym(vp8_filter_block1d16_h6_sse2)
sym(vp8_filter_block1d16_h6_sse2): sym(vp8_filter_block1d16_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -333,7 +333,7 @@ sym(vp8_filter_block1d16_h6_sse2):
; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows. ; input pixel array has output_height rows.
;*************************************************************************************/ ;*************************************************************************************/
global sym(vp8_filter_block1d8_v6_sse2) PRIVATE globalsym(vp8_filter_block1d8_v6_sse2)
sym(vp8_filter_block1d8_v6_sse2): sym(vp8_filter_block1d8_v6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -428,7 +428,7 @@ sym(vp8_filter_block1d8_v6_sse2):
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows. ; input pixel array has output_height rows.
;*************************************************************************************/ ;*************************************************************************************/
global sym(vp8_filter_block1d16_v6_sse2) PRIVATE globalsym(vp8_filter_block1d16_v6_sse2)
sym(vp8_filter_block1d16_v6_sse2): sym(vp8_filter_block1d16_v6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -538,7 +538,7 @@ sym(vp8_filter_block1d16_v6_sse2):
; const short *vp8_filter ; const short *vp8_filter
;) ;)
; First-pass filter only when yoffset==0 ; First-pass filter only when yoffset==0
global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE globalsym(vp8_filter_block1d8_h6_only_sse2)
sym(vp8_filter_block1d8_h6_only_sse2): sym(vp8_filter_block1d8_h6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -651,7 +651,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
; const short *vp8_filter ; const short *vp8_filter
;) ;)
; First-pass filter only when yoffset==0 ; First-pass filter only when yoffset==0
global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE globalsym(vp8_filter_block1d16_h6_only_sse2)
sym(vp8_filter_block1d16_h6_only_sse2): sym(vp8_filter_block1d16_h6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -816,7 +816,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
; const short *vp8_filter ; const short *vp8_filter
;) ;)
; Second-pass filter only when xoffset==0 ; Second-pass filter only when xoffset==0
global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE globalsym(vp8_filter_block1d8_v6_only_sse2)
sym(vp8_filter_block1d8_v6_only_sse2): sym(vp8_filter_block1d8_v6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -908,7 +908,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
; unsigned int output_height, ; unsigned int output_height,
; unsigned int output_width ; unsigned int output_width
;) ;)
global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE globalsym(vp8_unpack_block1d16_h6_sse2)
sym(vp8_unpack_block1d16_h6_sse2): sym(vp8_unpack_block1d16_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -35,7 +35,7 @@ SECTION .text
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE globalsym(vp8_filter_block1d8_h6_ssse3)
sym(vp8_filter_block1d8_h6_ssse3): sym(vp8_filter_block1d8_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -178,7 +178,7 @@ vp8_filter_block1d8_h4_ssse3:
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE globalsym(vp8_filter_block1d16_h6_ssse3)
sym(vp8_filter_block1d16_h6_ssse3): sym(vp8_filter_block1d16_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -285,7 +285,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE globalsym(vp8_filter_block1d4_h6_ssse3)
sym(vp8_filter_block1d4_h6_ssse3): sym(vp8_filter_block1d4_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -415,7 +415,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE globalsym(vp8_filter_block1d16_v6_ssse3)
sym(vp8_filter_block1d16_v6_ssse3): sym(vp8_filter_block1d16_v6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -603,7 +603,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE globalsym(vp8_filter_block1d8_v6_ssse3)
sym(vp8_filter_block1d8_v6_ssse3): sym(vp8_filter_block1d8_v6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -743,7 +743,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
; unsigned int output_height, ; unsigned int output_height,
; unsigned int vp8_filter_index ; unsigned int vp8_filter_index
;) ;)
global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE globalsym(vp8_filter_block1d4_v6_ssse3)
sym(vp8_filter_block1d4_v6_ssse3): sym(vp8_filter_block1d4_v6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -882,7 +882,7 @@ sym(vp8_filter_block1d4_v6_ssse3):
; unsigned char *dst_ptr, ; unsigned char *dst_ptr,
; int dst_pitch ; int dst_pitch
;) ;)
global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE globalsym(vp8_bilinear_predict16x16_ssse3)
sym(vp8_bilinear_predict16x16_ssse3): sym(vp8_bilinear_predict16x16_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -1145,7 +1145,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
; unsigned char *dst_ptr, ; unsigned char *dst_ptr,
; int dst_pitch ; int dst_pitch
;) ;)
global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE globalsym(vp8_bilinear_predict8x8_ssse3)
sym(vp8_bilinear_predict8x8_ssse3): sym(vp8_bilinear_predict8x8_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -222,7 +222,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount) {
validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error); validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error);
w->buffer[w->pos++] = (lowvalue >> (24 - offset)); w->buffer[w->pos++] = (lowvalue >> (24 - offset)) & 0xff;
lowvalue <<= offset; lowvalue <<= offset;
shift = count; shift = count;
lowvalue &= 0xffffff; lowvalue &= 0xffffff;

View file

@ -24,19 +24,19 @@
"punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
"punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ "por %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \
"punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \
"punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ "por %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \
"punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \
"punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ "por %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \
"punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \
"punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
"pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
"or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "por %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \
"punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \
"punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \
@ -46,6 +46,7 @@
void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) { void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
uint64_t tmp[1]; uint64_t tmp[1];
int16_t *ip = input; int16_t *ip = input;
double ff_ph_op1, ff_ph_op3;
#if _MIPS_SIM == _ABIO32 #if _MIPS_SIM == _ABIO32
register double ftmp0 asm("$f0"); register double ftmp0 asm("$f0");
@ -83,14 +84,17 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
DECLARE_ALIGNED(8, const uint64_t, ff_pw_51000) = { 0x0000c7380000c738ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_51000) = { 0x0000c7380000c738ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_14500) = { 0x000038a4000038a4ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_14500) = { 0x000038a4000038a4ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_7500) = { 0x00001d4c00001d4cULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_7500) = { 0x00001d4c00001d4cULL };
DECLARE_ALIGNED(8, const uint64_t, ff_ph_op1) = { 0x14e808a914e808a9ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_ph_op3) = { 0xeb1808a9eb1808a9ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5352) = { 0x000014e8000014e8ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_5352) = { 0x000014e8000014e8ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_2217) = { 0x000008a9000008a9ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_2217) = { 0x000008a9000008a9ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL };
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "dli %[tmp0], 0x14e808a914e808a9 \n\t"
"dmtc1 %[tmp0], %[ff_ph_op1] \n\t"
"dli %[tmp0], 0xeb1808a9eb1808a9 \n\t"
"dmtc1 %[tmp0], %[ff_ph_op3] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
MMI_ADDU(%[ip], %[ip], %[pitch]) MMI_ADDU(%[ip], %[ip], %[pitch])
@ -129,7 +133,7 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
// op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12 // op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12
MMI_LI(%[tmp0], 0x0c) MMI_LI(%[tmp0], 0x0c)
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"ldc1 %[ftmp12], %[ff_pw_14500] \n\t" "ldc1 %[ftmp12], %[ff_pw_14500] \n\t"
"punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op1] \n\t" "pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op1] \n\t"
@ -169,7 +173,7 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
MMI_LI(%[tmp0], 0x04) MMI_LI(%[tmp0], 0x04)
"mtc1 %[tmp0], %[ftmp9] \n\t" "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
@ -211,15 +215,16 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
[ftmp3] "=&f"(ftmp3), [ftmp4] "=&f"(ftmp4), [ftmp5] "=&f"(ftmp5), [ftmp3] "=&f"(ftmp3), [ftmp4] "=&f"(ftmp4), [ftmp5] "=&f"(ftmp5),
[ftmp6] "=&f"(ftmp6), [ftmp7] "=&f"(ftmp7), [ftmp8] "=&f"(ftmp8), [ftmp6] "=&f"(ftmp6), [ftmp7] "=&f"(ftmp7), [ftmp8] "=&f"(ftmp8),
[ftmp9] "=&f"(ftmp9), [ftmp10] "=&f"(ftmp10), [ftmp11] "=&f"(ftmp11), [ftmp9] "=&f"(ftmp9), [ftmp10] "=&f"(ftmp10), [ftmp11] "=&f"(ftmp11),
[ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip) [ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip),
[ff_ph_op1] "=&f"(ff_ph_op1), [ff_ph_op3] "=&f"(ff_ph_op3)
: [ff_ph_01] "m"(ff_ph_01), [ff_ph_07] "m"(ff_ph_07), : [ff_ph_01] "m"(ff_ph_01), [ff_ph_07] "m"(ff_ph_07),
[ff_ph_op1] "f"(ff_ph_op1), [ff_ph_op3] "f"(ff_ph_op3),
[ff_pw_14500] "m"(ff_pw_14500), [ff_pw_7500] "m"(ff_pw_7500), [ff_pw_14500] "m"(ff_pw_14500), [ff_pw_7500] "m"(ff_pw_7500),
[ff_pw_12000] "m"(ff_pw_12000), [ff_pw_51000] "m"(ff_pw_51000), [ff_pw_12000] "m"(ff_pw_12000), [ff_pw_51000] "m"(ff_pw_51000),
[ff_pw_5352]"m"(ff_pw_5352), [ff_pw_2217]"m"(ff_pw_2217), [ff_pw_5352]"m"(ff_pw_5352), [ff_pw_2217]"m"(ff_pw_2217),
[ff_ph_8]"m"(ff_ph_8), [pitch]"r"(pitch), [output] "r"(output) [ff_ph_8]"m"(ff_ph_8), [pitch]"r"(pitch), [output] "r"(output)
: "memory" : "memory"
); );
/* clang-format on */
} }
void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) { void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) {
@ -228,17 +233,22 @@ void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) {
} }
void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) { void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
double ftmp[13]; double ftmp[13], ff_ph_01, ff_pw_01, ff_pw_03, ff_pw_mask;
uint32_t tmp[1]; uint64_t tmp[1];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_01) = { 0x0000000100000001ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_03) = { 0x0000000300000003ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_mask) = { 0x0001000000010000ULL };
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x0001000100010001 \n\t"
"dmtc1 %[tmp0], %[ff_ph_01] \n\t"
"dli %[tmp0], 0x0000000100000001 \n\t"
"dmtc1 %[tmp0], %[ff_pw_01] \n\t"
"dli %[tmp0], 0x0000000300000003 \n\t"
"dmtc1 %[tmp0], %[ff_pw_03] \n\t"
"dli %[tmp0], 0x0001000000010000 \n\t"
"dmtc1 %[tmp0], %[ff_pw_mask] \n\t"
MMI_LI(%[tmp0], 0x02) MMI_LI(%[tmp0], 0x02)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
@ -337,52 +347,52 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"psubw %[ftmp4], %[ftmp9], %[ftmp10] \n\t" "psubw %[ftmp4], %[ftmp9], %[ftmp10] \n\t"
MMI_LI(%[tmp0], 0x03) MMI_LI(%[tmp0], 0x03)
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp1] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp1] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"paddw %[ftmp1], %[ftmp1], %[ff_pw_03] \n\t" "paddw %[ftmp1], %[ftmp1], %[ff_pw_03] \n\t"
"psraw %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "psraw %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp2] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp2] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddw %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddw %[ftmp2], %[ftmp2], %[ff_pw_03] \n\t" "paddw %[ftmp2], %[ftmp2], %[ff_pw_03] \n\t"
"psraw %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "psraw %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp3] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp3] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp3], %[ftmp3], %[ftmp9] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
"paddw %[ftmp3], %[ftmp3], %[ff_pw_03] \n\t" "paddw %[ftmp3], %[ftmp3], %[ff_pw_03] \n\t"
"psraw %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "psraw %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp4] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp4] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp4], %[ftmp4], %[ftmp9] \n\t" "paddw %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
"paddw %[ftmp4], %[ftmp4], %[ff_pw_03] \n\t" "paddw %[ftmp4], %[ftmp4], %[ff_pw_03] \n\t"
"psraw %[ftmp4], %[ftmp4], %[ftmp11] \n\t" "psraw %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp5] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp5] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp5], %[ftmp5], %[ftmp9] \n\t" "paddw %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
"paddw %[ftmp5], %[ftmp5], %[ff_pw_03] \n\t" "paddw %[ftmp5], %[ftmp5], %[ff_pw_03] \n\t"
"psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp6] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp6] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp6], %[ftmp6], %[ftmp9] \n\t" "paddw %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"paddw %[ftmp6], %[ftmp6], %[ff_pw_03] \n\t" "paddw %[ftmp6], %[ftmp6], %[ff_pw_03] \n\t"
"psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp7] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp7] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp7], %[ftmp7], %[ftmp9] \n\t" "paddw %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
"paddw %[ftmp7], %[ftmp7], %[ff_pw_03] \n\t" "paddw %[ftmp7], %[ftmp7], %[ff_pw_03] \n\t"
"psraw %[ftmp7], %[ftmp7], %[ftmp11] \n\t" "psraw %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp8] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp8] \n\t"
"and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
"paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t"
"paddw %[ftmp8], %[ftmp8], %[ff_pw_03] \n\t" "paddw %[ftmp8], %[ftmp8], %[ff_pw_03] \n\t"
"psraw %[ftmp8], %[ftmp8], %[ftmp11] \n\t" "psraw %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
@ -393,7 +403,7 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"packsswh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" "packsswh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
MMI_LI(%[tmp0], 0x72) MMI_LI(%[tmp0], 0x72)
"mtc1 %[tmp0], %[ftmp11] \n\t" "dmtc1 %[tmp0], %[ftmp11] \n\t"
"pshufh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "pshufh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
"pshufh %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "pshufh %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
"pshufh %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "pshufh %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
@ -413,13 +423,12 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[12]), [ff_pw_mask]"=&f"(ff_pw_mask),
[tmp0]"=&r"(tmp[0]), [tmp0]"=&r"(tmp[0]), [ff_pw_01]"=&f"(ff_pw_01),
[ip]"+&r"(input) [ip]"+&r"(input), [ff_pw_03]"=&f"(ff_pw_03),
: [op]"r"(output), [ff_ph_01]"=&f"(ff_ph_01)
[ff_pw_01]"f"(ff_pw_01), [pitch]"r"((mips_reg)pitch), : [op]"r"(output), [pitch]"r"((mips_reg)pitch)
[ff_pw_03]"f"(ff_pw_03), [ff_pw_mask]"f"(ff_pw_mask),
[ff_ph_01]"f"(ff_ph_01)
: "memory" : "memory"
); );
/* clang-format on */
} }

View file

@ -42,24 +42,25 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
double ftmp[13]; double ftmp[13];
uint64_t tmp[1]; uint64_t tmp[1];
DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL }; int64_t eob = 0;
int eob = 0; double ones;
__asm__ volatile( __asm__ volatile(
// loop 0 ~ 7 // loop 0 ~ 7
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"pcmpeqh %[ones], %[ones], %[ones] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t"
"li %[tmp0], 0x0f \n\t" "dli %[tmp0], 0x0f \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "dmtc1 %[tmp0], %[ftmp9] \n\t"
"gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t"
"gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t"
"psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t"
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" "pxor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
"psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t"
"xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" "pxor %[ftmp2], %[ftmp4], %[ftmp2] \n\t"
"psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"gsldlc1 %[ftmp5], 0x07(%[round_ptr]) \n\t" "gsldlc1 %[ftmp5], 0x07(%[round_ptr]) \n\t"
@ -75,8 +76,8 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
"pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" "pxor %[ftmp7], %[ftmp5], %[ftmp3] \n\t"
"xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" "pxor %[ftmp8], %[ftmp6], %[ftmp4] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"gssdlc1 %[ftmp7], 0x07(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp7], 0x07(%[qcoeff_ptr]) \n\t"
@ -90,10 +91,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"gsldrc1 %[ftmp2], 0x08(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[inv_zig_zag]) \n\t"
"pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
"pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"xor %[ftmp5], %[ftmp5], %[ones] \n\t" "pxor %[ftmp5], %[ftmp5], %[ones] \n\t"
"xor %[ftmp6], %[ftmp6], %[ones] \n\t" "pxor %[ftmp6], %[ftmp6], %[ones] \n\t"
"and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "pand %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
"and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pand %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"pmaxsh %[ftmp10], %[ftmp5], %[ftmp6] \n\t" "pmaxsh %[ftmp10], %[ftmp5], %[ftmp6] \n\t"
"gsldlc1 %[ftmp5], 0x07(%[dequant_ptr]) \n\t" "gsldlc1 %[ftmp5], 0x07(%[dequant_ptr]) \n\t"
@ -114,10 +115,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"gsldrc1 %[ftmp2], 0x18(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[coeff_ptr]) \n\t"
"psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t"
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" "pxor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
"psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t"
"xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" "pxor %[ftmp2], %[ftmp4], %[ftmp2] \n\t"
"psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"gsldlc1 %[ftmp5], 0x17(%[round_ptr]) \n\t" "gsldlc1 %[ftmp5], 0x17(%[round_ptr]) \n\t"
@ -133,8 +134,8 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
"pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" "pxor %[ftmp7], %[ftmp5], %[ftmp3] \n\t"
"xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" "pxor %[ftmp8], %[ftmp6], %[ftmp4] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"gssdlc1 %[ftmp7], 0x17(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp7], 0x17(%[qcoeff_ptr]) \n\t"
@ -148,10 +149,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"gsldrc1 %[ftmp2], 0x18(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[inv_zig_zag]) \n\t"
"pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
"pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"xor %[ftmp5], %[ftmp5], %[ones] \n\t" "pxor %[ftmp5], %[ftmp5], %[ones] \n\t"
"xor %[ftmp6], %[ftmp6], %[ones] \n\t" "pxor %[ftmp6], %[ftmp6], %[ones] \n\t"
"and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "pand %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
"and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pand %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"pmaxsh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" "pmaxsh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
"gsldlc1 %[ftmp5], 0x17(%[dequant_ptr]) \n\t" "gsldlc1 %[ftmp5], 0x17(%[dequant_ptr]) \n\t"
@ -165,34 +166,34 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t"
"gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t"
"li %[tmp0], 0x10 \n\t" "dli %[tmp0], 0x10 \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t" "psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"li %[tmp0], 0xaa \n\t" "dli %[tmp0], 0xaa \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t" "pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"li %[tmp0], 0xffff \n\t" "dli %[tmp0], 0xffff \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "dmtc1 %[tmp0], %[ftmp9] \n\t"
"and %[ftmp10], %[ftmp10], %[ftmp9] \n\t" "pand %[ftmp10], %[ftmp10], %[ftmp9] \n\t"
"gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t" "gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t"
"gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t" "gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t"
: [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
[ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
[ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
[ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[tmp0] "=&r"(tmp[0]), [ones] "=&f"(ones)
: [coeff_ptr] "r"((mips_reg)coeff_ptr), : [coeff_ptr] "r"((mips_reg)coeff_ptr),
[qcoeff_ptr] "r"((mips_reg)qcoeff_ptr), [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
[dequant_ptr] "r"((mips_reg)dequant_ptr), [dequant_ptr] "r"((mips_reg)dequant_ptr),
[round_ptr] "r"((mips_reg)round_ptr), [round_ptr] "r"((mips_reg)round_ptr),
[quant_ptr] "r"((mips_reg)quant_ptr), [quant_ptr] "r"((mips_reg)quant_ptr),
[dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr), [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
[inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob), [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob)
[ones] "f"(ones)
: "memory"); : "memory");
*d->eob = eob; *d->eob = eob;
@ -217,7 +218,7 @@ void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
// memset(dqcoeff_ptr, 0, 32); // memset(dqcoeff_ptr, 0, 32);
/* clang-format off */ /* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gssdlc1 %[ftmp0], 0x07(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[qcoeff_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[qcoeff_ptr]) \n\t"
"gssdlc1 %[ftmp0], 0x0f(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x0f(%[qcoeff_ptr]) \n\t"

View file

@ -1430,6 +1430,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) {
VP8_COMMON *cm = &cpi->common; VP8_COMMON *cm = &cpi->common;
int last_w, last_h; int last_w, last_h;
unsigned int prev_number_of_layers; unsigned int prev_number_of_layers;
unsigned int raw_target_rate;
if (!cpi) return; if (!cpi) return;
@ -1570,6 +1571,10 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) {
cpi->oxcf.maximum_buffer_size_in_ms = 240000; cpi->oxcf.maximum_buffer_size_in_ms = 240000;
} }
raw_target_rate = (unsigned int)((int64_t)cpi->oxcf.Width * cpi->oxcf.Height *
8 * 3 * cpi->framerate / 1000);
if (cpi->oxcf.target_bandwidth > raw_target_rate)
cpi->oxcf.target_bandwidth = raw_target_rate;
/* Convert target bandwidth from Kbit/s to Bit/s */ /* Convert target bandwidth from Kbit/s to Bit/s */
cpi->oxcf.target_bandwidth *= 1000; cpi->oxcf.target_bandwidth *= 1000;
@ -3615,7 +3620,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
if (cpi->this_key_frame_forced) { if (cpi->this_key_frame_forced) {
if (cpi->active_best_quality > cpi->avg_frame_qindex * 7 / 8) { if (cpi->active_best_quality > cpi->avg_frame_qindex * 7 / 8) {
cpi->active_best_quality = cpi->avg_frame_qindex * 7 / 8; cpi->active_best_quality = cpi->avg_frame_qindex * 7 / 8;
} else if (cpi->active_best_quality<cpi->avg_frame_qindex>> 2) { } else if (cpi->active_best_quality < (cpi->avg_frame_qindex >> 2)) {
cpi->active_best_quality = cpi->avg_frame_qindex >> 2; cpi->active_best_quality = cpi->avg_frame_qindex >> 2;
} }
} }
@ -4533,9 +4538,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
/* Actual bits spent */ /* Actual bits spent */
cpi->total_actual_bits += cpi->projected_frame_size; cpi->total_actual_bits += cpi->projected_frame_size;
#if 0 && CONFIG_INTERNAL_STATS
/* Debug stats */ /* Debug stats */
cpi->total_target_vs_actual += cpi->total_target_vs_actual +=
(cpi->this_frame_target - cpi->projected_frame_size); (cpi->this_frame_target - cpi->projected_frame_size);
#endif
cpi->buffer_level = cpi->bits_off_target; cpi->buffer_level = cpi->bits_off_target;

View file

@ -14,6 +14,8 @@
/* Trees map alphabets into huffman-like codes suitable for an arithmetic /* Trees map alphabets into huffman-like codes suitable for an arithmetic
bit coder. Timothy S Murphy 11 October 2004 */ bit coder. Timothy S Murphy 11 October 2004 */
#include <stdint.h>
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vp8/common/treecoder.h" #include "vp8/common/treecoder.h"
@ -48,7 +50,9 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2],
vp8_prob p) { vp8_prob p) {
/* Imitate existing calculation */ /* Imitate existing calculation */
return ((ct[0] * vp8_cost_zero(p)) + (ct[1] * vp8_cost_one(p))) >> 8; return (unsigned int)(((((uint64_t)ct[0]) * vp8_cost_zero(p)) +
(((uint64_t)ct[1]) * vp8_cost_one(p))) >>
8);
} }
/* Small functions to write explicit values and tokens, as well as /* Small functions to write explicit values and tokens, as well as

View file

@ -14,7 +14,7 @@
SECTION .text SECTION .text
;int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr) ;int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr)
global sym(vp8_block_error_sse2) PRIVATE globalsym(vp8_block_error_sse2)
sym(vp8_block_error_sse2): sym(vp8_block_error_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -62,7 +62,7 @@ sym(vp8_block_error_sse2):
ret ret
;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc); ;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
global sym(vp8_mbblock_error_sse2_impl) PRIVATE globalsym(vp8_mbblock_error_sse2_impl)
sym(vp8_mbblock_error_sse2_impl): sym(vp8_mbblock_error_sse2_impl):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -132,7 +132,7 @@ sym(vp8_mbblock_error_sse2_impl):
;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr); ;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr);
global sym(vp8_mbuverror_sse2_impl) PRIVATE globalsym(vp8_mbuverror_sse2_impl)
sym(vp8_mbuverror_sse2_impl): sym(vp8_mbuverror_sse2_impl):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -19,7 +19,7 @@ SECTION .text
; unsigned char *dst_ptr, ; unsigned char *dst_ptr,
; int dst_stride, ; int dst_stride,
; int height); ; int height);
global sym(vp8_copy32xn_sse2) PRIVATE globalsym(vp8_copy32xn_sse2)
sym(vp8_copy32xn_sse2): sym(vp8_copy32xn_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -91,7 +91,7 @@ SECTION .text
; unsigned char *dst_ptr, ; unsigned char *dst_ptr,
; int dst_stride, ; int dst_stride,
; int height); ; int height);
global sym(vp8_copy32xn_sse3) PRIVATE globalsym(vp8_copy32xn_sse3)
sym(vp8_copy32xn_sse3): sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3

View file

@ -63,7 +63,7 @@
SECTION .text SECTION .text
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
global sym(vp8_short_fdct4x4_sse2) PRIVATE globalsym(vp8_short_fdct4x4_sse2)
sym(vp8_short_fdct4x4_sse2): sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_CREATE STACK_FRAME_CREATE
@ -168,7 +168,7 @@ sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_DESTROY STACK_FRAME_DESTROY
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
global sym(vp8_short_fdct8x4_sse2) PRIVATE globalsym(vp8_short_fdct8x4_sse2)
sym(vp8_short_fdct8x4_sse2): sym(vp8_short_fdct8x4_sse2):
STACK_FRAME_CREATE STACK_FRAME_CREATE

View file

@ -14,7 +14,7 @@
SECTION .text SECTION .text
;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch)
global sym(vp8_short_walsh4x4_sse2) PRIVATE globalsym(vp8_short_walsh4x4_sse2)
sym(vp8_short_walsh4x4_sse2): sym(vp8_short_walsh4x4_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -22,7 +22,7 @@ SECTION .text
; int filter_weight, | 5 ; int filter_weight, | 5
; unsigned int *accumulator, | 6 ; unsigned int *accumulator, | 6
; unsigned short *count) | 7 ; unsigned short *count) | 7
global sym(vp8_temporal_filter_apply_sse2) PRIVATE globalsym(vp8_temporal_filter_apply_sse2)
sym(vp8_temporal_filter_apply_sse2): sym(vp8_temporal_filter_apply_sse2):
push rbp push rbp

View file

@ -264,9 +264,12 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img) { const vpx_image_t *img) {
switch (img->fmt) { switch (img->fmt) {
case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_I420: break; case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_NV12: break;
default: default:
ERROR("Invalid image format. Only YV12 and I420 images are supported"); ERROR(
"Invalid image format. Only YV12, I420 and NV12 images are "
"supported");
} }
if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h))

View file

@ -687,7 +687,7 @@ static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK; return VPX_CODEC_OK;
} }
vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = { static vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = {
{ VP8_SET_REFERENCE, vp8_set_reference }, { VP8_SET_REFERENCE, vp8_set_reference },
{ VP8_COPY_REFERENCE, vp8_get_reference }, { VP8_COPY_REFERENCE, vp8_get_reference },
{ VP8_SET_POSTPROC, vp8_set_postproc }, { VP8_SET_POSTPROC, vp8_set_postproc },

View file

@ -77,10 +77,8 @@ typedef struct {
// properly. // properly.
int frame_index; // Display order in the video, it's equivalent to the int frame_index; // Display order in the video, it's equivalent to the
// show_idx defined in EncodeFrameInfo. // show_idx defined in EncodeFrameInfo.
#if CONFIG_RATE_CTRL
int frame_coding_index; // The coding order (starting from zero) of this int frame_coding_index; // The coding order (starting from zero) of this
// frame. // frame.
#endif // CONFIG_RATE_CTRL
vpx_codec_frame_buffer_t raw_frame_buffer; vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf; YV12_BUFFER_CONFIG buf;
} RefCntBuffer; } RefCntBuffer;
@ -240,13 +238,11 @@ typedef struct VP9Common {
// TODO(angiebird): current_video_frame/current_frame_coding_index into a // TODO(angiebird): current_video_frame/current_frame_coding_index into a
// structure // structure
unsigned int current_video_frame; unsigned int current_video_frame;
#if CONFIG_RATE_CTRL
// Each show or no show frame is assigned with a coding index based on its // Each show or no show frame is assigned with a coding index based on its
// coding order (starting from zero). // coding order (starting from zero).
// Current frame's coding index. // Current frame's coding index.
int current_frame_coding_index; int current_frame_coding_index;
#endif
BITSTREAM_PROFILE profile; BITSTREAM_PROFILE profile;
// VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3.
@ -276,9 +272,7 @@ typedef struct VP9Common {
static INLINE void init_frame_indexes(VP9_COMMON *cm) { static INLINE void init_frame_indexes(VP9_COMMON *cm) {
cm->current_video_frame = 0; cm->current_video_frame = 0;
#if CONFIG_RATE_CTRL
cm->current_frame_coding_index = 0; cm->current_frame_coding_index = 0;
#endif // CONFIG_RATE_CTRL
} }
static INLINE void update_frame_indexes(VP9_COMMON *cm, int show_frame) { static INLINE void update_frame_indexes(VP9_COMMON *cm, int show_frame) {
@ -287,9 +281,7 @@ static INLINE void update_frame_indexes(VP9_COMMON *cm, int show_frame) {
// update not a real frame // update not a real frame
++cm->current_video_frame; ++cm->current_video_frame;
} }
#if CONFIG_RATE_CTRL
++cm->current_frame_coding_index; ++cm->current_frame_coding_index;
#endif // CONFIG_RATE_CTRL
} }
typedef struct { typedef struct {

View file

@ -22,7 +22,7 @@ SECTION .text
; int dst_stride, ; int dst_stride,
; int src_weight ; int src_weight
;) ;)
global sym(vp9_filter_by_weight16x16_sse2) PRIVATE globalsym(vp9_filter_by_weight16x16_sse2)
sym(vp9_filter_by_weight16x16_sse2): sym(vp9_filter_by_weight16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -100,7 +100,7 @@ sym(vp9_filter_by_weight16x16_sse2):
; int dst_stride, ; int dst_stride,
; int src_weight ; int src_weight
;) ;)
global sym(vp9_filter_by_weight8x8_sse2) PRIVATE globalsym(vp9_filter_by_weight8x8_sse2)
sym(vp9_filter_by_weight8x8_sse2): sym(vp9_filter_by_weight8x8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -168,7 +168,7 @@ sym(vp9_filter_by_weight8x8_sse2):
; unsigned int *variance, 4 ; unsigned int *variance, 4
; unsigned int *sad, 5 ; unsigned int *sad, 5
;) ;)
global sym(vp9_variance_and_sad_16x16_sse2) PRIVATE globalsym(vp9_variance_and_sad_16x16_sse2)
sym(vp9_variance_and_sad_16x16_sse2): sym(vp9_variance_and_sad_16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -153,6 +153,11 @@ static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
} }
static void vp9_dec_free_mi(VP9_COMMON *cm) { static void vp9_dec_free_mi(VP9_COMMON *cm) {
#if CONFIG_VP9_POSTPROC
// MFQE allocates an additional mip and swaps it with cm->mip.
vpx_free(cm->postproc_state.prev_mip);
cm->postproc_state.prev_mip = NULL;
#endif
vpx_free(cm->mip); vpx_free(cm->mip);
cm->mip = NULL; cm->mip = NULL;
vpx_free(cm->mi_grid_base); vpx_free(cm->mi_grid_base);

View file

@ -3766,9 +3766,6 @@ static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int mi_col, int orig_rdmult) { int mi_col, int orig_rdmult) {
const int gf_group_index = cpi->twopass.gf_group.index; const int gf_group_index = cpi->twopass.gf_group.index;
TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
int tpl_stride = tpl_frame->stride;
int64_t intra_cost = 0; int64_t intra_cost = 0;
int64_t mc_dep_cost = 0; int64_t mc_dep_cost = 0;
int mi_wide = num_8x8_blocks_wide_lookup[bsize]; int mi_wide = num_8x8_blocks_wide_lookup[bsize];
@ -3779,11 +3776,18 @@ static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int count = 0; int count = 0;
double r0, rk, beta; double r0, rk, beta;
if (tpl_frame->is_valid == 0) return orig_rdmult; TplDepFrame *tpl_frame;
TplDepStats *tpl_stats;
if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; int tpl_stride;
if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;
tpl_frame = &cpi->tpl_stats[gf_group_index];
if (tpl_frame->is_valid == 0) return orig_rdmult;
tpl_stats = tpl_frame->tpl_stats_ptr;
tpl_stride = tpl_frame->stride;
if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;
for (row = mi_row; row < mi_row + mi_high; ++row) { for (row = mi_row; row < mi_row + mi_high; ++row) {
for (col = mi_col; col < mi_col + mi_wide; ++col) { for (col = mi_col; col < mi_col + mi_wide; ++col) {
@ -5086,8 +5090,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
(void)*tp_orig; (void)*tp_orig;
// Avoid checking for rectangular partitions for speed >= 6. // Avoid checking for rectangular partitions for speed >= 5.
if (cpi->oxcf.speed >= 6) do_rect = 0; if (cpi->oxcf.speed >= 5) do_rect = 0;
assert(num_8x8_blocks_wide_lookup[bsize] == assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]); num_8x8_blocks_high_lookup[bsize]);

View file

@ -1024,6 +1024,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
free_partition_info(cpi); free_partition_info(cpi);
free_motion_vector_info(cpi); free_motion_vector_info(cpi);
free_fp_motion_vector_info(cpi);
free_tpl_stats_info(cpi);
#endif #endif
vp9_free_ref_frame_buffers(cm->buffer_pool); vp9_free_ref_frame_buffers(cm->buffer_pool);
@ -1523,8 +1525,29 @@ static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height); vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
} }
static void set_rc_buffer_sizes(RATE_CONTROL *rc, void vp9_check_reset_rc_flag(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf) { RATE_CONTROL *rc = &cpi->rc;
if (cpi->common.current_video_frame >
(unsigned int)cpi->svc.number_spatial_layers) {
if (cpi->use_svc) {
vp9_svc_check_reset_layer_rc_flag(cpi);
} else {
if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
rc->bits_off_target = rc->optimal_buffer_level;
rc->buffer_level = rc->optimal_buffer_level;
}
}
}
}
void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
RATE_CONTROL *rc = &cpi->rc;
const VP9EncoderConfig *oxcf = &cpi->oxcf;
const int64_t bandwidth = oxcf->target_bandwidth; const int64_t bandwidth = oxcf->target_bandwidth;
const int64_t starting = oxcf->starting_buffer_level_ms; const int64_t starting = oxcf->starting_buffer_level_ms;
const int64_t optimal = oxcf->optimal_buffer_level_ms; const int64_t optimal = oxcf->optimal_buffer_level_ms;
@ -1535,6 +1558,11 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc,
(optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000; (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
rc->maximum_buffer_size = rc->maximum_buffer_size =
(maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000; (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
@ -1991,12 +2019,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
} }
cpi->encode_breakout = cpi->oxcf.encode_breakout; cpi->encode_breakout = cpi->oxcf.encode_breakout;
set_rc_buffer_sizes(rc, &cpi->oxcf); vp9_set_rc_buffer_sizes(cpi);
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
// Set up frame rate and related parameters rate control values. // Set up frame rate and related parameters rate control values.
vp9_new_framerate(cpi, cpi->framerate); vp9_new_framerate(cpi, cpi->framerate);
@ -2057,23 +2080,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
(int)cpi->oxcf.target_bandwidth); (int)cpi->oxcf.target_bandwidth);
} }
// Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the vp9_check_reset_rc_flag(cpi);
// configuration change has a large change in avg_frame_bandwidth.
// For SVC check for resetting based on spatial layer average bandwidth.
// Also reset buffer level to optimal level.
if (cm->current_video_frame > (unsigned int)cpi->svc.number_spatial_layers) {
if (cpi->use_svc) {
vp9_svc_check_reset_layer_rc_flag(cpi);
} else {
if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
rc->bits_off_target = rc->optimal_buffer_level;
rc->buffer_level = rc->optimal_buffer_level;
}
}
}
cpi->alt_ref_source = NULL; cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0; rc->is_src_frame_alt_ref = 0;
@ -2457,6 +2464,8 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
vp9_extrc_init(&cpi->ext_ratectrl);
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
if (oxcf->pass == 1) { if (oxcf->pass == 1) {
vp9_init_first_pass(cpi); vp9_init_first_pass(cpi);
@ -2656,6 +2665,8 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
encode_command_init(&cpi->encode_command); encode_command_init(&cpi->encode_command);
partition_info_init(cpi); partition_info_init(cpi);
motion_vector_info_init(cpi); motion_vector_info_init(cpi);
fp_motion_vector_info_init(cpi);
tpl_stats_info_init(cpi);
#endif #endif
return cpi; return cpi;
@ -2827,6 +2838,8 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
} }
#endif #endif
vp9_extrc_delete(&cpi->ext_ratectrl);
vp9_remove_common(cm); vp9_remove_common(cm);
vp9_free_ref_frame_buffers(cm->buffer_pool); vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC #if CONFIG_VP9_POSTPROC
@ -3309,6 +3322,13 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
return; return;
} }
if (cpi->loopfilter_ctrl == NO_LOOPFILTER ||
(!is_reference_frame && cpi->loopfilter_ctrl == LOOPFILTER_REFERENCE)) {
lf->filter_level = 0;
vpx_extend_frame_inner_borders(cm->frame_to_show);
return;
}
if (xd->lossless) { if (xd->lossless) {
lf->filter_level = 0; lf->filter_level = 0;
lf->last_filt_level = 0; lf->last_filt_level = 0;
@ -3742,8 +3762,11 @@ static void set_frame_size(VP9_COMP *cpi) {
} }
#endif // !CONFIG_REALTIME_ONLY #endif // !CONFIG_REALTIME_ONLY
if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc && if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) { oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
// For SVC scaled width/height will have been set (svc->resize_set=1)
// in get_svc_params based on the layer width/height.
if (!cpi->use_svc || !cpi->svc.resize_set) {
oxcf->scaled_frame_width = oxcf->scaled_frame_width =
(oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den; (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
oxcf->scaled_frame_height = oxcf->scaled_frame_height =
@ -3751,6 +3774,7 @@ static void set_frame_size(VP9_COMP *cpi) {
// There has been a change in frame size. // There has been a change in frame size.
vp9_set_size_literal(cpi, oxcf->scaled_frame_width, vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
oxcf->scaled_frame_height); oxcf->scaled_frame_height);
}
// TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
set_mv_search_params(cpi); set_mv_search_params(cpi);
@ -4035,8 +4059,11 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames). // avoid this frame-level upsampling (for non intra_only frames).
// For SVC single_layer mode, dynamic resize is allowed and we need to
// scale references for this case.
if (frame_is_intra_only(cm) == 0 && if (frame_is_intra_only(cm) == 0 &&
!(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) { ((svc->single_layer_svc && cpi->oxcf.resize_mode == RESIZE_DYNAMIC) ||
!(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref))) {
vp9_scale_references(cpi); vp9_scale_references(cpi);
} }
@ -4181,6 +4208,27 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
return 1; return 1;
} }
// Returns the set of VP9_*_FLAG bits for the reference frames that are worth
// searching for the current frame.
//
// A reference is dropped when its slot in cm->ref_frame_map resolves to the
// same entry as a reference that is already kept, so the same buffer is not
// considered twice.
static int get_ref_frame_flags(const VP9_COMP *cpi) {
  const int *const ref_map = cpi->common.ref_frame_map;
  const int last_buf = ref_map[cpi->lst_fb_idx];
  const int golden_buf = ref_map[cpi->gld_fb_idx];
  const int altref_buf = ref_map[cpi->alt_fb_idx];

  // Golden is also dropped when frames_till_gf_update_due is INT_MAX and the
  // stream has exactly one temporal and one spatial layer.
  const int single_layer = cpi->svc.number_temporal_layers == 1 &&
                           cpi->svc.number_spatial_layers == 1;
  const int gf_update_never_due =
      cpi->rc.frames_till_gf_update_due == INT_MAX;

  const int drop_golden =
      golden_buf == last_buf || (gf_update_never_due && single_layer);
  const int drop_altref = altref_buf == last_buf || golden_buf == altref_buf;

  int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
  if (drop_golden) flags &= ~VP9_GOLD_FLAG;
  if (drop_altref) flags &= ~VP9_ALT_FLAG;
  return flags;
}
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4 #define MAX_QSTEP_ADJ 4
static int get_qstep_adj(int rate_excess, int rate_limit) { static int get_qstep_adj(int rate_excess, int rate_limit) {
@ -4189,8 +4237,149 @@ static int get_qstep_adj(int rate_excess, int rate_limit) {
return VPXMIN(qstep, MAX_QSTEP_ADJ); return VPXMIN(qstep, MAX_QSTEP_ADJ);
} }
static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, #if CONFIG_RATE_CTRL
uint8_t *dest) { static void init_rq_history(RATE_QINDEX_HISTORY *rq_history) {
rq_history->recode_count = 0;
rq_history->q_index_high = 255;
rq_history->q_index_low = 0;
}
// Appends one (q_index, actual_bits) recode attempt to rq_history and tightens
// the q_index search bounds.
//
// An attempt that produced no more than target_bits becomes the new
// q_index_high; an attempt that produced no fewer than target_bits becomes the
// new q_index_low. An exact hit updates both bounds.
//
// NOTE(review): no bounds check is done on recode_count here; the caller is
// presumably responsible for stopping before the history arrays are full.
static void update_rq_history(RATE_QINDEX_HISTORY *rq_history, int target_bits,
                              int actual_bits, int q_index) {
  const int slot = rq_history->recode_count;

  rq_history->q_index_history[slot] = q_index;
  rq_history->rate_history[slot] = actual_bits;

  if (actual_bits <= target_bits) {
    rq_history->q_index_high = q_index;
  }
  if (actual_bits >= target_bits) {
    rq_history->q_index_low = q_index;
  }

  ++rq_history->recode_count;
}
// Inverts the rate model to find the q_index expected to yield target_bits.
//
// The model predicts
//   target_bits = bias - ratio * log2(q_step)
// so solving for q_step gives
//   q_step = 2 ^ ((bias - target_bits) / ratio).
// rq_model->ratio must be positive (asserted).
static int guess_q_index_from_model(const RATE_QSTEP_MODEL *rq_model,
                                    int target_bits) {
  double log2_q_step;
  assert(rq_model->ratio > 0);
  log2_q_step = (rq_model->bias - target_bits) / rq_model->ratio;
  // TODO(angiebird): Make this function support highbitdepth.
  return vp9_convert_q_to_qindex(pow(2.0, log2_q_step), VPX_BITS_8);
}
// Steps prev_q_index by a fixed gap toward the target rate.
//
// When the previous attempt undershot target_bits the index is lowered by gap
// (floored at 0); otherwise it is raised by gap (capped at 255).
static int guess_q_index_linear(int prev_q_index, int target_bits,
                                int actual_bits, int gap) {
  if (actual_bits < target_bits) {
    const int lowered = prev_q_index - gap;
    return VPXMAX(lowered, 0);
  } else {
    const int raised = prev_q_index + gap;
    return VPXMIN(raised, 255);
  }
}
// Returns |target_bits - actual_bits| as a percentage of target_bits.
//
// target_bits is clamped to at least 1 before use, so the division is always
// defined; the clamped value is also the one used in the difference.
static double get_bits_percent_diff(int target_bits, int actual_bits) {
  double diff;
  if (target_bits < 1) target_bits = 1;
  // Take the difference in double precision: an int subtraction can overflow
  // (undefined behavior) for operands of opposite sign, and abs(INT_MIN) is
  // undefined as well. Every int is exactly representable as a double, so the
  // result is unchanged for all inputs the int version handled correctly.
  diff = fabs((double)target_bits - (double)actual_bits) / target_bits;
  return diff * 100;
}
// Picks the q_index to try next for a frame whose size should hit target_bits.
//
// With no recode history yet, the model is used if it is ready, otherwise the
// default of 128 is kept. With history, the choice depends on how far the last
// attempt was from the target:
//   - more than 50% off: bisect [q_index_low, q_index_high];
//   - otherwise: use the model if ready, else step linearly from the previous
//     q_index.
// The result is then nudged so that it does not sit on either bound.
static int rq_model_predict_q_index(const RATE_QSTEP_MODEL *rq_model,
                                    const RATE_QINDEX_HISTORY *rq_history,
                                    int target_bits) {
  int q_index = 128;

  if (rq_history->recode_count > 0) {
    const int last = rq_history->recode_count - 1;
    const int actual_bits = rq_history->rate_history[last];
    const int prev_q_index = rq_history->q_index_history[last];
    const double percent_diff = get_bits_percent_diff(target_bits, actual_bits);

    if (percent_diff > 50) {
      // Binary search.
      // When the actual_bits and target_bits are far apart, binary search
      // q_index is faster.
      q_index = (rq_history->q_index_low + rq_history->q_index_high) / 2;
    } else if (rq_model->ready) {
      q_index = guess_q_index_from_model(rq_model, target_bits);
    } else {
      // TODO(angiebird): Find a better way to set the gap.
      q_index =
          guess_q_index_linear(prev_q_index, target_bits, actual_bits, 20);
    }
  } else if (rq_model->ready) {
    q_index = guess_q_index_from_model(rq_model, target_bits);
  }

  assert(rq_history->q_index_low <= rq_history->q_index_high);

  // The two adjustments are applied in sequence: the second one sees the
  // value produced by the first.
  if (q_index <= rq_history->q_index_low) {
    q_index = rq_history->q_index_low + 1;
  }
  if (q_index >= rq_history->q_index_high) {
    q_index = rq_history->q_index_high - 1;
  }
  return q_index;
}
// Refits rq_model (bits = bias - ratio * log2(q_step)) from the recode history.
//
// - Two or more attempts: fit ratio and bias through the last two
//   (q_step, bits) points, but only when a lower q_index produced strictly
//   more bits. The model is marked ready once both parameters exceed delta.
// - Exactly one attempt and a model that is already ready: adjust ratio only,
//   using the gap between target_bits and the bits actually produced.
// - Otherwise: leave the model untouched.
static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
                            int target_bits, RATE_QSTEP_MODEL *rq_model) {
  const double delta = 0.00001;
  const int recode_count = rq_history->recode_count;
  const int newest = recode_count - 1;

  if (recode_count >= 2) {
    const int q_index1 = rq_history->q_index_history[newest - 1];
    const int q_index2 = rq_history->q_index_history[newest];
    const int r1 = rq_history->rate_history[newest - 1];
    const int r2 = rq_history->rate_history[newest];
    // lower q_index should yield higher bit rate
    const int well_behaved =
        (q_index1 < q_index2 && r1 > r2) || (q_index1 > q_index2 && r1 < r2);

    // Only update the model when the q_index and rate behave normally.
    if (well_behaved) {
      // Fit the ratio and bias of rq_model based on last two recode histories.
      const double s1 = vp9_convert_qindex_to_q(q_index1, VPX_BITS_8);
      const double s2 = vp9_convert_qindex_to_q(q_index2, VPX_BITS_8);
      // Skip the fit when the two step sizes are too close to divide safely.
      if (fabs(log2(s1) - log2(s2)) > delta) {
        rq_model->ratio = (r2 - r1) / (log2(s1) - log2(s2));
        rq_model->bias = r1 + (rq_model->ratio) * log2(s1);
        if (rq_model->ratio > delta && rq_model->bias > delta) {
          rq_model->ready = 1;
        }
      }
    }
    return;
  }

  if (recode_count == 1 && rq_model->ready) {
    // Update the ratio only when the initial model exists and we only have
    // one recode history.
    const int prev_q = rq_history->q_index_history[newest];
    const double prev_q_step = vp9_convert_qindex_to_q(prev_q, VPX_BITS_8);
    if (fabs(log2(prev_q_step)) > delta) {
      const int actual_bits = rq_history->rate_history[newest];
      rq_model->ratio =
          rq_model->ratio + (target_bits - actual_bits) / log2(prev_q_step);
    }
  }
}
#endif // CONFIG_RATE_CTRL
static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest
#if CONFIG_RATE_CTRL
,
RATE_QINDEX_HISTORY *rq_history
#endif // CONFIG_RATE_CTRL
) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf; const VP9EncoderConfig *const oxcf = &cpi->oxcf;
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc; RATE_CONTROL *const rc = &cpi->rc;
@ -4208,6 +4397,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
int qrange_adj = 1; int qrange_adj = 1;
#endif #endif
#if CONFIG_RATE_CTRL
const FRAME_UPDATE_TYPE update_type =
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
init_rq_history(rq_history);
#endif // CONFIG_RATE_CTRL
if (cm->show_existing_frame) { if (cm->show_existing_frame) {
rc->this_frame_target = 0; rc->this_frame_target = 0;
if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi); if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
@ -4254,6 +4451,11 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
loop_at_this_size = 0; loop_at_this_size = 0;
} }
#if CONFIG_RATE_CTRL
if (cpi->encode_command.use_external_target_frame_bits) {
q = rq_model_predict_q_index(rq_model, rq_history, rc->this_frame_target);
}
#endif // CONFIG_RATE_CTRL
// Decide frame size bounds first time through. // Decide frame size bounds first time through.
if (loop_count == 0) { if (loop_count == 0) {
vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target, vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
@ -4300,6 +4502,19 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
q = cpi->encode_command.external_quantize_index; q = cpi->encode_command.external_quantize_index;
} }
#endif #endif
if (cpi->ext_ratectrl.ready) {
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
vpx_rc_encodeframe_decision_t encode_frame_decision;
FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
const int ref_frame_flags = get_ref_frame_flags(cpi);
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
get_ref_frame_bufs(cpi, ref_frame_bufs);
vp9_extrc_get_encodeframe_decision(
&cpi->ext_ratectrl, cm->current_video_frame,
cm->current_frame_coding_index, update_type, ref_frame_bufs,
ref_frame_flags, &encode_frame_decision);
q = encode_frame_decision.q_index;
}
vp9_set_quantizer(cpi, q); vp9_set_quantizer(cpi, q);
@ -4339,6 +4554,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
} }
if (cpi->ext_ratectrl.ready) {
break;
}
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
// This part needs to be after save_coding_context() because // This part needs to be after save_coding_context() because
// restore_coding_context will be called in the end of this function. // restore_coding_context will be called in the end of this function.
@ -4347,7 +4565,28 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
if (cpi->encode_command.use_external_quantize_index) { if (cpi->encode_command.use_external_quantize_index) {
break; break;
} }
#endif
if (cpi->encode_command.use_external_target_frame_bits) {
const double percent_diff = get_bits_percent_diff(
rc->this_frame_target, rc->projected_frame_size);
update_rq_history(rq_history, rc->this_frame_target,
rc->projected_frame_size, q);
loop_count += 1;
rq_model_update(rq_history, rc->this_frame_target, rq_model);
// Check if we hit the target bitrate.
if (percent_diff <= cpi->encode_command.target_frame_bits_error_percent ||
rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
rq_history->q_index_low >= rq_history->q_index_high) {
break;
}
loop = 1;
restore_coding_context(cpi);
continue;
}
#endif // CONFIG_RATE_CTRL
if (oxcf->rc_mode == VPX_Q) { if (oxcf->rc_mode == VPX_Q) {
loop = 0; loop = 0;
@ -4562,27 +4801,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
} }
#endif // !CONFIG_REALTIME_ONLY #endif // !CONFIG_REALTIME_ONLY
static int get_ref_frame_flags(const VP9_COMP *cpi) {
const int *const map = cpi->common.ref_frame_map;
const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
(cpi->svc.number_temporal_layers == 1 &&
cpi->svc.number_spatial_layers == 1))
flags &= ~VP9_GOLD_FLAG;
if (alt_is_last) flags &= ~VP9_ALT_FLAG;
if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
return flags;
}
static void set_ext_overrides(VP9_COMP *cpi) { static void set_ext_overrides(VP9_COMP *cpi) {
// Overrides the defaults with the externally supplied values with // Overrides the defaults with the externally supplied values with
// vp9_update_reference() and vp9_update_entropy() calls // vp9_update_reference() and vp9_update_entropy() calls
@ -4887,9 +5105,7 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
ref_buffer->frame_index = ref_buffer->frame_index =
cm->current_video_frame + gf_group->arf_src_offset[gf_group->index]; cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
#if CONFIG_RATE_CTRL
ref_buffer->frame_coding_index = cm->current_frame_coding_index; ref_buffer->frame_coding_index = cm->current_frame_coding_index;
#endif // CONFIG_RATE_CTRL
} }
} }
@ -5092,6 +5308,7 @@ static void update_encode_frame_result(
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
const PARTITION_INFO *partition_info, const PARTITION_INFO *partition_info,
const MOTION_VECTOR_INFO *motion_vector_info, const MOTION_VECTOR_INFO *motion_vector_info,
const TplDepStats *tpl_stats_info,
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
ENCODE_FRAME_RESULT *encode_frame_result); ENCODE_FRAME_RESULT *encode_frame_result);
#endif // !CONFIG_REALTIME_ONLY #endif // !CONFIG_REALTIME_ONLY
@ -5197,8 +5414,12 @@ static void encode_frame_to_data_rate(
if (!encode_without_recode_loop(cpi, size, dest)) return; if (!encode_without_recode_loop(cpi, size, dest)) return;
} else { } else {
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
#if CONFIG_RATE_CTRL
encode_with_recode_loop(cpi, size, dest, &encode_frame_result->rq_history);
#else // CONFIG_RATE_CTRL
encode_with_recode_loop(cpi, size, dest); encode_with_recode_loop(cpi, size, dest);
#endif #endif // CONFIG_RATE_CTRL
#endif // !CONFIG_REALTIME_ONLY
} }
// TODO(jingning): When using show existing frame mode, we assume that the // TODO(jingning): When using show existing frame mode, we assume that the
@ -5263,6 +5484,13 @@ static void encode_frame_to_data_rate(
// build the bitstream // build the bitstream
vp9_pack_bitstream(cpi, dest, size); vp9_pack_bitstream(cpi, dest, size);
{
const RefCntBuffer *coded_frame_buf =
get_ref_cnt_buffer(cm, cm->new_fb_idx);
vp9_extrc_update_encodeframe_result(
&cpi->ext_ratectrl, (*size) << 3, cpi->Source, &coded_frame_buf->buf,
cm->bit_depth, cpi->oxcf.input_bit_depth);
}
#if CONFIG_REALTIME_ONLY #if CONFIG_REALTIME_ONLY
(void)encode_frame_result; (void)encode_frame_result;
assert(encode_frame_result == NULL); assert(encode_frame_result == NULL);
@ -5293,9 +5521,9 @@ static void encode_frame_to_data_rate(
ref_frame_flags, ref_frame_flags,
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi), cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts, cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
cpi->partition_info, cpi->motion_vector_info, cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
encode_frame_result); encode_frame_result);
} }
@ -5450,6 +5678,11 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags, unsigned int *frame_flags,
ENCODE_FRAME_RESULT *encode_frame_result) { ENCODE_FRAME_RESULT *encode_frame_result) {
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
if (cpi->common.current_frame_coding_index == 0) {
vp9_extrc_send_firstpass_stats(&cpi->ext_ratectrl,
&cpi->twopass.first_pass_info);
}
#if CONFIG_MISMATCH_DEBUG #if CONFIG_MISMATCH_DEBUG
mismatch_move_frame_idx_w(); mismatch_move_frame_idx_w();
#endif #endif
@ -7141,6 +7374,48 @@ static void free_tpl_buffer(VP9_COMP *cpi) {
} }
} }
#if CONFIG_RATE_CTRL
// Sums the per-block TPL statistics of every valid frame in the current GF
// group into one frame-level total per frame.
//
// Frames are visited from index 1 up to gf_group_size - 1. Invalid frames are
// skipped and do not consume an output slot, so cpi->tpl_stats_info is filled
// densely, in visiting order.
static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int out_idx = 0;
  int frame_idx;

  // Accumulate tpl stats for each frame in the current group of picture.
  for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    int64_t intra_cost_sum = 0;
    int64_t inter_cost_sum = 0;
    int64_t mc_dep_cost_sum = 0;
    int64_t mc_ref_cost_sum = 0;
    int64_t mc_flow_sum = 0;
    int row, col;

    if (tpl_frame->is_valid) {
      TplDepStats *const block_stats = tpl_frame->tpl_stats_ptr;
      const int stride = tpl_frame->stride;
      TplDepStats *const frame_total = &cpi->tpl_stats_info[out_idx];

      for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
        for (col = 0; col < cm->mi_cols; ++col) {
          const TplDepStats *const blk = &block_stats[row * stride + col];
          intra_cost_sum += blk->intra_cost;
          inter_cost_sum += blk->inter_cost;
          mc_dep_cost_sum += blk->mc_dep_cost;
          mc_ref_cost_sum += blk->mc_ref_cost;
          mc_flow_sum += blk->mc_flow;
        }
      }

      frame_total->intra_cost = intra_cost_sum;
      frame_total->inter_cost = inter_cost_sum;
      frame_total->mc_dep_cost = mc_dep_cost_sum;
      frame_total->mc_ref_cost = mc_ref_cost_sum;
      frame_total->mc_flow = mc_flow_sum;
      ++out_idx;
    }
  }
}
#endif // CONFIG_RATE_CTRL
static void setup_tpl_stats(VP9_COMP *cpi) { static void setup_tpl_stats(VP9_COMP *cpi) {
GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group; const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@ -7163,6 +7438,34 @@ static void setup_tpl_stats(VP9_COMP *cpi) {
dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize); dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
#endif // DUMP_TPL_STATS #endif // DUMP_TPL_STATS
#endif // CONFIG_NON_GREEDY_MV #endif // CONFIG_NON_GREEDY_MV
#if CONFIG_RATE_CTRL
accumulate_frame_tpl_stats(cpi);
#endif // CONFIG_RATE_CTRL
}
// Fills, for each of the MAX_INTER_REF_FRAMES inter references (in the order
// last, golden, alt), its coding index and whether it is enabled.
//
//   update_type              KF_UPDATE marks a key frame, which has no
//                            references.
//   ref_frame_flags          Bitmask of VP9_LAST_FLAG / VP9_GOLD_FLAG /
//                            VP9_ALT_FLAG.
//   ref_frame_bufs           Reference buffers; every entry must be non-NULL
//                            for non-key frames (asserted).
//   ref_frame_coding_indexes Output: frame_coding_index of each reference, or
//                            -1 for a key frame.
//   ref_frame_valid_list     Output: 1 if the reference's flag is set in
//                            ref_frame_flags, else 0 (always 0 for key frames).
void vp9_get_ref_frame_info(FRAME_UPDATE_TYPE update_type, int ref_frame_flags,
                            RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES],
                            int *ref_frame_coding_indexes,
                            int *ref_frame_valid_list) {
  const VP9_REFFRAME flag_of_ref[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
                                                           VP9_GOLD_FLAG,
                                                           VP9_ALT_FLAG };
  int ref;

  if (update_type == KF_UPDATE) {
    // No reference frame is available when this is a key frame.
    for (ref = 0; ref < MAX_INTER_REF_FRAMES; ++ref) {
      ref_frame_coding_indexes[ref] = -1;
      ref_frame_valid_list[ref] = 0;
    }
    return;
  }

  for (ref = 0; ref < MAX_INTER_REF_FRAMES; ++ref) {
    assert(ref_frame_bufs[ref] != NULL);
    ref_frame_coding_indexes[ref] = ref_frame_bufs[ref]->frame_coding_index;
    ref_frame_valid_list[ref] = (ref_frame_flags & flag_of_ref[ref]) != 0;
  }
}
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
@ -7312,6 +7615,7 @@ static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
} }
} }
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
static void update_encode_frame_result( static void update_encode_frame_result(
int ref_frame_flags, FRAME_UPDATE_TYPE update_type, int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf, const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
@ -7320,12 +7624,13 @@ static void update_encode_frame_result(
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
const PARTITION_INFO *partition_info, const PARTITION_INFO *partition_info,
const MOTION_VECTOR_INFO *motion_vector_info, const MOTION_VECTOR_INFO *motion_vector_info,
const TplDepStats *tpl_stats_info,
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
ENCODE_FRAME_RESULT *encode_frame_result) { ENCODE_FRAME_RESULT *encode_frame_result) {
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
PSNR_STATS psnr; PSNR_STATS psnr;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
vpx_calc_highbd_psnr(source_frame, coded_frame_buf->buf, &psnr, bit_depth, vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,
input_bit_depth); input_bit_depth);
#else // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_VP9_HIGHBITDEPTH
(void)bit_depth; (void)bit_depth;
@ -7334,31 +7639,16 @@ static void update_encode_frame_result(
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index; encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
if (update_type != KF_UPDATE) { vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,
const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG, encode_frame_result->ref_frame_coding_indexes,
VP9_GOLD_FLAG, encode_frame_result->ref_frame_valid_list);
VP9_ALT_FLAG };
int i;
for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
assert(ref_frame_bufs[i] != NULL);
encode_frame_result->ref_frame_coding_indexes[i] =
ref_frame_bufs[i]->frame_coding_index;
encode_frame_result->ref_frame_valid_list[i] =
(ref_frame_flags & inter_ref_flags[i]) != 0;
}
} else {
// No reference frame is available when this is a key frame.
int i;
for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
encode_frame_result->ref_frame_coding_indexes[i] = -1;
encode_frame_result->ref_frame_valid_list[i] = 0;
}
}
encode_frame_result->psnr = psnr.psnr[0]; encode_frame_result->psnr = psnr.psnr[0];
encode_frame_result->sse = psnr.sse[0]; encode_frame_result->sse = psnr.sse[0];
copy_frame_counts(counts, &encode_frame_result->frame_counts); copy_frame_counts(counts, &encode_frame_result->frame_counts);
encode_frame_result->partition_info = partition_info; encode_frame_result->partition_info = partition_info;
encode_frame_result->motion_vector_info = motion_vector_info; encode_frame_result->motion_vector_info = motion_vector_info;
encode_frame_result->tpl_stats_info = tpl_stats_info;
if (encode_frame_result->coded_frame.allocated) { if (encode_frame_result->coded_frame.allocated) {
yv12_buffer_to_image_buffer(&coded_frame_buf->buf, yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
&encode_frame_result->coded_frame); &encode_frame_result->coded_frame);
@ -7384,6 +7674,7 @@ void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
encode_frame_result->frame_coding_index = -1; encode_frame_result->frame_coding_index = -1;
vp9_zero(encode_frame_result->coded_frame); vp9_zero(encode_frame_result->coded_frame);
encode_frame_result->coded_frame.allocated = 0; encode_frame_result->coded_frame.allocated = 0;
init_rq_history(&encode_frame_result->rq_history);
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
} }

View file

@ -15,6 +15,7 @@
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h" #include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vpx_ext_ratectrl.h"
#include "vpx/vp8cx.h" #include "vpx/vp8cx.h"
#if CONFIG_INTERNAL_STATS #if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h" #include "vpx_dsp/ssim.h"
@ -38,6 +39,7 @@
#include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_ext_ratectrl.h"
#include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_job_queue.h" #include "vp9/encoder/vp9_job_queue.h"
#include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_lookahead.h"
@ -147,6 +149,12 @@ typedef enum {
kVeryHighSad = 6, kVeryHighSad = 6,
} CONTENT_STATE_SB; } CONTENT_STATE_SB;
// Selects on which frames the loopfilter is run.
typedef enum {
LOOPFILTER_ALL = 0, // NOTE(review): presumably loopfilter on every frame - confirm.
LOOPFILTER_REFERENCE = 1, // Disable loopfilter on non reference frames.
NO_LOOPFILTER = 2, // Disable loopfilter on all frames.
} LOOPFILTER_CONTROL;
typedef struct VP9EncoderConfig { typedef struct VP9EncoderConfig {
BITSTREAM_PROFILE profile; BITSTREAM_PROFILE profile;
vpx_bit_depth_t bit_depth; // Codec bit-depth. vpx_bit_depth_t bit_depth; // Codec bit-depth.
@ -532,24 +540,83 @@ typedef struct MOTION_VECTOR_INFO {
int_mv mv[2]; int_mv mv[2];
} MOTION_VECTOR_INFO; } MOTION_VECTOR_INFO;
// Group-of-pictures layout request. Filled by gop_command_on() and cleared by
// gop_command_off().
typedef struct GOP_COMMAND {
int use; // use this command to set gop or not. If not, use vp9's decision.
// Summed with use_alt_ref by gop_command_coding_frame_count() to obtain the
// number of coding frames. Presumably the number of shown frames - confirm.
int show_frame_count;
// Added as-is to the coding frame count, so it is expected to be 0 or 1.
int use_alt_ref;
} GOP_COMMAND;
// Activates |gop_command| and stores the requested layout in it.
//   show_frame_count: value stored in gop_command->show_frame_count.
//   use_alt_ref:      value stored in gop_command->use_alt_ref.
static INLINE void gop_command_on(GOP_COMMAND *gop_command,
                                  int show_frame_count, int use_alt_ref) {
  GOP_COMMAND *const cmd = gop_command;
  cmd->show_frame_count = show_frame_count;
  cmd->use_alt_ref = use_alt_ref;
  // Mark the command as active.
  cmd->use = 1;
}
// Deactivates |gop_command| and clears every field to 0.
static INLINE void gop_command_off(GOP_COMMAND *gop_command) {
  GOP_COMMAND *const cmd = gop_command;
  cmd->use_alt_ref = 0;
  cmd->show_frame_count = 0;
  cmd->use = 0;
}
// Returns show_frame_count + use_alt_ref for an active command.
// Calling this on an inactive command (use == 0) is a programming error: it
// trips assert(0) in debug builds and returns -1 otherwise.
static INLINE int gop_command_coding_frame_count(
    const GOP_COMMAND *gop_command) {
  if (gop_command->use != 0) {
    return gop_command->show_frame_count + gop_command->use_alt_ref;
  }
  assert(0);
  return -1;
}
// TODO(angiebird): See if we can merge this one with FrameType in
// simple_encode.h
// Frame categories produced by get_encode_frame_type().
typedef enum ENCODE_FRAME_TYPE {
ENCODE_FRAME_TYPE_KEY,
ENCODE_FRAME_TYPE_INTER,
ENCODE_FRAME_TYPE_ALTREF,
ENCODE_FRAME_TYPE_OVERLAY,
ENCODE_FRAME_TYPE_GOLDEN,
ENCODE_FRAME_TYPES, // Number of frame types; sizes VP9_COMP::rq_model[].
} ENCODE_FRAME_TYPE;
// TODO(angiebird): Merge this function with get_frame_type_from_update_type()
// Maps a FRAME_UPDATE_TYPE to the matching ENCODE_FRAME_TYPE.
// Any update type other than KF/ARF/GF/OVERLAY/LF is reported on stderr and
// aborts the process.
static INLINE ENCODE_FRAME_TYPE
get_encode_frame_type(FRAME_UPDATE_TYPE update_type) {
  if (update_type == KF_UPDATE) return ENCODE_FRAME_TYPE_KEY;
  if (update_type == LF_UPDATE) return ENCODE_FRAME_TYPE_INTER;
  if (update_type == GF_UPDATE) return ENCODE_FRAME_TYPE_GOLDEN;
  if (update_type == ARF_UPDATE) return ENCODE_FRAME_TYPE_ALTREF;
  if (update_type == OVERLAY_UPDATE) return ENCODE_FRAME_TYPE_OVERLAY;

  fprintf(stderr, "Unsupported update_type %d\n", update_type);
  abort();
  // Not reached; keeps compilers that do not know abort() never returns quiet.
  return ENCODE_FRAME_TYPE_INTER;
}
// Linear-in-log2(q_step) rate model. VP9_COMP keeps one instance per
// ENCODE_FRAME_TYPE in rq_model[].
typedef struct RATE_QSTEP_MODEL {
// The rq model predicts the bit usage as follows.
// rate = bias - ratio * log2(q_step)
// NOTE(review): presumably non-zero once bias/ratio hold a usable fit -
// confirm against the code that fills the model.
int ready;
double bias;
double ratio;
} RATE_QSTEP_MODEL;
typedef struct ENCODE_COMMAND { typedef struct ENCODE_COMMAND {
int use_external_quantize_index; int use_external_quantize_index;
int external_quantize_index; int external_quantize_index;
// A list of binary flags set from the external controller.
// Each binary flag indicates whether the frame is an arf or not. int use_external_target_frame_bits;
const int *external_arf_indexes; int target_frame_bits;
double target_frame_bits_error_percent;
GOP_COMMAND gop_command;
} ENCODE_COMMAND; } ENCODE_COMMAND;
static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) { static INLINE void encode_command_set_gop_command(
vp9_zero(*encode_command); ENCODE_COMMAND *encode_command, GOP_COMMAND gop_command) {
encode_command->use_external_quantize_index = 0; encode_command->gop_command = gop_command;
encode_command->external_quantize_index = -1;
encode_command->external_arf_indexes = NULL;
}
static INLINE void encode_command_set_external_arf_indexes(
ENCODE_COMMAND *encode_command, const int *external_arf_indexes) {
encode_command->external_arf_indexes = external_arf_indexes;
} }
static INLINE void encode_command_set_external_quantize_index( static INLINE void encode_command_set_external_quantize_index(
@ -564,9 +631,35 @@ static INLINE void encode_command_reset_external_quantize_index(
encode_command->external_quantize_index = -1; encode_command->external_quantize_index = -1;
} }
// Enables the external per-frame bit target on |encode_command|.
//   target_frame_bits:               stored in encode_command->target_frame_bits.
//   target_frame_bits_error_percent: stored in
//                                    encode_command->target_frame_bits_error_percent.
static INLINE void encode_command_set_target_frame_bits(
    ENCODE_COMMAND *encode_command, int target_frame_bits,
    double target_frame_bits_error_percent) {
  ENCODE_COMMAND *const cmd = encode_command;
  cmd->target_frame_bits = target_frame_bits;
  cmd->target_frame_bits_error_percent = target_frame_bits_error_percent;
  cmd->use_external_target_frame_bits = 1;
}
// Disables the external per-frame bit target: the flag is cleared, the target
// is set to -1 and the error percentage to 0.
static INLINE void encode_command_reset_target_frame_bits(
    ENCODE_COMMAND *encode_command) {
  ENCODE_COMMAND *const cmd = encode_command;
  cmd->target_frame_bits_error_percent = 0;
  cmd->target_frame_bits = -1;
  cmd->use_external_target_frame_bits = 0;
}
// Puts |encode_command| into its default state: the struct is zeroed, then
// the external quantize index, the external bit target and the GOP command
// are each reset through their dedicated helpers.
static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) {
  ENCODE_COMMAND *const cmd = encode_command;
  vp9_zero(*cmd);
  encode_command_reset_external_quantize_index(cmd);
  encode_command_reset_target_frame_bits(cmd);
  gop_command_off(&cmd->gop_command);
}
// Returns number of units in size of 4, if not multiple not a multiple of 4, // Returns number of units in size of 4, if not multiple not a multiple of 4,
// round it up. For example, size is 7, return 2. // round it up. For example, size is 7, return 2.
static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; } static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; }
// Returns the number of 16-sample units needed to cover |size|, rounding up
// when |size| is not a multiple of 16. For example, size 17 returns 2.
static INLINE int get_num_unit_16x16(int size) {
  const int padded = size + 15;
  return padded >> 4;
}
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
typedef struct VP9_COMP { typedef struct VP9_COMP {
@ -873,11 +966,18 @@ typedef struct VP9_COMP {
int multi_layer_arf; int multi_layer_arf;
vpx_roi_map_t roi; vpx_roi_map_t roi;
LOOPFILTER_CONTROL loopfilter_ctrl;
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
ENCODE_COMMAND encode_command; ENCODE_COMMAND encode_command;
PARTITION_INFO *partition_info; PARTITION_INFO *partition_info;
MOTION_VECTOR_INFO *motion_vector_info; MOTION_VECTOR_INFO *motion_vector_info;
MOTION_VECTOR_INFO *fp_motion_vector_info;
TplDepStats *tpl_stats_info;
RATE_QSTEP_MODEL rq_model[ENCODE_FRAME_TYPES];
#endif #endif
EXT_RATECTRL ext_ratectrl;
} VP9_COMP; } VP9_COMP;
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
@ -902,6 +1002,13 @@ static INLINE void free_partition_info(struct VP9_COMP *cpi) {
cpi->partition_info = NULL; cpi->partition_info = NULL;
} }
// Marks both motion vector slots of |mv_info| as unused: the reference frame
// becomes NONE and the vector becomes INVALID_MV.
static INLINE void reset_mv_info(MOTION_VECTOR_INFO *mv_info) {
  int slot;
  for (slot = 0; slot < 2; ++slot) {
    mv_info->ref_frame[slot] = NONE;
    mv_info->mv[slot].as_int = INVALID_MV;
  }
}
// Allocates memory for the motion vector information. // Allocates memory for the motion vector information.
// The unit size is each 4x4 block. // The unit size is each 4x4 block.
// Only called once in vp9_create_compressor(). // Only called once in vp9_create_compressor().
@ -923,6 +1030,53 @@ static INLINE void free_motion_vector_info(struct VP9_COMP *cpi) {
cpi->motion_vector_info = NULL; cpi->motion_vector_info = NULL;
} }
// Allocates memory for the tpl stats information.
// Only called once in vp9_create_compressor().
// The array holds MAX_LAG_BUFFERS TplDepStats entries. vpx_calloc() hands
// back zero-filled memory, so no extra memset() is needed (and none is done,
// which also avoids touching the pointer before the allocation check).
// NOTE(review): CHECK_MEM_ERROR is expected to report a failed allocation
// through cm's error handling - confirm against its definition.
static INLINE void tpl_stats_info_init(struct VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  CHECK_MEM_ERROR(
      cm, cpi->tpl_stats_info,
      (TplDepStats *)vpx_calloc(MAX_LAG_BUFFERS, sizeof(TplDepStats)));
}
// Frees memory of the tpl stats information.
// Only called once in dealloc_compressor_data().
static INLINE void free_tpl_stats_info(struct VP9_COMP *cpi) {
vpx_free(cpi->tpl_stats_info);
cpi->tpl_stats_info = NULL;
}
// Allocates the first pass motion vector array, one MOTION_VECTOR_INFO per
// 16x16 unit of the frame (dimensions rounded up to whole units).
// Only called once in vp9_create_compressor().
static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int cols_16x16 = get_num_unit_16x16(cpi->frame_info.frame_width);
  const int rows_16x16 = get_num_unit_16x16(cpi->frame_info.frame_height);
  CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,
                  (MOTION_VECTOR_INFO *)vpx_calloc(cols_16x16 * rows_16x16,
                                                   sizeof(MOTION_VECTOR_INFO)));
}
// Resets every entry of |fp_motion_vector_info| with reset_mv_info().
// The number of entries is derived from the frame size in 16x16 units, the
// same way fp_motion_vector_info_init() sizes the array.
static INLINE void fp_motion_vector_info_reset(
    int frame_width, int frame_height,
    MOTION_VECTOR_INFO *fp_motion_vector_info) {
  const int cols_16x16 = get_num_unit_16x16(frame_width);
  const int rows_16x16 = get_num_unit_16x16(frame_height);
  int remaining = cols_16x16 * rows_16x16;
  MOTION_VECTOR_INFO *entry = fp_motion_vector_info;
  while (remaining-- > 0) {
    reset_mv_info(entry++);
  }
}
// Frees memory of the first pass motion vector information.
// Only called once in dealloc_compressor_data().
static INLINE void free_fp_motion_vector_info(struct VP9_COMP *cpi) {
vpx_free(cpi->fp_motion_vector_info);
cpi->fp_motion_vector_info = NULL;
}
// This is the c-version counter part of ImageBuffer // This is the c-version counter part of ImageBuffer
typedef struct IMAGE_BUFFER { typedef struct IMAGE_BUFFER {
int allocated; int allocated;
@ -930,6 +1084,17 @@ typedef struct IMAGE_BUFFER {
int plane_height[3]; int plane_height[3];
uint8_t *plane_buffer[3]; uint8_t *plane_buffer[3];
} IMAGE_BUFFER; } IMAGE_BUFFER;
// Capacity of the q_index_history / rate_history arrays below.
#define RATE_CTRL_MAX_RECODE_NUM 7
// Quantizer-index / rate history carried in ENCODE_FRAME_RESULT::rq_history.
// NOTE(review): presumably one (q_index, rate) pair is appended per recode
// attempt and q_index_high / q_index_low bound the remaining search range -
// confirm against the code that fills this struct (not visible here).
typedef struct RATE_QINDEX_HISTORY {
int recode_count;
int q_index_history[RATE_CTRL_MAX_RECODE_NUM];
int rate_history[RATE_CTRL_MAX_RECODE_NUM];
int q_index_high;
int q_index_low;
} RATE_QINDEX_HISTORY;
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
typedef struct ENCODE_FRAME_RESULT { typedef struct ENCODE_FRAME_RESULT {
@ -944,7 +1109,9 @@ typedef struct ENCODE_FRAME_RESULT {
FRAME_COUNTS frame_counts; FRAME_COUNTS frame_counts;
const PARTITION_INFO *partition_info; const PARTITION_INFO *partition_info;
const MOTION_VECTOR_INFO *motion_vector_info; const MOTION_VECTOR_INFO *motion_vector_info;
const TplDepStats *tpl_stats_info;
IMAGE_BUFFER coded_frame; IMAGE_BUFFER coded_frame;
RATE_QINDEX_HISTORY rq_history;
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
int quantize_index; int quantize_index;
} ENCODE_FRAME_RESULT; } ENCODE_FRAME_RESULT;
@ -1000,6 +1167,14 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
void vp9_set_svc(VP9_COMP *cpi, int use_svc); void vp9_set_svc(VP9_COMP *cpi, int use_svc);
// Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
// configuration change has a large change in avg_frame_bandwidth.
// For SVC check for resetting based on spatial layer average bandwidth.
// Also reset buffer level to optimal level.
void vp9_check_reset_rc_flag(VP9_COMP *cpi);
void vp9_set_rc_buffer_sizes(VP9_COMP *cpi);
static INLINE int stack_pop(int *stack, int stack_size) { static INLINE int stack_pop(int *stack, int stack_size) {
int idx; int idx;
const int r = stack[0]; const int r = stack[0];
@ -1112,6 +1287,11 @@ void vp9_scale_references(VP9_COMP *cpi);
void vp9_update_reference_frames(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi);
void vp9_get_ref_frame_info(FRAME_UPDATE_TYPE update_type, int ref_frame_flags,
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES],
int *ref_frame_coding_indexes,
int *ref_frame_valid_list);
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
YV12_BUFFER_CONFIG *vp9_svc_twostage_scale( YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(

View file

@ -0,0 +1,150 @@
/*
* Copyright (c) 2020 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/encoder/vp9_ext_ratectrl.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/psnr.h"
// Zeroes |ext_ratectrl|, which also leaves its ready flag cleared.
void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl) {
  vp9_zero(*ext_ratectrl);
}
// Attaches an external rate-control model to |ext_ratectrl|.
//   funcs:           callback table; copied into ext_ratectrl->funcs.
//   ratectrl_config: configuration; copied and passed to funcs.create_model().
//   ext_ratectrl:    state to (re)initialize. Any previously attached model is
//                    torn down first via vp9_extrc_delete().
// On success ext_ratectrl->ready is set to 1. If the first-pass stats buffer
// cannot be allocated, the freshly created model is deleted again and
// |ext_ratectrl| is left zeroed (ready == 0), so the other vp9_extrc_* entry
// points stay no-ops instead of writing through a NULL frame_stats pointer.
void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,
                      EXT_RATECTRL *ext_ratectrl) {
  vpx_rc_firstpass_stats_t *rc_firstpass_stats;
  vp9_extrc_delete(ext_ratectrl);
  ext_ratectrl->funcs = funcs;
  ext_ratectrl->ratectrl_config = ratectrl_config;
  // NOTE(review): any status returned by create_model() is still ignored
  // here - confirm the callback contract in vpx/vpx_ext_ratectrl.h.
  ext_ratectrl->funcs.create_model(ext_ratectrl->funcs.priv,
                                   &ext_ratectrl->ratectrl_config,
                                   &ext_ratectrl->model);
  rc_firstpass_stats = &ext_ratectrl->rc_firstpass_stats;
  rc_firstpass_stats->num_frames = ratectrl_config.show_frame_count;
  rc_firstpass_stats->frame_stats =
      vpx_malloc(sizeof(*rc_firstpass_stats->frame_stats) *
                 rc_firstpass_stats->num_frames);
  if (rc_firstpass_stats->frame_stats == NULL &&
      rc_firstpass_stats->num_frames > 0) {
    // Allocation failed: undo create_model() and stay in the "not ready"
    // state rather than advertising a model with no stats buffer.
    ext_ratectrl->funcs.delete_model(ext_ratectrl->model);
    vp9_extrc_init(ext_ratectrl);
    return;
  }
  ext_ratectrl->ready = 1;
}
// Detaches the external rate-control model, if one is attached.
// When the ready flag is set, the model is handed to funcs.delete_model() and
// the first-pass stats buffer is freed. In every case |ext_ratectrl| ends up
// zeroed by vp9_extrc_init().
void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl) {
  const int has_model = ext_ratectrl->ready;
  if (has_model) {
    ext_ratectrl->funcs.delete_model(ext_ratectrl->model);
    vpx_free(ext_ratectrl->rc_firstpass_stats.frame_stats);
  }
  vp9_extrc_init(ext_ratectrl);
}
// Copies one frame's first-pass statistics from the encoder's internal
// FIRSTPASS_STATS into the external rate-control representation.
// Every field is copied one-to-one under the same name.
static void gen_rc_firstpass_stats(const FIRSTPASS_STATS *stats,
                                   vpx_rc_frame_stats_t *rc_frame_stats) {
  const FIRSTPASS_STATS *const in = stats;
  vpx_rc_frame_stats_t *const out = rc_frame_stats;

  // Frame identity and weighting.
  out->frame = in->frame;
  out->weight = in->weight;
  out->duration = in->duration;
  out->count = in->count;

  // Error and noise measures.
  out->intra_error = in->intra_error;
  out->coded_error = in->coded_error;
  out->sr_coded_error = in->sr_coded_error;
  out->frame_noise_energy = in->frame_noise_energy;

  // Percentage and zone statistics.
  out->pcnt_inter = in->pcnt_inter;
  out->pcnt_motion = in->pcnt_motion;
  out->pcnt_second_ref = in->pcnt_second_ref;
  out->pcnt_neutral = in->pcnt_neutral;
  out->pcnt_intra_low = in->pcnt_intra_low;
  out->pcnt_intra_high = in->pcnt_intra_high;
  out->intra_skip_pct = in->intra_skip_pct;
  out->intra_smooth_pct = in->intra_smooth_pct;
  out->inactive_zone_rows = in->inactive_zone_rows;
  out->inactive_zone_cols = in->inactive_zone_cols;

  // Motion vector statistics.
  out->MVr = in->MVr;
  out->mvr_abs = in->mvr_abs;
  out->MVc = in->MVc;
  out->mvc_abs = in->mvc_abs;
  out->MVrv = in->MVrv;
  out->MVcv = in->MVcv;
  out->mv_in_out_count = in->mv_in_out_count;
}
// Converts all first-pass frame stats and hands them to the external model.
// Does nothing unless a model is attached (ext_ratectrl->ready).
// The frame count recorded at creation time is asserted to match
// first_pass_info->num_frames.
void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,
                                    const FIRST_PASS_INFO *first_pass_info) {
  vpx_rc_firstpass_stats_t *rc_firstpass_stats;
  int frame_idx;

  if (!ext_ratectrl->ready) return;

  rc_firstpass_stats = &ext_ratectrl->rc_firstpass_stats;
  assert(rc_firstpass_stats->num_frames == first_pass_info->num_frames);
  frame_idx = 0;
  while (frame_idx < rc_firstpass_stats->num_frames) {
    gen_rc_firstpass_stats(&first_pass_info->stats[frame_idx],
                           &rc_firstpass_stats->frame_stats[frame_idx]);
    ++frame_idx;
  }
  ext_ratectrl->funcs.send_firstpass_stats(ext_ratectrl->model,
                                           rc_firstpass_stats);
}
// Maps a FRAME_UPDATE_TYPE to the integer frame type code passed to the
// external rate controller. Unsupported update types are reported on stderr
// and abort the process.
static int extrc_get_frame_type(FRAME_UPDATE_TYPE update_type) {
  // TODO(angiebird): Add unit test to make sure this function behaves like
  // get_frame_type_from_update_type()
  // TODO(angiebird): Merge this function with get_frame_type_from_update_type()
  if (update_type == KF_UPDATE) return 0;       // kFrameTypeKey;
  if (update_type == LF_UPDATE) return 1;       // kFrameTypeInter;
  if (update_type == ARF_UPDATE) return 2;      // kFrameTypeAltRef;
  if (update_type == OVERLAY_UPDATE) return 3;  // kFrameTypeOverlay;
  if (update_type == GF_UPDATE) return 4;       // kFrameTypeGolden;

  fprintf(stderr, "Unsupported update_type %d\n", update_type);
  abort();
  // Not reached; keeps compilers that do not know abort() never returns quiet.
  return 1;
}
// Asks the external model for its decision on the frame about to be encoded.
// Does nothing (and leaves |encode_frame_decision| untouched) unless a model
// is attached. Otherwise a vpx_rc_encodeframe_info_t is filled from the show
// index, coding index, mapped frame type and reference frame information, and
// passed to funcs.get_encodeframe_decision().
void vp9_extrc_get_encodeframe_decision(
    EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index,
    FRAME_UPDATE_TYPE update_type,
    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,
    vpx_rc_encodeframe_decision_t *encode_frame_decision) {
  vpx_rc_encodeframe_info_t frame_info;

  if (!ext_ratectrl->ready) return;

  frame_info.show_index = show_index;
  frame_info.coding_index = coding_index;
  frame_info.frame_type = extrc_get_frame_type(update_type);
  vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,
                         frame_info.ref_frame_coding_indexes,
                         frame_info.ref_frame_valid_list);
  ext_ratectrl->funcs.get_encodeframe_decision(ext_ratectrl->model,
                                               &frame_info,
                                               encode_frame_decision);
}
// Reports the outcome of an encoded frame to the external model.
// Does nothing unless a model is attached. The result carries:
//   bit_count:   passed through from the caller.
//   pixel_count: y_width * y_height + 2 * uv_width * uv_height of the source.
//   sse:         entry 0 of the PSNR_STATS sse array computed between
//                |source_frame| and |coded_frame|.
// |bit_depth| and |input_bit_depth| are only used in high bit-depth builds.
void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,
                                         int64_t bit_count,
                                         const YV12_BUFFER_CONFIG *source_frame,
                                         const YV12_BUFFER_CONFIG *coded_frame,
                                         uint32_t bit_depth,
                                         uint32_t input_bit_depth) {
  PSNR_STATS psnr;
  vpx_rc_encodeframe_result_t result;

  if (!ext_ratectrl->ready) return;

  result.bit_count = bit_count;
  result.pixel_count = source_frame->y_width * source_frame->y_height +
                       2 * source_frame->uv_width * source_frame->uv_height;
#if CONFIG_VP9_HIGHBITDEPTH
  vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
                       input_bit_depth);
#else
  (void)bit_depth;
  (void)input_bit_depth;
  vpx_calc_psnr(source_frame, coded_frame, &psnr);
#endif
  result.sse = psnr.sse[0];
  ext_ratectrl->funcs.update_encodeframe_result(ext_ratectrl->model, &result);
}

View file

@ -0,0 +1,48 @@
/*
* Copyright (c) 2020 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_
#define VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_
#include "vpx/vpx_ext_ratectrl.h"
#include "vp9/encoder/vp9_firstpass.h"
// State of an externally supplied rate-control model attached to the encoder.
typedef struct EXT_RATECTRL {
// Set to 1 by vp9_extrc_create(); the send/get/update entry points do nothing
// while this is zero.
int ready;
// Model handle filled in by funcs.create_model() and passed back to the other
// callbacks.
vpx_rc_model_t model;
// Callback table supplied by the caller of vp9_extrc_create().
vpx_rc_funcs_t funcs;
// Copy of the configuration passed to vp9_extrc_create().
vpx_rc_config_t ratectrl_config;
// First-pass stats handed to funcs.send_firstpass_stats(); its frame_stats
// buffer is allocated in vp9_extrc_create() and freed in vp9_extrc_delete().
vpx_rc_firstpass_stats_t rc_firstpass_stats;
} EXT_RATECTRL;
void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl);
void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,
EXT_RATECTRL *ext_ratectrl);
void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl);
void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,
const FIRST_PASS_INFO *first_pass_info);
void vp9_extrc_get_encodeframe_decision(
EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index,
FRAME_UPDATE_TYPE update_type,
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,
vpx_rc_encodeframe_decision_t *encode_frame_decision);
void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,
int64_t bit_count,
const YV12_BUFFER_CONFIG *source_frame,
const YV12_BUFFER_CONFIG *coded_frame,
uint32_t bit_depth,
uint32_t input_bit_depth);
#endif // VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_

View file

@ -18,18 +18,26 @@
static void copy_and_extend_plane(const uint8_t *src, int src_pitch, static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch, int w, int h, uint8_t *dst, int dst_pitch, int w, int h,
int extend_top, int extend_left, int extend_top, int extend_left,
int extend_bottom, int extend_right) { int extend_bottom, int extend_right,
int i, linesize; int interleave_step) {
int i, j, linesize;
const int step = interleave_step < 1 ? 1 : interleave_step;
// copy the left and right most columns out // copy the left and right most columns out
const uint8_t *src_ptr1 = src; const uint8_t *src_ptr1 = src;
const uint8_t *src_ptr2 = src + w - 1; const uint8_t *src_ptr2 = src + (w - 1) * step;
uint8_t *dst_ptr1 = dst - extend_left; uint8_t *dst_ptr1 = dst - extend_left;
uint8_t *dst_ptr2 = dst + w; uint8_t *dst_ptr2 = dst + w;
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
memset(dst_ptr1, src_ptr1[0], extend_left); memset(dst_ptr1, src_ptr1[0], extend_left);
if (step == 1) {
memcpy(dst_ptr1 + extend_left, src_ptr1, w); memcpy(dst_ptr1 + extend_left, src_ptr1, w);
} else {
for (j = 0; j < w; j++) {
dst_ptr1[extend_left + j] = src_ptr1[step * j];
}
}
memset(dst_ptr2, src_ptr2[0], extend_right); memset(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_pitch; src_ptr1 += src_pitch;
src_ptr2 += src_pitch; src_ptr2 += src_pitch;
@ -122,6 +130,8 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
const int el_uv = el_y >> uv_width_subsampling; const int el_uv = el_y >> uv_width_subsampling;
const int eb_uv = eb_y >> uv_height_subsampling; const int eb_uv = eb_y >> uv_height_subsampling;
const int er_uv = er_y >> uv_width_subsampling; const int er_uv = er_y >> uv_width_subsampling;
// detect nv12 colorspace
const int chroma_step = src->v_buffer - src->u_buffer == 1 ? 2 : 1;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) { if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
@ -142,15 +152,15 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_crop_width, src->y_crop_height, dst->y_stride, src->y_crop_width, src->y_crop_height,
et_y, el_y, eb_y, er_y); et_y, el_y, eb_y, er_y, 1);
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer, copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height, dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv); et_uv, el_uv, eb_uv, er_uv, chroma_step);
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer, copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height, dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv); et_uv, el_uv, eb_uv, er_uv, chroma_step);
} }
void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
@ -176,16 +186,18 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
const int srch_uv = ROUND_POWER_OF_TWO(srch, 1); const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1); const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
// detect nv12 colorspace
const int chroma_step = src->v_buffer - src->u_buffer == 1 ? 2 : 1;
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch, dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
et_y, el_y, eb_y, er_y); et_y, el_y, eb_y, er_y, 1);
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv); srch_uv, et_uv, el_uv, eb_uv, er_uv, chroma_step);
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv); srch_uv, et_uv, el_uv, eb_uv, er_uv, chroma_step);
} }

View file

@ -389,6 +389,29 @@ static int get_search_range(const VP9_COMP *cpi) {
return sr; return sr;
} }
// Reduce limits to keep the motion search within MV_MAX of ref_mv. Not doing
// this can be problematic for big videos (8K) and may cause assert failure
// (or memory violation) in mv_cost. Limits are only modified if they would
// be non-empty. Returns 1 if limits are non-empty.
static int intersect_limits_with_mv_max(MvLimits *mv_limits, const MV *ref_mv) {
  // ref_mv is offset by +/-MV_MAX and shifted right by 3 before being compared
  // with the existing limits; the tighter bound wins on every side.
  const int lo_row = VPXMAX(mv_limits->row_min, (ref_mv->row + 7 - MV_MAX) >> 3);
  const int hi_row = VPXMIN(mv_limits->row_max, (ref_mv->row - 1 + MV_MAX) >> 3);
  const int lo_col = VPXMAX(mv_limits->col_min, (ref_mv->col + 7 - MV_MAX) >> 3);
  const int hi_col = VPXMIN(mv_limits->col_max, (ref_mv->col - 1 + MV_MAX) >> 3);

  if (lo_row <= hi_row && lo_col <= hi_col) {
    mv_limits->row_min = lo_row;
    mv_limits->row_max = hi_row;
    mv_limits->col_min = lo_col;
    mv_limits->col_max = hi_col;
    return 1;
  }
  // Empty intersection: leave the caller's limits untouched.
  return 0;
}
static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const MV *ref_mv, MV *best_mv, const MV *ref_mv, MV *best_mv,
int *best_motion_err) { int *best_motion_err) {
@ -403,9 +426,14 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int step_param = 3; int step_param = 3;
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
const int sr = get_search_range(cpi); const int sr = get_search_range(cpi);
const MvLimits tmp_mv_limits = x->mv_limits;
step_param += sr; step_param += sr;
further_steps -= sr; further_steps -= sr;
if (!intersect_limits_with_mv_max(&x->mv_limits, ref_mv)) {
return;
}
// Override the default variance function to use MSE. // Override the default variance function to use MSE.
v_fn_ptr.vf = get_block_variance_fn(bsize); v_fn_ptr.vf = get_block_variance_fn(bsize);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
@ -451,6 +479,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
} }
} }
} }
x->mv_limits = tmp_mv_limits;
} }
static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
@ -810,6 +839,22 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile,
fp_acc_data->image_data_start_row); fp_acc_data->image_data_start_row);
} }
#if CONFIG_RATE_CTRL
// Records the first pass result for macroblock (mb_row, mb_col) in
// cpi->fp_motion_vector_info.
//   mv:         vector to store; only read when frame_type is not INTRA_FRAME
//               (callers pass NULL for intra).
//   frame_type: reference frame written to slot |mv_idx|.
//   mv_idx:     which of the entry's ref_frame/mv slots to write.
// For INTRA_FRAME the stored vector in that slot is left as it was.
static void store_fp_motion_vector(VP9_COMP *cpi, const MV *mv,
                                   const int mb_row, const int mb_col,
                                   MV_REFERENCE_FRAME frame_type,
                                   const int mv_idx) {
  VP9_COMMON *const cm = &cpi->common;
  // Entries are laid out row by row, cm->mb_cols per row.
  MOTION_VECTOR_INFO *const entry =
      cpi->fp_motion_vector_info + (mb_row * cm->mb_cols + mb_col);
  entry->ref_frame[mv_idx] = frame_type;
  if (frame_type == INTRA_FRAME) return;
  entry->mv[mv_idx].as_mv = *mv;
}
#endif // CONFIG_RATE_CTRL
#define NZ_MOTION_PENALTY 128 #define NZ_MOTION_PENALTY 128
#define INTRA_MODE_PENALTY 1024 #define INTRA_MODE_PENALTY 1024
void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
@ -1044,6 +1089,11 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
struct buf_2d unscaled_last_source_buf_2d; struct buf_2d unscaled_last_source_buf_2d;
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
#if CONFIG_RATE_CTRL
// Store zero mv as default
store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
#endif // CONFIG_RATE_CTRL
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@ -1108,6 +1158,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0); vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);
} }
} }
#if CONFIG_RATE_CTRL
store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
#endif // CONFIG_RATE_CTRL
// Search in an older reference frame. // Search in an older reference frame.
if ((cm->current_video_frame > 1) && gld_yv12 != NULL) { if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
@ -1129,6 +1182,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error); first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);
#if CONFIG_RATE_CTRL
store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME, 1);
#endif // CONFIG_RATE_CTRL
if (gf_motion_error < motion_error && gf_motion_error < this_error) if (gf_motion_error < motion_error && gf_motion_error < this_error)
++(fp_acc_data->second_ref_count); ++(fp_acc_data->second_ref_count);
@ -1302,6 +1358,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
} }
} else { } else {
fp_acc_data->sr_coded_error += (int64_t)this_error; fp_acc_data->sr_coded_error += (int64_t)this_error;
#if CONFIG_RATE_CTRL
store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0);
#endif // CONFIG_RATE_CTRL
} }
fp_acc_data->coded_error += (int64_t)this_error; fp_acc_data->coded_error += (int64_t)this_error;
@ -1328,6 +1387,12 @@ static void first_pass_encode(VP9_COMP *cpi, FIRSTPASS_DATA *fp_acc_data) {
// Tiling is ignored in the first pass. // Tiling is ignored in the first pass.
vp9_tile_init(tile, cm, 0, 0); vp9_tile_init(tile, cm, 0, 0);
#if CONFIG_RATE_CTRL
fp_motion_vector_info_reset(cpi->frame_info.frame_width,
cpi->frame_info.frame_height,
cpi->fp_motion_vector_info);
#endif
for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
best_ref_mv = zero_mv; best_ref_mv = zero_mv;
vp9_first_pass_encode_tile_mb_row(cpi, &cpi->td, fp_acc_data, &tile_data, vp9_first_pass_encode_tile_mb_row(cpi, &cpi->td, fp_acc_data, &tile_data,
@ -2479,9 +2544,6 @@ typedef struct RANGE {
* structs. * structs.
*/ */
static int get_gop_coding_frame_num( static int get_gop_coding_frame_num(
#if CONFIG_RATE_CTRL
const int *external_arf_indexes,
#endif
int *use_alt_ref, const FRAME_INFO *frame_info, int *use_alt_ref, const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc,
int gf_start_show_idx, const RANGE *active_gf_interval, int gf_start_show_idx, const RANGE *active_gf_interval,
@ -2497,24 +2559,6 @@ static int get_gop_coding_frame_num(
(frame_info->frame_height + frame_info->frame_width) / 4.0; (frame_info->frame_height + frame_info->frame_width) / 4.0;
double zero_motion_accumulator = 1.0; double zero_motion_accumulator = 1.0;
int gop_coding_frames; int gop_coding_frames;
#if CONFIG_RATE_CTRL
(void)mv_ratio_accumulator_thresh;
(void)active_gf_interval;
(void)gop_intra_factor;
if (external_arf_indexes != NULL && rc->frames_to_key > 1) {
// gop_coding_frames = 1 is necessary to filter out the overlay frame,
// since the arf is in this group of picture and its overlay is in the next.
gop_coding_frames = 1;
*use_alt_ref = 1;
while (gop_coding_frames < rc->frames_to_key) {
const int frame_index = gf_start_show_idx + gop_coding_frames;
++gop_coding_frames;
if (external_arf_indexes[frame_index] == 1) break;
}
return gop_coding_frames;
}
#endif // CONFIG_RATE_CTRL
*use_alt_ref = 1; *use_alt_ref = 1;
gop_coding_frames = 0; gop_coding_frames = 0;
@ -2741,15 +2785,26 @@ static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) {
gop_intra_factor = 1.0; gop_intra_factor = 1.0;
} }
{
gop_coding_frames = get_gop_coding_frame_num(
#if CONFIG_RATE_CTRL #if CONFIG_RATE_CTRL
cpi->encode_command.external_arf_indexes, {
#endif const GOP_COMMAND *gop_command = &cpi->encode_command.gop_command;
assert(allow_alt_ref == 1);
if (gop_command->use) {
gop_coding_frames = gop_command_coding_frame_count(gop_command);
use_alt_ref = gop_command->use_alt_ref;
} else {
gop_coding_frames = get_gop_coding_frame_num(
&use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
&active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
use_alt_ref &= allow_alt_ref; use_alt_ref &= allow_alt_ref;
} }
}
#else
gop_coding_frames = get_gop_coding_frame_num(
&use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
&active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
use_alt_ref &= allow_alt_ref;
#endif
// Was the group length constrained by the requirement for a new KF? // Was the group length constrained by the requirement for a new KF?
rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0; rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0;
@ -3675,6 +3730,7 @@ void vp9_get_next_group_of_picture(const VP9_COMP *cpi, int *first_is_key_frame,
int *use_alt_ref, int *coding_frame_count, int *use_alt_ref, int *coding_frame_count,
int *first_show_idx, int *first_show_idx,
int *last_gop_use_alt_ref) { int *last_gop_use_alt_ref) {
const GOP_COMMAND *gop_command = &cpi->encode_command.gop_command;
// We make a copy of rc here because we want to get information from the // We make a copy of rc here because we want to get information from the
// encoder without changing its state. // encoder without changing its state.
// TODO(angiebird): Avoid copying rc here. // TODO(angiebird): Avoid copying rc here.
@ -3697,14 +3753,19 @@ void vp9_get_next_group_of_picture(const VP9_COMP *cpi, int *first_is_key_frame,
*first_is_key_frame = 1; *first_is_key_frame = 1;
} }
if (gop_command->use) {
*coding_frame_count = gop_command_coding_frame_count(gop_command);
*use_alt_ref = gop_command->use_alt_ref;
assert(*coding_frame_count < rc.frames_to_key);
} else {
*coding_frame_count = vp9_get_gop_coding_frame_count( *coding_frame_count = vp9_get_gop_coding_frame_count(
cpi->encode_command.external_arf_indexes, &cpi->oxcf, &cpi->frame_info, &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, &rc,
&cpi->twopass.first_pass_info, &rc, *first_show_idx, multi_layer_arf, *first_show_idx, multi_layer_arf, allow_alt_ref, *first_is_key_frame,
allow_alt_ref, *first_is_key_frame, *last_gop_use_alt_ref, use_alt_ref); *last_gop_use_alt_ref, use_alt_ref);
}
} }
int vp9_get_gop_coding_frame_count(const int *external_arf_indexes, int vp9_get_gop_coding_frame_count(const VP9EncoderConfig *oxcf,
const VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info, const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info, const FIRST_PASS_INFO *first_pass_info,
const RATE_CONTROL *rc, int show_idx, const RATE_CONTROL *rc, int show_idx,
@ -3727,9 +3788,6 @@ int vp9_get_gop_coding_frame_count(const int *external_arf_indexes,
} }
frame_count = get_gop_coding_frame_num( frame_count = get_gop_coding_frame_num(
#if CONFIG_RATE_CTRL
external_arf_indexes,
#endif
use_alt_ref, frame_info, first_pass_info, rc, show_idx, use_alt_ref, frame_info, first_pass_info, rc, show_idx,
&active_gf_interval, gop_intra_factor, oxcf->lag_in_frames); &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames);
*use_alt_ref &= allow_alt_ref; *use_alt_ref &= allow_alt_ref;
@ -3738,8 +3796,7 @@ int vp9_get_gop_coding_frame_count(const int *external_arf_indexes,
// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of // Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of
// coding frames (including show frame and alt ref) can be determined. // coding frames (including show frame and alt ref) can be determined.
int vp9_get_coding_frame_num(const int *external_arf_indexes, int vp9_get_coding_frame_num(const VP9EncoderConfig *oxcf,
const VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info, const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info, const FIRST_PASS_INFO *first_pass_info,
int multi_layer_arf, int allow_alt_ref) { int multi_layer_arf, int allow_alt_ref) {
@ -3750,7 +3807,6 @@ int vp9_get_coding_frame_num(const int *external_arf_indexes,
int show_idx = 0; int show_idx = 0;
int last_gop_use_alt_ref = 0; int last_gop_use_alt_ref = 0;
vp9_rc_init(oxcf, 1, &rc); vp9_rc_init(oxcf, 1, &rc);
rc.static_scene_max_gf_interval = 250;
while (show_idx < first_pass_info->num_frames) { while (show_idx < first_pass_info->num_frames) {
int use_alt_ref; int use_alt_ref;
@ -3763,9 +3819,8 @@ int vp9_get_coding_frame_num(const int *external_arf_indexes,
} }
gop_coding_frame_count = vp9_get_gop_coding_frame_count( gop_coding_frame_count = vp9_get_gop_coding_frame_count(
external_arf_indexes, oxcf, frame_info, first_pass_info, &rc, show_idx, oxcf, frame_info, first_pass_info, &rc, show_idx, multi_layer_arf,
multi_layer_arf, allow_alt_ref, first_is_key_frame, allow_alt_ref, first_is_key_frame, last_gop_use_alt_ref, &use_alt_ref);
last_gop_use_alt_ref, &use_alt_ref);
rc.source_alt_ref_active = use_alt_ref; rc.source_alt_ref_active = use_alt_ref;
last_gop_use_alt_ref = use_alt_ref; last_gop_use_alt_ref = use_alt_ref;
@ -3777,6 +3832,30 @@ int vp9_get_coding_frame_num(const int *external_arf_indexes,
} }
return coding_frame_num; return coding_frame_num;
} }
// Builds a binary map over show frames: key_frame_map[i] is set to 1 when
// show frame i starts a key frame group and to 0 otherwise.
//   oxcf, frame_info, first_pass_info: forwarded to
//     vp9_get_frames_to_next_key() to measure each key frame group.
//   key_frame_map: output array; see the size requirement stated below.
void vp9_get_key_frame_map(const VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info,
int *key_frame_map) {
int show_idx = 0;
RATE_CONTROL rc;
// A local rate control state is initialized (with pass = 1); within this
// function only its min_gf_interval field is read.
vp9_rc_init(oxcf, 1, &rc);
// key_frame_map points to an int array with size equal to
// first_pass_info->num_frames, which is also the number of show frames in the
// video.
// Clear every entry first; only key frame positions are set to 1 below.
memset(key_frame_map, 0,
sizeof(*key_frame_map) * first_pass_info->num_frames);
// Walk the video one key frame group at a time. The first frame of each
// group (including show index 0) is marked as a key frame.
while (show_idx < first_pass_info->num_frames) {
int key_frame_group_size;
key_frame_map[show_idx] = 1;
key_frame_group_size = vp9_get_frames_to_next_key(
oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);
// NOTE(review): when asserts are compiled out (NDEBUG), a non-positive
// group size would prevent this loop from making progress.
assert(key_frame_group_size > 0);
show_idx += key_frame_group_size;
}
// The group sizes are expected to sum exactly to the number of show frames.
assert(show_idx == first_pass_info->num_frames);
}
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) { FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) {

View file

@ -264,7 +264,6 @@ void vp9_get_next_group_of_picture(const struct VP9_COMP *cpi,
/*!\brief Call this function before coding a new group of pictures to get /*!\brief Call this function before coding a new group of pictures to get
* information about it. * information about it.
* \param[in] external_arf_indexes External arf indexs passed in
* \param[in] oxcf Encoder config * \param[in] oxcf Encoder config
* \param[in] frame_info Frame info * \param[in] frame_info Frame info
* \param[in] first_pass_info First pass stats * \param[in] first_pass_info First pass stats
@ -279,8 +278,7 @@ void vp9_get_next_group_of_picture(const struct VP9_COMP *cpi,
* *
* \return Returns coding frame count * \return Returns coding frame count
*/ */
int vp9_get_gop_coding_frame_count(const int *external_arf_indexes, int vp9_get_gop_coding_frame_count(const struct VP9EncoderConfig *oxcf,
const struct VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info, const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info, const FIRST_PASS_INFO *first_pass_info,
const RATE_CONTROL *rc, int show_idx, const RATE_CONTROL *rc, int show_idx,
@ -288,11 +286,20 @@ int vp9_get_gop_coding_frame_count(const int *external_arf_indexes,
int first_is_key_frame, int first_is_key_frame,
int last_gop_use_alt_ref, int *use_alt_ref); int last_gop_use_alt_ref, int *use_alt_ref);
int vp9_get_coding_frame_num(const int *external_arf_indexes, int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
const struct VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info, const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info, const FIRST_PASS_INFO *first_pass_info,
int multi_layer_arf, int allow_alt_ref); int multi_layer_arf, int allow_alt_ref);
/*!\brief Compute a binary key frame map that indicates whether a key frame
* appears at each position. The passed-in key_frame_map must point to an
* integer array with length equal to first_pass_info->num_frames, which is the
* number of show frames in the video.
*/
void vp9_get_key_frame_map(const struct VP9EncoderConfig *oxcf,
const FRAME_INFO *frame_info,
const FIRST_PASS_INFO *first_pass_info,
int *key_frame_map);
#endif // CONFIG_RATE_CTRL #endif // CONFIG_RATE_CTRL
FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass); FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass);

View file

@ -1127,7 +1127,7 @@ static INLINE void update_thresh_freq_fact_row_mt(
} }
static INLINE void update_thresh_freq_fact( static INLINE void update_thresh_freq_fact(
VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance, VP9_COMP *cpi, TileDataEnc *tile_data, unsigned int source_variance,
BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx, BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx,
PREDICTION_MODE mode) { PREDICTION_MODE mode) {
THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)]; THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];

View file

@ -249,7 +249,7 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
// way for CBR mode, for the buffering updates below. Look into removing one // way for CBR mode, for the buffering updates below. Look into removing one
// of these (i.e., bits_off_target). // of these (i.e., bits_off_target).
// Update the buffer level before encoding with the per-frame-bandwidth, // Update the buffer level before encoding with the per-frame-bandwidth,
static void update_buffer_level_preencode(VP9_COMP *cpi) { void vp9_update_buffer_level_preencode(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc; RATE_CONTROL *const rc = &cpi->rc;
rc->bits_off_target += rc->avg_frame_bandwidth; rc->bits_off_target += rc->avg_frame_bandwidth;
// Clip the buffer level to the maximum specified buffer size. // Clip the buffer level to the maximum specified buffer size.
@ -431,11 +431,17 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
oxcf->init_framerate, rc->min_gf_interval); oxcf->init_framerate, rc->min_gf_interval);
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
} else {
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
}
rc->force_max_q = 0; rc->force_max_q = 0;
rc->last_post_encode_dropped_scene_change = 0; rc->last_post_encode_dropped_scene_change = 0;
rc->use_post_encode_drop = 0; rc->use_post_encode_drop = 0;
rc->ext_use_post_encode_drop = 0; rc->ext_use_post_encode_drop = 0;
rc->disable_overshoot_maxq_cbr = 0;
rc->arf_active_best_quality_adjustment_factor = 1.0; rc->arf_active_best_quality_adjustment_factor = 1.0;
rc->arf_increase_active_best_quality = 0; rc->arf_increase_active_best_quality = 0;
rc->preserve_arf_as_gld = 0; rc->preserve_arf_as_gld = 0;
@ -1690,8 +1696,10 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target,
} else { } else {
// For very small rate targets where the fractional adjustment // For very small rate targets where the fractional adjustment
// may be tiny make sure there is at least a minimum range. // may be tiny make sure there is at least a minimum range.
const int tol_low = (cpi->sf.recode_tolerance_low * frame_target) / 100; const int tol_low =
const int tol_high = (cpi->sf.recode_tolerance_high * frame_target) / 100; (int)(((int64_t)cpi->sf.recode_tolerance_low * frame_target) / 100);
const int tol_high =
(int)(((int64_t)cpi->sf.recode_tolerance_high * frame_target) / 100);
*frame_under_shoot_limit = VPXMAX(frame_target - tol_low - 100, 0); *frame_under_shoot_limit = VPXMAX(frame_target - tol_low - 100, 0);
*frame_over_shoot_limit = *frame_over_shoot_limit =
VPXMIN(frame_target + tol_high + 100, cpi->rc.max_frame_bandwidth); VPXMIN(frame_target + tol_high + 100, cpi->rc.max_frame_bandwidth);
@ -1706,9 +1714,16 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
// Modify frame size target when down-scaling. // Modify frame size target when down-scaling.
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
rc->frame_size_selector != UNSCALED) rc->frame_size_selector != UNSCALED) {
rc->this_frame_target = (int)(rc->this_frame_target * rc->this_frame_target = (int)(rc->this_frame_target *
rate_thresh_mult[rc->frame_size_selector]); rate_thresh_mult[rc->frame_size_selector]);
}
#if CONFIG_RATE_CTRL
if (cpi->encode_command.use_external_target_frame_bits) {
rc->this_frame_target = cpi->encode_command.target_frame_bits;
}
#endif
// Target rate per SB64 (including partial SB64s). // Target rate per SB64 (including partial SB64s).
rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) / rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) /
@ -1981,6 +1996,7 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
cpi->rc.rc_2_frame = 0; cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0; cpi->rc.rc_1_frame = 0;
cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth; cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
cpi->rc.last_q[INTER_FRAME] = cpi->common.base_qindex;
// For SVC on dropped frame when framedrop_mode != LAYER_DROP: // For SVC on dropped frame when framedrop_mode != LAYER_DROP:
// in this mode the whole superframe may be dropped if only a single layer // in this mode the whole superframe may be dropped if only a single layer
// has buffer underflow (below threshold). Since this can then lead to // has buffer underflow (below threshold). Since this can then lead to
@ -2098,7 +2114,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
vp9_cyclic_refresh_update_parameters(cpi); vp9_cyclic_refresh_update_parameters(cpi);
} }
static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { int vp9_calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf; const VP9EncoderConfig *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc; const RATE_CONTROL *rc = &cpi->rc;
const SVC *const svc = &cpi->svc; const SVC *const svc = &cpi->svc;
@ -2147,7 +2163,7 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
return VPXMAX(min_frame_target, target); return VPXMAX(min_frame_target, target);
} }
static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { int vp9_calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const RATE_CONTROL *rc = &cpi->rc; const RATE_CONTROL *rc = &cpi->rc;
const VP9EncoderConfig *oxcf = &cpi->oxcf; const VP9EncoderConfig *oxcf = &cpi->oxcf;
const SVC *const svc = &cpi->svc; const SVC *const svc = &cpi->svc;
@ -2253,7 +2269,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
// Assumption here is that LAST_FRAME is being updated for a keyframe. // Assumption here is that LAST_FRAME is being updated for a keyframe.
// Thus no change in update flags. // Thus no change in update flags.
target = calc_iframe_target_size_one_pass_cbr(cpi); target = vp9_calc_iframe_target_size_one_pass_cbr(cpi);
} }
} else { } else {
cm->frame_type = INTER_FRAME; cm->frame_type = INTER_FRAME;
@ -2266,7 +2282,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
(svc->spatial_layer_id == 0 && cm->current_video_frame > 0) (svc->spatial_layer_id == 0 && cm->current_video_frame > 0)
? 0 ? 0
: svc->layer_context[svc->temporal_layer_id].is_key_frame; : svc->layer_context[svc->temporal_layer_id].is_key_frame;
target = calc_pframe_target_size_one_pass_cbr(cpi); target = vp9_calc_pframe_target_size_one_pass_cbr(cpi);
} }
} }
@ -2275,7 +2291,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
svc->layer_context[layer].is_key_frame == 1) { svc->layer_context[layer].is_key_frame == 1) {
cm->frame_type = KEY_FRAME; cm->frame_type = KEY_FRAME;
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
target = calc_iframe_target_size_one_pass_cbr(cpi); target = vp9_calc_iframe_target_size_one_pass_cbr(cpi);
} }
// Set the buffer idx and refresh flags for key frames in simulcast mode. // Set the buffer idx and refresh flags for key frames in simulcast mode.
// Note the buffer slot for long-term reference is set below (line 2255), // Note the buffer slot for long-term reference is set below (line 2255),
@ -2360,7 +2376,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
} }
if (svc->set_intra_only_frame) { if (svc->set_intra_only_frame) {
set_intra_only_frame(cpi); set_intra_only_frame(cpi);
target = calc_iframe_target_size_one_pass_cbr(cpi); target = vp9_calc_iframe_target_size_one_pass_cbr(cpi);
} }
// Any update/change of global cyclic refresh parameters (amount/delta-qp) // Any update/change of global cyclic refresh parameters (amount/delta-qp)
// should be done here, before the frame qp is selected. // should be done here, before the frame qp is selected.
@ -2371,7 +2387,8 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
if (cm->show_frame) update_buffer_level_svc_preencode(cpi); if (cm->show_frame) update_buffer_level_svc_preencode(cpi);
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && svc->single_layer_svc == 1 && if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && svc->single_layer_svc == 1 &&
svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { svc->spatial_layer_id == svc->first_spatial_layer_to_encode &&
svc->temporal_layer_id == 0) {
LAYER_CONTEXT *lc = NULL; LAYER_CONTEXT *lc = NULL;
cpi->resize_pending = vp9_resize_one_pass_cbr(cpi); cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
if (cpi->resize_pending) { if (cpi->resize_pending) {
@ -2385,6 +2402,11 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
cpi->resize_scale_num * lc->scaling_factor_num; cpi->resize_scale_num * lc->scaling_factor_num;
lc->scaling_factor_den_resize = lc->scaling_factor_den_resize =
cpi->resize_scale_den * lc->scaling_factor_den; cpi->resize_scale_den * lc->scaling_factor_den;
// Reset rate control for all temporal layers.
lc->rc.buffer_level = lc->rc.optimal_buffer_level;
lc->rc.bits_off_target = lc->rc.optimal_buffer_level;
lc->rc.rate_correction_factors[INTER_FRAME] =
rc->rate_correction_factors[INTER_FRAME];
} }
// Set the size for this current temporal layer. // Set the size for this current temporal layer.
lc = &svc->layer_context[svc->spatial_layer_id * lc = &svc->layer_context[svc->spatial_layer_id *
@ -2394,9 +2416,11 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
lc->scaling_factor_num_resize, lc->scaling_factor_num_resize,
lc->scaling_factor_den_resize, &width, &height); lc->scaling_factor_den_resize, &width, &height);
vp9_set_size_literal(cpi, width, height); vp9_set_size_literal(cpi, width, height);
svc->resize_set = 1;
} }
} else { } else {
cpi->resize_pending = 0; cpi->resize_pending = 0;
svc->resize_set = 0;
} }
} }
@ -2433,13 +2457,13 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
vp9_cyclic_refresh_update_parameters(cpi); vp9_cyclic_refresh_update_parameters(cpi);
if (frame_is_intra_only(cm)) if (frame_is_intra_only(cm))
target = calc_iframe_target_size_one_pass_cbr(cpi); target = vp9_calc_iframe_target_size_one_pass_cbr(cpi);
else else
target = calc_pframe_target_size_one_pass_cbr(cpi); target = vp9_calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target); vp9_rc_set_frame_target(cpi, target);
if (cm->show_frame) update_buffer_level_preencode(cpi); if (cm->show_frame) vp9_update_buffer_level_preencode(cpi);
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC) if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
cpi->resize_pending = vp9_resize_one_pass_cbr(cpi); cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
@ -2657,6 +2681,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
int min_width = (320 * 4) / 3; int min_width = (320 * 4) / 3;
int min_height = (180 * 4) / 3; int min_height = (180 * 4) / 3;
int down_size_on = 1; int down_size_on = 1;
int force_downsize_rate = 0;
cpi->resize_scale_num = 1; cpi->resize_scale_num = 1;
cpi->resize_scale_den = 1; cpi->resize_scale_den = 1;
// Don't resize on key frame; reset the counters on key frame. // Don't resize on key frame; reset the counters on key frame.
@ -2677,11 +2702,32 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
} }
#endif #endif
// Force downsize based on per-frame-bandwidth, for extreme case,
// for HD input.
if (cpi->resize_state == ORIG && cm->width * cm->height >= 1280 * 720) {
if (rc->avg_frame_bandwidth < 300000 / 30) {
resize_action = DOWN_ONEHALF;
cpi->resize_state = ONE_HALF;
force_downsize_rate = 1;
} else if (rc->avg_frame_bandwidth < 400000 / 30) {
resize_action = ONEHALFONLY_RESIZE ? DOWN_ONEHALF : DOWN_THREEFOUR;
cpi->resize_state = ONEHALFONLY_RESIZE ? ONE_HALF : THREE_QUARTER;
force_downsize_rate = 1;
}
} else if (cpi->resize_state == THREE_QUARTER &&
cm->width * cm->height >= 960 * 540) {
if (rc->avg_frame_bandwidth < 300000 / 30) {
resize_action = DOWN_ONEHALF;
cpi->resize_state = ONE_HALF;
force_downsize_rate = 1;
}
}
// Resize based on average buffer underflow and QP over some window. // Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key. // Ignore samples close to key frame, since QP is usually high after key.
if (cpi->rc.frames_since_key > 2 * cpi->framerate) { if (!force_downsize_rate && cpi->rc.frames_since_key > cpi->framerate) {
const int window = (int)(4 * cpi->framerate); const int window = VPXMIN(30, (int)(2 * cpi->framerate));
cpi->resize_avg_qp += cm->base_qindex; cpi->resize_avg_qp += rc->last_q[INTER_FRAME];
if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100)) if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
++cpi->resize_buffer_underflow; ++cpi->resize_buffer_underflow;
++cpi->resize_count; ++cpi->resize_count;
@ -2742,7 +2788,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
// Reset buffer level to optimal, update target size. // Reset buffer level to optimal, update target size.
rc->buffer_level = rc->optimal_buffer_level; rc->buffer_level = rc->optimal_buffer_level;
rc->bits_off_target = rc->optimal_buffer_level; rc->bits_off_target = rc->optimal_buffer_level;
rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi); rc->this_frame_target = vp9_calc_pframe_target_size_one_pass_cbr(cpi);
// Get the projected qindex, based on the scaled target frame size (scaled // Get the projected qindex, based on the scaled target frame size (scaled
// so target_bits_per_mb in vp9_rc_regulate_q will be correct target). // so target_bits_per_mb in vp9_rc_regulate_q will be correct target).
target_bits_per_frame = (resize_action >= 0) target_bits_per_frame = (resize_action >= 0)
@ -2960,7 +3006,7 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
int scene_cut_force_key_frame = 0; int scene_cut_force_key_frame = 0;
int num_zero_temp_sad = 0; int num_zero_temp_sad = 0;
uint64_t avg_sad_current = 0; uint64_t avg_sad_current = 0;
uint32_t min_thresh = 10000; uint32_t min_thresh = 20000; // ~5 * 64 * 64
float thresh = 8.0f; float thresh = 8.0f;
uint32_t thresh_key = 140000; uint32_t thresh_key = 140000;
if (cpi->oxcf.speed <= 5) thresh_key = 240000; if (cpi->oxcf.speed <= 5) thresh_key = 240000;
@ -3217,7 +3263,7 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
int tl = 0; int tl = 0;
int sl = 0; int sl = 0;
SVC *svc = &cpi->svc; SVC *svc = &cpi->svc;
for (sl = 0; sl < svc->first_spatial_layer_to_encode; ++sl) { for (sl = 0; sl < VPXMAX(1, svc->first_spatial_layer_to_encode); ++sl) {
for (tl = 0; tl < svc->number_temporal_layers; ++tl) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
const int layer = const int layer =
LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);

View file

@ -195,7 +195,8 @@ typedef struct {
int use_post_encode_drop; int use_post_encode_drop;
// External flag to enable post encode frame dropping, controlled by user. // External flag to enable post encode frame dropping, controlled by user.
int ext_use_post_encode_drop; int ext_use_post_encode_drop;
// Flag to disable CBR feature to increase Q on overshoot detection.
int disable_overshoot_maxq_cbr;
int damped_adjustment[RATE_FACTOR_LEVELS]; int damped_adjustment[RATE_FACTOR_LEVELS];
double arf_active_best_quality_adjustment_factor; double arf_active_best_quality_adjustment_factor;
int arf_increase_active_best_quality; int arf_increase_active_best_quality;
@ -252,6 +253,9 @@ int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval);
// encode_frame_to_data_rate() function. // encode_frame_to_data_rate() function.
void vp9_rc_get_one_pass_vbr_params(struct VP9_COMP *cpi); void vp9_rc_get_one_pass_vbr_params(struct VP9_COMP *cpi);
void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi); void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi);
int vp9_calc_pframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);
int vp9_calc_iframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);
void vp9_update_buffer_level_preencode(struct VP9_COMP *cpi);
void vp9_rc_get_svc_params(struct VP9_COMP *cpi); void vp9_rc_get_svc_params(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters based // Post encode update of the rate control parameters based

View file

@ -4443,6 +4443,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
tmp_best_sse = total_sse; tmp_best_sse = total_sse;
tmp_best_skippable = skippable; tmp_best_skippable = skippable;
tmp_best_mbmode = *mi; tmp_best_mbmode = *mi;
x->sum_y_eobs[TX_4X4] = 0;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
@ -4476,6 +4477,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
&rate, &rate_y, &distortion, &skippable, &total_sse, &rate, &rate_y, &distortion, &skippable, &total_sse,
(int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col); (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col);
if (tmp_rd == INT64_MAX) continue; if (tmp_rd == INT64_MAX) continue;
x->sum_y_eobs[TX_4X4] = 0;
for (i = 0; i < 4; i++) {
x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
}
} else { } else {
total_sse = tmp_best_sse; total_sse = tmp_best_sse;
rate = tmp_best_rate; rate = tmp_best_rate;

View file

@ -621,7 +621,7 @@ static void set_rt_speed_feature_framesize_independent(
// increase in encoding time. // increase in encoding time.
if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1; if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1;
if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
cpi->oxcf.rc_mode == VPX_CBR) { cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.disable_overshoot_maxq_cbr) {
if (cm->width * cm->height <= 352 * 288 && !cpi->use_svc && if (cm->width * cm->height <= 352 * 288 && !cpi->use_svc &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN) cpi->oxcf.content != VP9E_CONTENT_SCREEN)
sf->overshoot_detection_cbr_rt = RE_ENCODE_MAXQ; sf->overshoot_detection_cbr_rt = RE_ENCODE_MAXQ;
@ -634,6 +634,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_compound_nonrd_pickmode = 1; sf->use_compound_nonrd_pickmode = 1;
} }
if (cm->width * cm->height > 1280 * 720) sf->cb_pred_filter_search = 1; if (cm->width * cm->height > 1280 * 720) sf->cb_pred_filter_search = 1;
if (!cpi->external_resize) sf->use_source_sad = 1;
} }
if (speed >= 6) { if (speed >= 6) {
@ -646,8 +647,6 @@ static void set_rt_speed_feature_framesize_independent(
sf->mv.reduce_first_step_size = 1; sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0; sf->skip_encode_sb = 0;
if (!cpi->external_resize) sf->use_source_sad = 1;
if (sf->use_source_sad) { if (sf->use_source_sad) {
sf->adapt_partition_source_sad = 1; sf->adapt_partition_source_sad = 1;
sf->adapt_partition_thresh = sf->adapt_partition_thresh =
@ -669,7 +668,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->base_mv_aggressive = 1; sf->base_mv_aggressive = 1;
} }
if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
cpi->oxcf.rc_mode == VPX_CBR) cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.disable_overshoot_maxq_cbr)
sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ;
} }
@ -728,6 +727,9 @@ static void set_rt_speed_feature_framesize_independent(
if (speed >= 8) { if (speed >= 8) {
sf->adaptive_rd_thresh = 4; sf->adaptive_rd_thresh = 4;
sf->skip_encode_sb = 1; sf->skip_encode_sb = 1;
if (cpi->svc.number_spatial_layers > 1 && !cpi->svc.simulcast_mode)
sf->nonrd_keyframe = 0;
else
sf->nonrd_keyframe = 1; sf->nonrd_keyframe = 1;
if (!cpi->use_svc) cpi->max_copied_frame = 4; if (!cpi->use_svc) cpi->max_copied_frame = 4;
if (cpi->row_mt && cpi->oxcf.max_threads > 1) if (cpi->row_mt && cpi->oxcf.max_threads > 1)
@ -787,6 +789,15 @@ static void set_rt_speed_feature_framesize_independent(
if (cm->width * cm->height >= 640 * 360) sf->variance_part_thresh_mult = 2; if (cm->width * cm->height >= 640 * 360) sf->variance_part_thresh_mult = 2;
} }
// Disable split to 8x8 for low-resolution at very high Q.
// For variance partition (speed >= 6). Ignore the first few frames
// as avg_frame_qindex starts at max_q (worst_quality).
if (cm->frame_type != KEY_FRAME && cm->width * cm->height <= 320 * 240 &&
sf->partition_search_type == VAR_BASED_PARTITION &&
cpi->rc.avg_frame_qindex[INTER_FRAME] > 208 &&
cpi->common.current_video_frame > 8)
sf->disable_16x16part_nonkey = 1;
if (sf->nonrd_use_ml_partition) if (sf->nonrd_use_ml_partition)
sf->partition_search_type = ML_BASED_PARTITION; sf->partition_search_type = ML_BASED_PARTITION;

View file

@ -56,6 +56,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->num_encoded_top_layer = 0; svc->num_encoded_top_layer = 0;
svc->simulcast_mode = 0; svc->simulcast_mode = 0;
svc->single_layer_svc = 0; svc->single_layer_svc = 0;
svc->resize_set = 0;
for (i = 0; i < REF_FRAMES; ++i) { for (i = 0; i < REF_FRAMES; ++i) {
svc->fb_idx_spatial_layer_id[i] = 0xff; svc->fb_idx_spatial_layer_id[i] = 0xff;
@ -356,6 +357,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
if (is_one_pass_cbr_svc(cpi) && lc->speed > 0) { if (is_one_pass_cbr_svc(cpi) && lc->speed > 0) {
cpi->oxcf.speed = lc->speed; cpi->oxcf.speed = lc->speed;
} }
cpi->loopfilter_ctrl = lc->loopfilter_ctrl;
// Reset the frames_since_key and frames_to_key counters to their values // Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer). // before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1 || if (cpi->svc.number_temporal_layers > 1 ||
@ -770,9 +772,7 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF && if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF &&
svc->number_spatial_layers > 1 && svc->number_spatial_layers <= 3 && svc->number_spatial_layers > 1 && svc->number_spatial_layers <= 3 &&
svc->number_temporal_layers <= 3 && svc->number_temporal_layers <= 3)
!(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
svc->use_set_ref_frame_config))
svc->simulcast_mode = 1; svc->simulcast_mode = 1;
else else
svc->simulcast_mode = 0; svc->simulcast_mode = 0;
@ -866,8 +866,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
} }
} }
// Reset the drop flags for all spatial layers, on the base layer. // Reset the drop flags for all spatial layers, on the
if (svc->spatial_layer_id == 0) { // first_spatial_layer_to_encode.
if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
vp9_zero(svc->drop_spatial_layer); vp9_zero(svc->drop_spatial_layer);
// TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx
// causes an issue with frame dropping and temporal layers, when the frame // causes an issue with frame dropping and temporal layers, when the frame
@ -1261,7 +1262,7 @@ static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) {
BufferPool *const pool = cm->buffer_pool; BufferPool *const pool = cm->buffer_pool;
int i; int i;
for (i = 0; i < REF_FRAMES; i++) { for (i = 0; i < REF_FRAMES; i++) {
if (cm->frame_type == KEY_FRAME || if ((cm->frame_type == KEY_FRAME && !svc->simulcast_mode) ||
svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) { svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id; svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;

View file

@ -71,6 +71,7 @@ typedef struct {
int actual_num_seg2_blocks; int actual_num_seg2_blocks;
int counter_encode_maxq_scene_change; int counter_encode_maxq_scene_change;
uint8_t speed; uint8_t speed;
int loopfilter_ctrl;
} LAYER_CONTEXT; } LAYER_CONTEXT;
typedef struct SVC { typedef struct SVC {
@ -198,6 +199,7 @@ typedef struct SVC {
// Flag to indicate SVC is dynamically switched to a single layer. // Flag to indicate SVC is dynamically switched to a single layer.
int single_layer_svc; int single_layer_svc;
int resize_set;
} SVC; } SVC;
struct VP9_COMP; struct VP9_COMP;

View file

@ -0,0 +1,174 @@
/*
* Copyright (c) 2020 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/ratectrl_rtc.h"
#include <cstring>
#include <new>
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_codec.h"
namespace libvpx {
// Factory for the rate controller.
//
// Allocates the controller object and its VP9_COMP state, then runs
// InitRateControl() with |cfg|.
//
// Returns nullptr when either allocation fails.
std::unique_ptr<VP9RateControlRTC> VP9RateControlRTC::Create(
    const VP9RateControlRtcConfig &cfg) {
  std::unique_ptr<VP9RateControlRTC> rc_api(new (std::nothrow)
                                                VP9RateControlRTC());
  if (!rc_api) return nullptr;
  rc_api->cpi_ = static_cast<VP9_COMP *>(vpx_memalign(32, sizeof(*cpi_)));
  if (rc_api->cpi_ == nullptr) {
    return nullptr;
  }
  // Start from an all-zero encoder state. Nothing below assigns every field:
  // UpdateRateControl() reads common.current_video_frame before
  // InitRateControl() has set it, and the destructor passes the per-layer
  // map / last_coded_q_map / consec_zero_mv pointers to vpx_free() whether or
  // not any layer setup ever assigned them.
  std::memset(rc_api->cpi_, 0, sizeof(*rc_api->cpi_));
  rc_api->InitRateControl(cfg);
  return rc_api;
}
// One-time setup performed by Create().
//
// Fixes the controller to one-pass CBR, 8-bit profile 0, no AQ and no frame
// dropping, applies |rc_cfg| through UpdateRateControl(), and then
// initializes the rate control state.
void VP9RateControlRTC::InitRateControl(const VP9RateControlRtcConfig &rc_cfg) {
  VP9_COMMON *cm = &cpi_->common;
  VP9EncoderConfig *oxcf = &cpi_->oxcf;
  RATE_CONTROL *const rc = &cpi_->rc;
  cm->profile = PROFILE_0;
  cm->bit_depth = VPX_BITS_8;
  cm->show_frame = 1;
  oxcf->rc_mode = VPX_CBR;
  oxcf->pass = 0;
  oxcf->aq_mode = NO_AQ;
  oxcf->content = VP9E_CONTENT_DEFAULT;
  oxcf->drop_frames_water_mark = 0;
  // Must be set before UpdateRateControl(): that function tests
  // current_video_frame == 0 to decide whether to initialize the layer
  // contexts, so it must not observe an unset frame counter.
  cm->current_video_frame = 0;

  UpdateRateControl(rc_cfg);

  cpi_->use_svc = (cpi_->svc.number_spatial_layers > 1 ||
                   cpi_->svc.number_temporal_layers > 1)
                      ? 1
                      : 0;

  rc->rc_1_frame = 0;
  rc->rc_2_frame = 0;
  vp9_rc_init_minq_luts();
  vp9_rc_init(oxcf, 0, rc);
  cpi_->sf.use_nonrd_pick_mode = 1;
}
void VP9RateControlRTC::UpdateRateControl(
const VP9RateControlRtcConfig &rc_cfg) {
VP9_COMMON *cm = &cpi_->common;
VP9EncoderConfig *oxcf = &cpi_->oxcf;
RATE_CONTROL *const rc = &cpi_->rc;
cm->width = rc_cfg.width;
cm->height = rc_cfg.height;
oxcf->width = rc_cfg.width;
oxcf->height = rc_cfg.height;
oxcf->worst_allowed_q = vp9_quantizer_to_qindex(rc_cfg.max_quantizer);
oxcf->best_allowed_q = vp9_quantizer_to_qindex(rc_cfg.min_quantizer);
rc->worst_quality = oxcf->worst_allowed_q;
rc->best_quality = oxcf->best_allowed_q;
oxcf->target_bandwidth = 1000 * rc_cfg.target_bandwidth;
oxcf->starting_buffer_level_ms = rc_cfg.buf_initial_sz;
oxcf->optimal_buffer_level_ms = rc_cfg.buf_optimal_sz;
oxcf->maximum_buffer_size_ms = rc_cfg.buf_sz;
oxcf->under_shoot_pct = rc_cfg.undershoot_pct;
oxcf->over_shoot_pct = rc_cfg.overshoot_pct;
oxcf->ss_number_layers = rc_cfg.ss_number_layers;
oxcf->ts_number_layers = rc_cfg.ts_number_layers;
oxcf->temporal_layering_mode = (VP9E_TEMPORAL_LAYERING_MODE)(
(rc_cfg.ts_number_layers > 1) ? rc_cfg.ts_number_layers : 0);
cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct;
cpi_->framerate = rc_cfg.framerate;
cpi_->svc.number_spatial_layers = rc_cfg.ss_number_layers;
cpi_->svc.number_temporal_layers = rc_cfg.ts_number_layers;
for (int sl = 0; sl < cpi_->svc.number_spatial_layers; ++sl) {
for (int tl = 0; tl < cpi_->svc.number_temporal_layers; ++tl) {
const int layer =
LAYER_IDS_TO_IDX(sl, tl, cpi_->svc.number_temporal_layers);
LAYER_CONTEXT *lc = &cpi_->svc.layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
oxcf->layer_target_bitrate[layer] =
1000 * rc_cfg.layer_target_bitrate[layer];
lrc->worst_quality =
vp9_quantizer_to_qindex(rc_cfg.max_quantizers[layer]);
lrc->best_quality = vp9_quantizer_to_qindex(rc_cfg.min_quantizers[layer]);
lc->scaling_factor_num = rc_cfg.scaling_factor_num[sl];
lc->scaling_factor_den = rc_cfg.scaling_factor_den[sl];
oxcf->ts_rate_decimator[tl] = rc_cfg.ts_rate_decimator[tl];
}
}
vp9_set_rc_buffer_sizes(cpi_);
vp9_new_framerate(cpi_, cpi_->framerate);
if (cpi_->svc.number_temporal_layers > 1 ||
cpi_->svc.number_spatial_layers > 1) {
if (cm->current_video_frame == 0) vp9_init_layer_context(cpi_);
vp9_update_layer_context_change_config(cpi_,
(int)cpi_->oxcf.target_bandwidth);
}
vp9_check_reset_rc_flag(cpi_);
}
void VP9RateControlRTC::ComputeQP(const VP9FrameParamsQpRTC &frame_params) {
VP9_COMMON *const cm = &cpi_->common;
int width, height;
cpi_->svc.spatial_layer_id = frame_params.spatial_layer_id;
cpi_->svc.temporal_layer_id = frame_params.temporal_layer_id;
if (cpi_->svc.number_spatial_layers > 1) {
const int layer = LAYER_IDS_TO_IDX(cpi_->svc.spatial_layer_id,
cpi_->svc.temporal_layer_id,
cpi_->svc.number_temporal_layers);
LAYER_CONTEXT *lc = &cpi_->svc.layer_context[layer];
get_layer_resolution(cpi_->oxcf.width, cpi_->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
cm->width = width;
cm->height = height;
}
vp9_set_mb_mi(cm, cm->width, cm->height);
cm->frame_type = frame_params.frame_type;
cpi_->refresh_golden_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;
cpi_->sf.use_nonrd_pick_mode = 1;
if (cpi_->svc.number_spatial_layers == 1 &&
cpi_->svc.number_temporal_layers == 1) {
int target;
if (frame_is_intra_only(cm))
target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_);
else
target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_);
vp9_rc_set_frame_target(cpi_, target);
vp9_update_buffer_level_preencode(cpi_);
} else {
vp9_update_temporal_layer_framerate(cpi_);
vp9_restore_layer_context(cpi_);
vp9_rc_get_svc_params(cpi_);
}
int bottom_index, top_index;
cpi_->common.base_qindex =
vp9_rc_pick_q_and_bounds(cpi_, &bottom_index, &top_index);
}
int VP9RateControlRTC::GetQP() const { return cpi_->common.base_qindex; }
// Runs the LPF_PICK_FROM_Q filter-level selection and returns the resulting
// loop filter level.
int VP9RateControlRTC::GetLoopfilterLevel() const {
  vp9_pick_filter_level(nullptr, cpi_, LPF_PICK_FROM_Q);
  return cpi_->common.lf.filter_level;
}
// Feeds the size of the frame that was just encoded back into rate control,
// saves the layer context when more than one layer is configured, and
// advances the frame counter.
void VP9RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {
  vp9_rc_postencode_update(cpi_, encoded_frame_size);

  const bool multi_layer = cpi_->svc.number_spatial_layers > 1 ||
                           cpi_->svc.number_temporal_layers > 1;
  if (multi_layer) {
    vp9_save_layer_context(cpi_);
  }

  ++cpi_->common.current_video_frame;
}
} // namespace libvpx

View file

@ -0,0 +1,116 @@
/*
* Copyright (c) 2020 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_VP9_RATECTRL_RTC_H_
#define VPX_VP9_RATECTRL_RTC_H_
#include <cstdint>
#include <memory>
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/vp9_iface_common.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/vp9_cx_iface.h"
#include "vpx_mem/vpx_mem.h"
namespace libvpx {
// Configuration passed to VP9RateControlRTC::Create() and
// VP9RateControlRTC::UpdateRateControl().
struct VP9RateControlRtcConfig {
// Frame dimensions.
int width;
int height;
// 0-63
// Stream-level quantizer limits; converted with vp9_quantizer_to_qindex()
// when the configuration is applied.
int max_quantizer;
int min_quantizer;
// Multiplied by 1000 when copied into the encoder configuration.
// NOTE(review): presumably kilobits per second - confirm.
int64_t target_bandwidth;
// Buffer levels; copied into the encoder configuration's *_ms fields
// (starting / optimal / maximum buffer size).
int64_t buf_initial_sz;
int64_t buf_optimal_sz;
int64_t buf_sz;
int undershoot_pct;
int overshoot_pct;
int max_intra_bitrate_pct;
double framerate;
// Number of spatial layers
int ss_number_layers;
// Number of temporal layers
int ts_number_layers;
// Per-layer values, indexed by
// LAYER_IDS_TO_IDX(spatial_id, temporal_id, ts_number_layers).
int max_quantizers[VPX_MAX_LAYERS];
int min_quantizers[VPX_MAX_LAYERS];
// Resolution scaling ratio, indexed by spatial layer.
int scaling_factor_num[VPX_SS_MAX_LAYERS];
int scaling_factor_den[VPX_SS_MAX_LAYERS];
// Per-layer bitrate (same index as max_quantizers); multiplied by 1000 when
// applied.
int layer_target_bitrate[VPX_MAX_LAYERS];
// Indexed by temporal layer.
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
};
// Per-frame input to VP9RateControlRTC::ComputeQP().
struct VP9FrameParamsQpRTC {
// Type of the frame about to be encoded (e.g. KEY_FRAME).
FRAME_TYPE frame_type;
// Layer the frame belongs to; copied to the SVC state before the quantizer
// is picked.
int spatial_layer_id;
int temporal_layer_id;
};
// This interface allows using VP9 real-time rate control without initializing
// the encoder. To use this interface, you need to link with libvp9rc.a.
//
// #include "vp9/ratectrl_rtc.h"
// VP9RateControlRtcConfig cfg;
// VP9FrameParamsQpRTC frame_params;
//
// YourFunctionToInitializeConfig(cfg);
// // The constructor is private; Create() allocates and initializes the
// // controller and returns nullptr on allocation failure.
// std::unique_ptr<VP9RateControlRTC> rc_api = VP9RateControlRTC::Create(cfg);
// // start encoding
// while (frame_to_encode) {
// if (config_changed)
// rc_api->UpdateRateControl(cfg);
// YourFunctionToFillFrameParams(frame_params);
// rc_api->ComputeQP(frame_params);
// YourFunctionToUseQP(rc_api->GetQP());
// YourFunctionToUseLoopfilter(rc_api->GetLoopfilterLevel());
// // After encoding
// rc_api->PostEncodeUpdate(encoded_frame_size);
// }
class VP9RateControlRTC {
public:
static std::unique_ptr<VP9RateControlRTC> Create(
const VP9RateControlRtcConfig &cfg);
~VP9RateControlRTC() {
if (cpi_) {
for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) {
for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) {
int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers);
LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer];
vpx_free(lc->map);
vpx_free(lc->last_coded_q_map);
vpx_free(lc->consec_zero_mv);
}
}
vpx_free(cpi_);
}
}
void UpdateRateControl(const VP9RateControlRtcConfig &rc_cfg);
// GetQP() needs to be called after ComputeQP() to get the latest QP
int GetQP() const;
int GetLoopfilterLevel() const;
void ComputeQP(const VP9FrameParamsQpRTC &frame_params);
// Feedback to rate control with the size of current encoded frame
void PostEncodeUpdate(uint64_t encoded_frame_size);
private:
VP9RateControlRTC() {}
void InitRateControl(const VP9RateControlRtcConfig &cfg);
VP9_COMP *cpi_;
};
} // namespace libvpx
#endif // VPX_VP9_RATECTRL_RTC_H_

View file

@ -90,12 +90,20 @@ static int img_read(vpx_image_t *img, FILE *file) {
return 1; return 1;
} }
// Assume every config in VP9EncoderConfig is less than 100 characters.
#define ENCODE_CONFIG_BUF_SIZE 100
// One user-supplied encoder setting, held as NUL-terminated text.
// SetEncodeConfig() fills both buffers with snprintf(), so input longer than
// ENCODE_CONFIG_BUF_SIZE - 1 characters is truncated.
struct EncodeConfig {
// Field name; compared against VP9EncoderConfig member names with strcmp().
char name[ENCODE_CONFIG_BUF_SIZE];
// Field value as text; converted with atoi() when applied.
char value[ENCODE_CONFIG_BUF_SIZE];
};
class SimpleEncode::EncodeImpl { class SimpleEncode::EncodeImpl {
public: public:
VP9_COMP *cpi; VP9_COMP *cpi;
vpx_img_fmt_t img_fmt; vpx_img_fmt_t img_fmt;
vpx_image_t tmp_img; vpx_image_t tmp_img;
std::vector<FIRSTPASS_STATS> first_pass_stats; std::vector<FIRSTPASS_STATS> first_pass_stats;
std::vector<EncodeConfig> encode_config_list;
}; };
static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf, static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
@ -167,7 +175,8 @@ static RefFrameType mv_ref_frame_to_ref_frame_type(
static void update_motion_vector_info( static void update_motion_vector_info(
const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4, const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info) { const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
int motion_vector_scale) {
const int num_units_4x4 = num_rows_4x4 * num_cols_4x4; const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
for (int i = 0; i < num_units_4x4; ++i) { for (int i = 0; i < num_units_4x4; ++i) {
const MV_REFERENCE_FRAME *in_ref_frame = const MV_REFERENCE_FRAME *in_ref_frame =
@ -185,16 +194,34 @@ static void update_motion_vector_info(
mv_ref_frame_to_ref_frame_type(in_ref_frame[1]); mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
output_motion_vector_info[i].mv_row[0] = output_motion_vector_info[i].mv_row[0] =
(double)input_motion_vector_info[i].mv[0].as_mv.row / (double)input_motion_vector_info[i].mv[0].as_mv.row /
kMotionVectorPrecision; motion_vector_scale;
output_motion_vector_info[i].mv_column[0] = output_motion_vector_info[i].mv_column[0] =
(double)input_motion_vector_info[i].mv[0].as_mv.col / (double)input_motion_vector_info[i].mv[0].as_mv.col /
kMotionVectorPrecision; motion_vector_scale;
output_motion_vector_info[i].mv_row[1] = output_motion_vector_info[i].mv_row[1] =
(double)input_motion_vector_info[i].mv[1].as_mv.row / (double)input_motion_vector_info[i].mv[1].as_mv.row /
kMotionVectorPrecision; motion_vector_scale;
output_motion_vector_info[i].mv_column[1] = output_motion_vector_info[i].mv_column[1] =
(double)input_motion_vector_info[i].mv[1].as_mv.col / (double)input_motion_vector_info[i].mv[1].as_mv.col /
kMotionVectorPrecision; motion_vector_scale;
}
}
static void update_tpl_stats_info(const TplDepStats *input_tpl_stats_info,
const int show_frame_count,
TplStatsInfo *output_tpl_stats_info) {
int frame_idx;
for (frame_idx = 0; frame_idx < show_frame_count; ++frame_idx) {
output_tpl_stats_info[frame_idx].intra_cost =
input_tpl_stats_info[frame_idx].intra_cost;
output_tpl_stats_info[frame_idx].inter_cost =
input_tpl_stats_info[frame_idx].inter_cost;
output_tpl_stats_info[frame_idx].mc_flow =
input_tpl_stats_info[frame_idx].mc_flow;
output_tpl_stats_info[frame_idx].mc_dep_cost =
input_tpl_stats_info[frame_idx].mc_dep_cost;
output_tpl_stats_info[frame_idx].mc_ref_cost =
input_tpl_stats_info[frame_idx].mc_ref_cost;
} }
} }
@ -471,12 +498,13 @@ static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
encode_frame_result->coding_data.reset( encode_frame_result->coding_data.reset(
new (std::nothrow) uint8_t[max_coding_data_byte_size]); new (std::nothrow) uint8_t[max_coding_data_byte_size]);
encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_width); encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_height); encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 * encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
encode_frame_result->num_cols_4x4); encode_frame_result->num_cols_4x4);
encode_frame_result->motion_vector_info.resize( encode_frame_result->motion_vector_info.resize(
encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4); encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
if (encode_frame_result->coding_data.get() == nullptr) { if (encode_frame_result->coding_data.get() == nullptr) {
return false; return false;
@ -485,8 +513,20 @@ static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
frame_height, img_fmt); frame_height, img_fmt);
} }
// Copies the recode history (q index and rate of each recode pass) from the
// encoder's |rq_history| into |encode_frame_result|.
static void encode_frame_result_update_rq_history(
    const RATE_QINDEX_HISTORY *rq_history,
    EncodeFrameResult *encode_frame_result) {
  encode_frame_result->recode_count = rq_history->recode_count;
  // recode_count is overwritten for every frame, so the history containers
  // must be reset as well; otherwise a reused EncodeFrameResult would keep
  // accumulating entries from earlier frames and no longer match
  // recode_count.
  encode_frame_result->q_index_history.clear();
  encode_frame_result->rate_history.clear();
  for (int i = 0; i < encode_frame_result->recode_count; ++i) {
    const int q_index = rq_history->q_index_history[i];
    const int rate = rq_history->rate_history[i];
    encode_frame_result->q_index_history.push_back(q_index);
    encode_frame_result->rate_history.push_back(rate);
  }
}
static void update_encode_frame_result( static void update_encode_frame_result(
EncodeFrameResult *encode_frame_result, EncodeFrameResult *encode_frame_result, const int show_frame_count,
const ENCODE_FRAME_RESULT *encode_frame_info) { const ENCODE_FRAME_RESULT *encode_frame_info) {
encode_frame_result->coding_data_bit_size = encode_frame_result->coding_data_bit_size =
encode_frame_result->coding_data_byte_size * 8; encode_frame_result->coding_data_byte_size * 8;
@ -511,9 +551,16 @@ static void update_encode_frame_result(
update_motion_vector_info(encode_frame_info->motion_vector_info, update_motion_vector_info(encode_frame_info->motion_vector_info,
encode_frame_result->num_rows_4x4, encode_frame_result->num_rows_4x4,
encode_frame_result->num_cols_4x4, encode_frame_result->num_cols_4x4,
&encode_frame_result->motion_vector_info[0]); &encode_frame_result->motion_vector_info[0],
kMotionVectorSubPixelPrecision);
update_frame_counts(&encode_frame_info->frame_counts, update_frame_counts(&encode_frame_info->frame_counts,
&encode_frame_result->frame_counts); &encode_frame_result->frame_counts);
if (encode_frame_result->frame_type == kFrameTypeAltRef) {
update_tpl_stats_info(encode_frame_info->tpl_stats_info, show_frame_count,
&encode_frame_result->tpl_stats_info[0]);
}
encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
encode_frame_result);
} }
static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) { static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
@ -612,6 +659,9 @@ static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
group_of_picture->show_frame_count = coding_frame_count - use_alt_ref; group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
group_of_picture->start_show_index = first_show_idx; group_of_picture->start_show_index = first_show_idx;
group_of_picture->start_coding_index = start_coding_index; group_of_picture->start_coding_index = start_coding_index;
group_of_picture->first_is_key_frame = first_is_key_frame;
group_of_picture->use_alt_ref = use_alt_ref;
group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
// We need to make a copy of start reference frame info because we // We need to make a copy of start reference frame info because we
// use it to simulate the ref frame update. // use it to simulate the ref frame update.
@ -692,6 +742,50 @@ static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
start_ref_frame_info, group_of_picture); start_ref_frame_info, group_of_picture);
} }
// If |config|.name equals the spelling of |field|, stores atoi(|config|.value)
// into |structure|->field and sets |ret| to 1.
// Wrapped in do { } while (0) so the expansion is a single statement and stays
// safe inside unbraced if/else; macro arguments are parenthesised.
#define SET_STRUCT_VALUE(config, structure, ret, field) \
  do {                                                  \
    if (strcmp((config).name, #field) == 0) {           \
      (structure)->field = atoi((config).value);        \
      (ret) = 1;                                        \
    }                                                   \
  } while (0)

// Applies one name/value pair to |oxcf|. Only the fields listed below are
// supported; any other name is reported on stderr and ignored.
static void UpdateEncodeConfig(const EncodeConfig &config,
                               VP9EncoderConfig *oxcf) {
  int ret = 0;
  SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
  SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
  SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
  SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
  SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
  SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
  SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
  SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
  SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
  SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
  SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
  SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
  SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
  SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
  if (ret == 0) {
    fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
  }
}
// Builds the encoder configuration for one pass: starts from
// vp9_get_encoder_config(), applies every entry of |encode_config_list| in
// order, and finally forces lag_in_frames to 0 for the first pass.
static VP9EncoderConfig GetEncodeConfig(
    int frame_width, int frame_height, vpx_rational_t frame_rate,
    int target_bitrate, int encode_speed, vpx_enc_pass enc_pass,
    const std::vector<EncodeConfig> &encode_config_list) {
  VP9EncoderConfig encoder_config =
      vp9_get_encoder_config(frame_width, frame_height, frame_rate,
                             target_bitrate, encode_speed, enc_pass);

  // User overrides are applied in the order they were registered.
  for (size_t idx = 0; idx < encode_config_list.size(); ++idx) {
    UpdateEncodeConfig(encode_config_list[idx], &encoder_config);
  }

  // The first-pass override is applied last, so it wins over any
  // lag_in_frames entry in the list.
  const bool is_first_pass = (enc_pass == VPX_RC_FIRST_PASS);
  if (is_first_pass) {
    encoder_config.lag_in_frames = 0;
  }
  return encoder_config;
}
SimpleEncode::SimpleEncode(int frame_width, int frame_height, SimpleEncode::SimpleEncode(int frame_width, int frame_height,
int frame_rate_num, int frame_rate_den, int frame_rate_num, int frame_rate_den,
int target_bitrate, int num_frames, int target_bitrate, int num_frames,
@ -703,6 +797,7 @@ SimpleEncode::SimpleEncode(int frame_width, int frame_height,
frame_rate_den_ = frame_rate_den; frame_rate_den_ = frame_rate_den;
target_bitrate_ = target_bitrate; target_bitrate_ = target_bitrate;
num_frames_ = num_frames; num_frames_ = num_frames;
encode_speed_ = 0;
frame_coding_index_ = 0; frame_coding_index_ = 0;
show_frame_count_ = 0; show_frame_count_ = 0;
@ -724,16 +819,55 @@ SimpleEncode::SimpleEncode(int frame_width, int frame_height,
InitRefFrameInfo(&ref_frame_info_); InitRefFrameInfo(&ref_frame_info_);
} }
// Stores the speed setting that later calls pass to GetEncodeConfig() when
// building the encoder configuration.
void SimpleEncode::SetEncodeSpeed(int encode_speed) {
encode_speed_ = encode_speed;
}
// Queues a name/value override that GetEncodeConfig() later applies to the
// encoder configuration.
//
// Both strings are copied; input longer than ENCODE_CONFIG_BUF_SIZE - 1
// characters is truncated by snprintf().
//
// Returns StatusError when either pointer is null, StatusOk otherwise.
StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
  if (name == nullptr || value == nullptr) {
    // %p requires a void pointer argument, so cast explicitly.
    fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n",
            static_cast<const void *>(name), static_cast<const void *>(value));
    return StatusError;
  }
  EncodeConfig config;
  snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
  snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
  impl_ptr_->encode_config_list.push_back(config);
  return StatusOk;
}
// Writes the encoder configuration that would be used for |pass| to |fp|.
//
// |pass| == 1 selects the first-pass configuration; any other value selects
// the last-pass configuration.
//
// Returns StatusError when |fp| is null, StatusOk otherwise.
StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
  if (fp == nullptr) {
    // %p requires a void pointer argument, so cast explicitly.
    fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n",
            static_cast<void *>(fp));
    return StatusError;
  }
  const vpx_enc_pass enc_pass =
      (pass == 1) ? VPX_RC_FIRST_PASS : VPX_RC_LAST_PASS;
  const vpx_rational_t frame_rate =
      make_vpx_rational(frame_rate_num_, frame_rate_den_);
  const VP9EncoderConfig oxcf =
      GetEncodeConfig(frame_width_, frame_height_, frame_rate, target_bitrate_,
                      encode_speed_, enc_pass, impl_ptr_->encode_config_list);
  vp9_dump_encoder_config(&oxcf, fp);
  return StatusOk;
}
void SimpleEncode::ComputeFirstPassStats() { void SimpleEncode::ComputeFirstPassStats() {
vpx_rational_t frame_rate = vpx_rational_t frame_rate =
make_vpx_rational(frame_rate_num_, frame_rate_den_); make_vpx_rational(frame_rate_num_, frame_rate_den_);
const VP9EncoderConfig oxcf = const VP9EncoderConfig oxcf = GetEncodeConfig(
vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
target_bitrate_, VPX_RC_FIRST_PASS); VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
struct lookahead_ctx *lookahead = cpi->lookahead; struct lookahead_ctx *lookahead = cpi->lookahead;
int i; int i;
int use_highbitdepth = 0; int use_highbitdepth = 0;
const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth = cpi->common.use_highbitdepth; use_highbitdepth = cpi->common.use_highbitdepth;
#endif #endif
@ -766,6 +900,12 @@ void SimpleEncode::ComputeFirstPassStats() {
// vp9_get_compressed_data only generates first pass stats not // vp9_get_compressed_data only generates first pass stats not
// compresses data // compresses data
assert(size == 0); assert(size == 0);
// Get vp9 first pass motion vector info.
std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16,
num_cols_16x16, mv_info.data(),
kMotionVectorFullPixelPrecision);
fp_motion_vector_info_.push_back(mv_info);
} }
impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass)); impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
} }
@ -776,6 +916,9 @@ void SimpleEncode::ComputeFirstPassStats() {
free_encoder(cpi); free_encoder(cpi);
rewind(in_file_); rewind(in_file_);
vpx_img_free(&img); vpx_img_free(&img);
// Generate key_frame_map based on impl_ptr_->first_pass_stats.
key_frame_map_ = ComputeKeyFrameMap();
} }
std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() { std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
@ -800,9 +943,44 @@ std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
return output_stats; return output_stats;
} }
void SimpleEncode::SetExternalGroupOfPicture( std::vector<std::vector<MotionVectorInfo>>
std::vector<int> external_arf_indexes) { SimpleEncode::ObserveFirstPassMotionVectors() {
external_arf_indexes_ = external_arf_indexes; return fp_motion_vector_info_;
}
// Appends |gop_map_size| entries from |gop_map| to gop_map_ and then repairs
// gop_map_ so that it satisfies the constraints listed below.
//
// gop_map_ is expected to end up with one entry per entry of key_frame_map_.
void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
                                                 int gop_map_size) {
  // A null array carries no entries regardless of the size passed with it.
  if (gop_map != nullptr) {
    for (int i = 0; i < gop_map_size; ++i) {
      gop_map_.push_back(gop_map[i]);
    }
  }
  // The following will check and modify gop_map_ to make sure the
  // gop_map_ satisfies the constraints.
  // 1) Each key frame position should be at the start of a gop.
  // 2) The last gop should not use an alt ref.
  assert(gop_map_.size() == key_frame_map_.size());
  // With no entries there is no "last gop" to fix up, and indexing
  // gop_map_[0] below would be out of range.
  if (gop_map_.empty()) return;

  int last_gop_start = 0;
  for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
    // Stay inside key_frame_map_ even when the size assert above is compiled
    // out and the two maps disagree in length.
    const bool is_key_frame =
        static_cast<size_t>(i) < key_frame_map_.size() &&
        key_frame_map_[i] == 1;
    if (is_key_frame && gop_map_[i] == 0) {
      fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
      // Insert a gop start at key frame location.
      gop_map_[i] |= kGopMapFlagStart;
      gop_map_[i] |= kGopMapFlagUseAltRef;
    }
    if (gop_map_[i] & kGopMapFlagStart) {
      last_gop_start = i;
    }
  }
  if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
    fprintf(stderr,
            "Last group of pictures starting at show_idx %d shouldn't use alt "
            "ref\n",
            last_gop_start);
    gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
  }
}
std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
return gop_map_;
} }
template <typename T> template <typename T>
@ -813,13 +991,40 @@ T *GetVectorData(const std::vector<T> &v) {
return const_cast<T *>(v.data()); return const_cast<T *>(v.data());
} }
// Builds the GOP_COMMAND for the group of pictures that starts at
// |start_show_index|.
//
// An empty |gop_map| yields a switched-off command. Otherwise the group runs
// up to (not including) the next entry carrying kGopMapFlagStart, and uses an
// alt ref only if its start entry carries kGopMapFlagUseAltRef and it is not
// the final group in the map.
static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
                                 int start_show_index) {
  GOP_COMMAND command;
  if (gop_map.empty()) {
    gop_command_off(&command);
    return command;
  }

  assert(static_cast<size_t>(start_show_index) < gop_map.size());
  assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);

  // Advance past every entry without kGopMapFlagStart: those entries still
  // belong to the current group. Stop at the next group start or at the end
  // of the map.
  const size_t map_size = gop_map.size();
  int next_gop_start = start_show_index + 1;
  for (; static_cast<size_t>(next_gop_start) < map_size; ++next_gop_start) {
    if ((gop_map[next_gop_start] & kGopMapFlagStart) != 0) break;
  }

  const bool requests_alt_ref =
      (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
  // This is the last gop group, there must be no altref.
  const bool is_last_gop = static_cast<size_t>(next_gop_start) == map_size;
  const int use_alt_ref = (requests_alt_ref && !is_last_gop) ? 1 : 0;

  gop_command_on(&command, next_gop_start - start_show_index, use_alt_ref);
  return command;
}
void SimpleEncode::StartEncode() { void SimpleEncode::StartEncode() {
assert(impl_ptr_->first_pass_stats.size() > 0); assert(impl_ptr_->first_pass_stats.size() > 0);
vpx_rational_t frame_rate = vpx_rational_t frame_rate =
make_vpx_rational(frame_rate_num_, frame_rate_den_); make_vpx_rational(frame_rate_num_, frame_rate_den_);
VP9EncoderConfig oxcf = VP9EncoderConfig oxcf = GetEncodeConfig(
vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
target_bitrate_, VPX_RC_LAST_PASS); VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
vpx_fixed_buf_t stats; vpx_fixed_buf_t stats;
stats.buf = GetVectorData(impl_ptr_->first_pass_stats); stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) * stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
@ -834,11 +1039,10 @@ void SimpleEncode::StartEncode() {
frame_coding_index_ = 0; frame_coding_index_ = 0;
show_frame_count_ = 0; show_frame_count_ = 0;
encode_command_set_external_arf_indexes(&impl_ptr_->cpi->encode_command,
GetVectorData(external_arf_indexes_));
UpdateKeyFrameGroup(show_frame_count_); UpdateKeyFrameGroup(show_frame_count_);
const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_, UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
&group_of_picture_); &group_of_picture_);
rewind(in_file_); rewind(in_file_);
@ -914,6 +1118,9 @@ void SimpleEncode::PostUpdateState(
IncreaseGroupOfPictureIndex(&group_of_picture_); IncreaseGroupOfPictureIndex(&group_of_picture_);
if (IsGroupOfPictureFinished(group_of_picture_)) { if (IsGroupOfPictureFinished(group_of_picture_)) {
const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
gop_command);
// This function needs to be called after ref_frame_info_ is updated // This function needs to be called after ref_frame_info_ is updated
// properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup(). // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_, UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
@ -985,7 +1192,10 @@ void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
abort(); abort();
} }
update_encode_frame_result(encode_frame_result, &encode_frame_info); const GroupOfPicture group_of_picture = this->ObserveGroupOfPicture();
const int show_frame_count = group_of_picture.show_frame_count;
update_encode_frame_result(encode_frame_result, show_frame_count,
&encode_frame_info);
PostUpdateState(*encode_frame_result); PostUpdateState(*encode_frame_result);
} else { } else {
// TODO(angiebird): Clean up encode_frame_result. // TODO(angiebird): Clean up encode_frame_result.
@ -1002,26 +1212,73 @@ void SimpleEncode::EncodeFrameWithQuantizeIndex(
encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command); encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
} }
// Encodes the next frame with a target frame size installed on the encoder's
// encode_command.
// |encode_frame_result| is passed straight through to EncodeFrame().
// |target_frame_bits| and |percent_diff| are forwarded unchanged to
// encode_command_set_target_frame_bits().
void SimpleEncode::EncodeFrameWithTargetFrameBits(
EncodeFrameResult *encode_frame_result, int target_frame_bits,
double percent_diff) {
// Install the target on the encode command before the frame is encoded.
encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
target_frame_bits, percent_diff);
EncodeFrame(encode_frame_result);
// Reset the target afterwards; presumably so that later EncodeFrame() calls
// are not affected by it -- confirm against the encode_command helpers.
encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
}
// Returns the total number of coding frames described by |gop_map|.
// The map is walked one group of pictures at a time: GetGopCommand() is
// queried at the current show index, the index is advanced by that group's
// show_frame_count, and the group's coding frame count is added to the total.
// The walk is expected to end exactly at gop_map.size().
static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
  int start_show_index = 0;
  int coding_frame_count = 0;
  while (static_cast<size_t>(start_show_index) < gop_map.size()) {
    const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
    start_show_index += gop_command.show_frame_count;
    coding_frame_count += gop_command_coding_frame_count(&gop_command);
  }
  // Compare as size_t, like the loop condition above, so the check does not
  // mix signed and unsigned operands.
  assert(static_cast<size_t>(start_show_index) == gop_map.size());
  return coding_frame_count;
}
int SimpleEncode::GetCodingFrameNum() const { int SimpleEncode::GetCodingFrameNum() const {
assert(impl_ptr_->first_pass_stats.size() - 1 > 0); assert(impl_ptr_->first_pass_stats.size() > 0);
if (gop_map_.size() > 0) {
return GetCodingFrameNumFromGopMap(gop_map_);
}
// These are the default settings for now. // These are the default settings for now.
const int multi_layer_arf = 0; const int multi_layer_arf = 0;
const int allow_alt_ref = 1; const int allow_alt_ref = 1;
vpx_rational_t frame_rate = vpx_rational_t frame_rate =
make_vpx_rational(frame_rate_num_, frame_rate_den_); make_vpx_rational(frame_rate_num_, frame_rate_den_);
const VP9EncoderConfig oxcf = const VP9EncoderConfig oxcf = GetEncodeConfig(
vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
target_bitrate_, VPX_RC_LAST_PASS); VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
FIRST_PASS_INFO first_pass_info; FIRST_PASS_INFO first_pass_info;
fps_init_first_pass_info(&first_pass_info, fps_init_first_pass_info(&first_pass_info,
GetVectorData(impl_ptr_->first_pass_stats), GetVectorData(impl_ptr_->first_pass_stats),
num_frames_); num_frames_);
return vp9_get_coding_frame_num(external_arf_indexes_.data(), &oxcf, return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
&frame_info, &first_pass_info,
multi_layer_arf, allow_alt_ref); multi_layer_arf, allow_alt_ref);
} }
// Computes the key frame map of the video from the first pass stats.
// Returns a vector with num_frames_ entries, initialised to 0 and then filled
// in by vp9_get_key_frame_map().
// impl_ptr_->first_pass_stats must already be populated.
std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
  // The last entry of first_pass_stats is the overall stats.
  // The frame count is converted to size_t before adding one so that the
  // comparison does not mix signed and unsigned operands.
  assert(impl_ptr_->first_pass_stats.size() ==
         static_cast<size_t>(num_frames_) + 1);
  vpx_rational_t frame_rate =
      make_vpx_rational(frame_rate_num_, frame_rate_den_);
  const VP9EncoderConfig oxcf = GetEncodeConfig(
      frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
      VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
  FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
  FIRST_PASS_INFO first_pass_info;
  fps_init_first_pass_info(&first_pass_info,
                           GetVectorData(impl_ptr_->first_pass_stats),
                           num_frames_);
  std::vector<int> key_frame_map(num_frames_, 0);
  vp9_get_key_frame_map(&oxcf, &frame_info, &first_pass_info,
                        GetVectorData(key_frame_map));
  return key_frame_map;
}
std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
return key_frame_map_;
}
uint64_t SimpleEncode::GetFramePixelCount() const { uint64_t SimpleEncode::GetFramePixelCount() const {
assert(frame_width_ % 2 == 0); assert(frame_width_ % 2 == 0);
assert(frame_height_ % 2 == 0); assert(frame_height_ % 2 == 0);

View file

@ -19,13 +19,18 @@
namespace vp9 { namespace vp9 {
// Status returned by SimpleEncode::SetEncodeConfig() and
// SimpleEncode::DumpEncodeConfigs().
enum StatusCode {
// Presumably reported when the call succeeded.
StatusOk = 0,
// Takes the next value (1); presumably reported when the call failed.
StatusError,
};
// TODO(angiebird): Add description for each frame type. // TODO(angiebird): Add description for each frame type.
enum FrameType { enum FrameType {
kFrameTypeKey = 0, kFrameTypeKey = 0,
kFrameTypeInter, kFrameTypeInter = 1,
kFrameTypeAltRef, kFrameTypeAltRef = 2,
kFrameTypeOverlay, kFrameTypeOverlay = 3,
kFrameTypeGolden, kFrameTypeGolden = 4,
}; };
// TODO(angiebird): Add description for each reference frame type. // TODO(angiebird): Add description for each reference frame type.
@ -39,6 +44,14 @@ enum RefFrameType {
kRefFrameTypeNone = -1, kRefFrameTypeNone = -1,
}; };
// Bit flags stored in the entries of the gop_map passed to
// SimpleEncode::SetExternalGroupOfPicturesMap(). Each entry corresponds to
// one show frame; an entry of zero means the frame is in the middle of a
// group of pictures.
enum GopMapFlag {
kGopMapFlagStart =
1 << 0, // Indicate this location is the start of a group of pictures.
kGopMapFlagUseAltRef =
1 << 1, // Indicate this group of pictures will use an alt ref. Only set
// this flag when kGopMapFlagStart is set.
};
// The frame is split to 4x4 blocks. // The frame is split to 4x4 blocks.
// This structure contains the information of each 4x4 block. // This structure contains the information of each 4x4 block.
struct PartitionInfo { struct PartitionInfo {
@ -50,9 +63,12 @@ struct PartitionInfo {
int height; // prediction block height int height; // prediction block height
}; };
constexpr int kMotionVectorPrecision = 8; constexpr int kMotionVectorSubPixelPrecision = 8;
constexpr int kMotionVectorFullPixelPrecision = 1;
// The frame is split to 4x4 blocks. // In the first pass. The frame is split to 16x16 blocks.
// This structure contains the information of each 16x16 block.
// In the second pass. The frame is split to 4x4 blocks.
// This structure contains the information of each 4x4 block. // This structure contains the information of each 4x4 block.
struct MotionVectorInfo { struct MotionVectorInfo {
// Number of valid motion vectors, always 0 if this block is in the key frame. // Number of valid motion vectors, always 0 if this block is in the key frame.
@ -60,8 +76,8 @@ struct MotionVectorInfo {
int mv_count; int mv_count;
// The reference frame for motion vectors. If the second motion vector does // The reference frame for motion vectors. If the second motion vector does
// not exist (mv_count = 1), the reference frame is kNoneRefFrame. // not exist (mv_count = 1), the reference frame is kNoneRefFrame.
// Otherwise, the reference frame is either kLastFrame, or kGoldenFrame, // Otherwise, the reference frame is either kRefFrameTypeLast, or
// or kAltRefFrame. // kRefFrameTypePast, or kRefFrameTypeFuture.
RefFrameType ref_frame[2]; RefFrameType ref_frame[2];
// The row offset of motion vectors in the unit of pixel. // The row offset of motion vectors in the unit of pixel.
// If the second motion vector does not exist, the value is 0. // If the second motion vector does not exist, the value is 0.
@ -71,6 +87,24 @@ struct MotionVectorInfo {
double mv_column[2]; double mv_column[2];
}; };
// Accumulated tpl stats of all blocks in one frame.
// For each frame, the tpl stats are computed per 32x32 block.
// EncodeFrameResult::tpl_stats_info holds a vector of these entries.
struct TplStatsInfo {
// Intra complexity: the sum of absolute transform difference (SATD) of
// intra predicted residuals.
int64_t intra_cost;
// Inter complexity: the SATD of inter predicted residuals.
int64_t inter_cost;
// Motion compensated information flow. It measures how much information
// is propagated from the current frame to other frames.
int64_t mc_flow;
// Motion compensated dependency cost. It equals its own intra_cost
// plus the mc_flow.
int64_t mc_dep_cost;
// Motion compensated reference cost.
int64_t mc_ref_cost;
};
struct RefFrameInfo { struct RefFrameInfo {
int coding_indexes[kRefFrameTypeMax]; int coding_indexes[kRefFrameTypeMax];
@ -237,7 +271,7 @@ struct EncodeFrameResult {
std::vector<PartitionInfo> partition_info; std::vector<PartitionInfo> partition_info;
// A vector of the motion vector information of the frame. // A vector of the motion vector information of the frame.
// The number of elements is |num_rows_4x4| * |num_cols_4x4|. // The number of elements is |num_rows_4x4| * |num_cols_4x4|.
// The frame is divided 4x4 blocks of |num_rows_4x4| rows and // The frame is divided into 4x4 blocks of |num_rows_4x4| rows and
// |num_cols_4x4| columns. // |num_cols_4x4| columns.
// Each 4x4 block contains 0 motion vector if this is an intra predicted // Each 4x4 block contains 0 motion vector if this is an intra predicted
// frame (for example, the key frame). If the frame is inter predicted, // frame (for example, the key frame). If the frame is inter predicted,
@ -245,7 +279,25 @@ struct EncodeFrameResult {
// Similar to partition info, all 4x4 blocks inside the same partition block // Similar to partition info, all 4x4 blocks inside the same partition block
// share the same motion vector information. // share the same motion vector information.
std::vector<MotionVectorInfo> motion_vector_info; std::vector<MotionVectorInfo> motion_vector_info;
// A vector of the tpl stats information.
// The tpl stats measure the complexity of a frame, as well as the
// information propagated along the motion trajectory between frames, in
// the reference frame structure.
// The tpl stats could be used as a more accurate spatial and temporal
// complexity measure in addition to the first pass stats.
// The vector contains tpl stats for all show frames in a GOP.
// The tpl stats stored in the vector is according to the encoding order.
// For example, suppose there are N show frames for the current GOP.
// Then tpl_stats_info[0] stores the information of the first frame to be
// encoded for this GOP, i.e., the AltRef frame.
std::vector<TplStatsInfo> tpl_stats_info;
ImageBuffer coded_frame; ImageBuffer coded_frame;
// recode_count, q_index_history and rate_history are only available when
// EncodeFrameWithTargetFrameBits() is used.
int recode_count;
std::vector<int> q_index_history;
std::vector<int> rate_history;
}; };
struct GroupOfPicture { struct GroupOfPicture {
@ -255,6 +307,7 @@ struct GroupOfPicture {
// triggered when the coded frame is the last one in the previous group of // triggered when the coded frame is the last one in the previous group of
// pictures. // pictures.
std::vector<EncodeFrameInfo> encode_frame_list; std::vector<EncodeFrameInfo> encode_frame_list;
// Indicates the index of the next coding frame in encode_frame_list. // Indicates the index of the next coding frame in encode_frame_list.
// In other words, EncodeFrameInfo of the next coding frame can be // In other words, EncodeFrameInfo of the next coding frame can be
// obtained with encode_frame_list[next_encode_frame_index]. // obtained with encode_frame_list[next_encode_frame_index].
@ -263,13 +316,25 @@ struct GroupOfPicture {
// will be increased after each EncodeFrame()/EncodeFrameWithQuantizeIndex() // will be increased after each EncodeFrame()/EncodeFrameWithQuantizeIndex()
// call. // call.
int next_encode_frame_index; int next_encode_frame_index;
// Number of show frames in this group of pictures. // Number of show frames in this group of pictures.
int show_frame_count; int show_frame_count;
// The show index/timestamp of the earliest show frame in the group of // The show index/timestamp of the earliest show frame in the group of
// pictures. // pictures.
int start_show_index; int start_show_index;
// The coding index of the first coding frame in the group of picture.
// The coding index of the first coding frame in the group of pictures.
int start_coding_index; int start_coding_index;
// Indicates whether this group of pictures starts with a key frame.
int first_is_key_frame;
// Indicates whether this group of pictures uses an alt ref.
int use_alt_ref;
// Indicates whether previous group of pictures used an alt ref.
int last_gop_use_alt_ref;
}; };
class SimpleEncode { class SimpleEncode {
@ -283,8 +348,44 @@ class SimpleEncode {
SimpleEncode(SimpleEncode &) = delete; SimpleEncode(SimpleEncode &) = delete;
SimpleEncode &operator=(const SimpleEncode &) = delete; SimpleEncode &operator=(const SimpleEncode &) = delete;
// Makes encoder compute the first pass stats and store it internally for // Adjusts the encoder's coding speed.
// future encode. // If this function is not called, the encoder will use default encode_speed
// 0. Call this function before ComputeFirstPassStats() if needed.
// The encode_speed is equivalent to --cpu-used of the vpxenc command.
// The encode_speed's range should be [0, 9].
// Setting the encode_speed to a higher level will yield faster coding
// at the cost of lower compression efficiency.
void SetEncodeSpeed(int encode_speed);
// Set encoder config
// The following configs in VP9EncoderConfig are allowed to change in this
// function. See https://ffmpeg.org/ffmpeg-codecs.html#libvpx for each
// config's meaning.
// Configs in VP9EncoderConfig: Equivalent configs in ffmpeg:
// 1 key_freq -g
// 2 two_pass_vbrmin_section -minrate * 100LL / bit_rate
// 3 two_pass_vbrmax_section -maxrate * 100LL / bit_rate
// 4 under_shoot_pct -undershoot-pct
// 5 over_shoot_pct -overshoot-pct
// 6 max_threads -threads
// 7 frame_parallel_decoding_mode -frame-parallel
// 8 tile_column -tile-columns
// 9 arnr_max_frames -arnr-maxframes
// 10 arnr_strength -arnr-strength
// 11 lag_in_frames -rc_lookahead
// 12 encode_breakout -static-thresh
// 13 enable_tpl_model -enable-tpl
// 14 enable_auto_arf -auto-alt-ref
StatusCode SetEncodeConfig(const char *name, const char *value);
// A debug function that dumps configs from VP9EncoderConfig
// pass = 1: first pass, pass = 2: second pass
// fp: file pointer for dumping config
StatusCode DumpEncodeConfigs(int pass, FILE *fp);
// Makes encoder compute the first pass stats and store it at
// impl_ptr_->first_pass_stats. key_frame_map_ is also computed based on the
// first pass stats.
void ComputeFirstPassStats(); void ComputeFirstPassStats();
// Outputs the first pass stats represented by a 2-D vector. // Outputs the first pass stats represented by a 2-D vector.
@ -293,13 +394,38 @@ class SimpleEncode {
// values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
std::vector<std::vector<double>> ObserveFirstPassStats(); std::vector<std::vector<double>> ObserveFirstPassStats();
// Sets arf indexes for the video from external input. // Outputs the first pass motion vectors represented by a 2-D vector.
// The arf index determines whether a frame is arf or not. // One can use the frame index at first dimension to retrieve the mvs for
// Therefore it also determines the group of picture size. // each video frame. The frame is divided into 16x16 blocks. The number of
// If set, VP9 will use the external arf index to make decision. // elements is round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
std::vector<std::vector<MotionVectorInfo>> ObserveFirstPassMotionVectors();
// Outputs a copy of key_frame_map_, a binary vector with size equal to the
// number of show frames in the video. For each entry in the vector, 1
// indicates the position is a key frame and 0 indicates it's not a key frame.
// This function should be called after ComputeFirstPassStats()
std::vector<int> ObserveKeyFrameMap() const;
// Sets group of pictures map for coding the entire video.
// Each entry in the gop_map corresponds to a show frame in the video.
// Therefore, the size of gop_map should equal to the number of show frames in
// the entire video.
// If a given entry's kGopMapFlagStart is set, it means this is the start of a
// gop. Once kGopMapFlagStart is set, one can set kGopMapFlagUseAltRef to
// indicate whether this gop uses an alt ref.
// If a given entry is zero, it means it's in the middle of a gop.
// This function should be called only once after ComputeFirstPassStats(), // This function should be called only once after ComputeFirstPassStats(),
// before StartEncode(). // before StartEncode().
void SetExternalGroupOfPicture(std::vector<int> external_arf_indexes); // This API will check and modify the gop_map to satisfy the following
// constraints.
// 1) Each key frame position should be at the start of a gop.
// 2) The last gop should not use an alt ref.
void SetExternalGroupOfPicturesMap(int *gop_map, int gop_map_size);
// Observe the group of pictures map set through
// SetExternalGroupOfPicturesMap(). This function should be called after
// SetExternalGroupOfPicturesMap().
std::vector<int> ObserveExternalGroupOfPicturesMap();
// Initializes the encoder for actual encoding. // Initializes the encoder for actual encoding.
// This function should be called after ComputeFirstPassStats(). // This function should be called after ComputeFirstPassStats().
@ -332,6 +458,17 @@ class SimpleEncode {
void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result, void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
int quantize_index); int quantize_index);
// Encode a frame with target frame bits usage.
// The encoder will find a quantize index to make the actual frame bits usage
// match the target. EncodeFrameWithTargetFrameBits() will recode the frame
// up to 7 times to find a q_index to make the actual_frame_bits satisfy the
// following inequality:
//   |actual_frame_bits - target_frame_bits| * 100 / target_frame_bits
//   <= percent_diff.
void EncodeFrameWithTargetFrameBits(EncodeFrameResult *encode_frame_result,
int target_frame_bits,
double percent_diff);
// Gets the number of coding frames for the video. The coding frames include // Gets the number of coding frames for the video. The coding frames include
// show frame and no show frame. // show frame and no show frame.
// This function should be called after ComputeFirstPassStats(). // This function should be called after ComputeFirstPassStats().
@ -341,6 +478,12 @@ class SimpleEncode {
uint64_t GetFramePixelCount() const; uint64_t GetFramePixelCount() const;
private: private:
// Compute the key frame locations of the video based on first pass stats.
// The results are returned as a binary vector with 1s indicating keyframes
// and 0s indicating non keyframes.
// It has to be called after impl_ptr_->first_pass_stats is computed.
std::vector<int> ComputeKeyFrameMap() const;
// Updates key_frame_group_size_, reset key_frame_group_index_ and init // Updates key_frame_group_size_, reset key_frame_group_index_ and init
// ref_frame_info_. // ref_frame_info_.
void UpdateKeyFrameGroup(int key_frame_show_index); void UpdateKeyFrameGroup(int key_frame_show_index);
@ -358,12 +501,14 @@ class SimpleEncode {
int frame_rate_den_; int frame_rate_den_;
int target_bitrate_; int target_bitrate_;
int num_frames_; int num_frames_;
int encode_speed_;
std::FILE *in_file_; std::FILE *in_file_;
std::FILE *out_file_; std::FILE *out_file_;
std::unique_ptr<EncodeImpl> impl_ptr_; std::unique_ptr<EncodeImpl> impl_ptr_;
std::vector<int> external_arf_indexes_; std::vector<int> key_frame_map_;
std::vector<int> gop_map_;
GroupOfPicture group_of_picture_; GroupOfPicture group_of_picture_;
// The key frame group size includes one key frame plus the number of // The key frame group size includes one key frame plus the number of
@ -387,6 +532,17 @@ class SimpleEncode {
// frame appears? // frame appears?
// Reference frames info of the to-be-coded frame. // Reference frames info of the to-be-coded frame.
RefFrameInfo ref_frame_info_; RefFrameInfo ref_frame_info_;
// A 2-D vector of motion vector information of the frame collected
// from the first pass. The first dimension is the frame index.
// Each frame is divided into 16x16 blocks. The number of elements is
// round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
// Each 16x16 block contains 0 motion vector if this is an intra predicted
// frame (for example, the key frame). If the frame is inter predicted,
// each 16x16 block contains either 1 or 2 motion vectors.
// The first motion vector is always from the LAST_FRAME.
// The second motion vector is always from the GOLDEN_FRAME.
std::vector<std::vector<MotionVectorInfo>> fp_motion_vector_info_;
}; };
} // namespace vp9 } // namespace vp9

View file

@ -13,6 +13,7 @@
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vpx/vpx_encoder.h" #include "vpx/vpx_encoder.h"
#include "vpx/vpx_ext_ratectrl.h"
#include "vpx_dsp/psnr.h" #include "vpx_dsp/psnr.h"
#include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_once.h"
#include "vpx_ports/static_assert.h" #include "vpx_ports/static_assert.h"
@ -355,13 +356,14 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
switch (img->fmt) { switch (img->fmt) {
case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_I42016: break; case VPX_IMG_FMT_I42016:
case VPX_IMG_FMT_NV12: break;
case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I422:
case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I444:
case VPX_IMG_FMT_I440: case VPX_IMG_FMT_I440:
if (ctx->cfg.g_profile != (unsigned int)PROFILE_1) { if (ctx->cfg.g_profile != (unsigned int)PROFILE_1) {
ERROR( ERROR(
"Invalid image format. I422, I444, I440 images are " "Invalid image format. I422, I444, I440, NV12 images are "
"not supported in profile."); "not supported in profile.");
} }
break; break;
@ -391,6 +393,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
static int get_image_bps(const vpx_image_t *img) { static int get_image_bps(const vpx_image_t *img) {
switch (img->fmt) { switch (img->fmt) {
case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_NV12:
case VPX_IMG_FMT_I420: return 12; case VPX_IMG_FMT_I420: return 12;
case VPX_IMG_FMT_I422: return 16; case VPX_IMG_FMT_I422: return 16;
case VPX_IMG_FMT_I444: return 24; case VPX_IMG_FMT_I444: return 24;
@ -468,10 +471,11 @@ static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) {
} }
static vpx_codec_err_t set_encoder_config( static vpx_codec_err_t set_encoder_config(
VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, VP9EncoderConfig *oxcf, vpx_codec_enc_cfg_t *cfg,
const struct vp9_extracfg *extra_cfg) { const struct vp9_extracfg *extra_cfg) {
const int is_vbr = cfg->rc_end_usage == VPX_VBR; const int is_vbr = cfg->rc_end_usage == VPX_VBR;
int sl, tl; int sl, tl;
unsigned int raw_target_rate;
oxcf->profile = cfg->g_profile; oxcf->profile = cfg->g_profile;
oxcf->max_threads = (int)cfg->g_threads; oxcf->max_threads = (int)cfg->g_threads;
oxcf->width = cfg->g_w; oxcf->width = cfg->g_w;
@ -498,8 +502,14 @@ static vpx_codec_err_t set_encoder_config(
cfg->g_pass == VPX_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames; cfg->g_pass == VPX_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames;
oxcf->rc_mode = cfg->rc_end_usage; oxcf->rc_mode = cfg->rc_end_usage;
raw_target_rate =
(unsigned int)((int64_t)oxcf->width * oxcf->height * oxcf->bit_depth * 3 *
oxcf->init_framerate / 1000);
// Cap target bitrate to raw rate
cfg->rc_target_bitrate = VPXMIN(raw_target_rate, cfg->rc_target_bitrate);
// Convert target bandwidth from Kbit/s to Bit/s // Convert target bandwidth from Kbit/s to Bit/s
oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate; oxcf->target_bandwidth = 1000 * (int64_t)cfg->rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct; oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct;
oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct; oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct;
@ -624,7 +634,7 @@ static vpx_codec_err_t set_encoder_config(
} }
if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf); if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf);
// vp9_dump_encoder_config(oxcf); // vp9_dump_encoder_config(oxcf, stderr);
return VPX_CODEC_OK; return VPX_CODEC_OK;
} }
@ -698,6 +708,10 @@ static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx,
extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args);
extra_cfg.cpu_used = VPXMIN(9, extra_cfg.cpu_used); extra_cfg.cpu_used = VPXMIN(9, extra_cfg.cpu_used);
extra_cfg.cpu_used = VPXMAX(-9, extra_cfg.cpu_used); extra_cfg.cpu_used = VPXMAX(-9, extra_cfg.cpu_used);
#if CONFIG_REALTIME_ONLY
if (extra_cfg.cpu_used > -5 && extra_cfg.cpu_used < 5)
extra_cfg.cpu_used = (extra_cfg.cpu_used > 0) ? 5 : -5;
#endif
return update_extra_cfg(ctx, &extra_cfg); return update_extra_cfg(ctx, &extra_cfg);
} }
@ -1559,6 +1573,7 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
lc->scaling_factor_num = params->scaling_factor_num[sl]; lc->scaling_factor_num = params->scaling_factor_num[sl];
lc->scaling_factor_den = params->scaling_factor_den[sl]; lc->scaling_factor_den = params->scaling_factor_den[sl];
lc->speed = params->speed_per_layer[sl]; lc->speed = params->speed_per_layer[sl];
lc->loopfilter_ctrl = params->loopfilter_ctrl[sl];
} }
} }
@ -1703,6 +1718,48 @@ static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK; return VPX_CODEC_OK;
} }
// Control handler registered for VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR.
// Reads one unsigned int from |args| and stores it in the encoder's
// rate-control state (rc.disable_overshoot_maxq_cbr). Always returns
// VPX_CODEC_OK.
static vpx_codec_err_t ctrl_set_disable_overshoot_maxq_cbr(
    vpx_codec_alg_priv_t *ctx, va_list args) {
  ctx->cpi->rc.disable_overshoot_maxq_cbr = va_arg(args, unsigned int);
  return VPX_CODEC_OK;
}
// Control handler registered for VP9E_SET_DISABLE_LOOPFILTER.
// Reads one unsigned int from |args| and stores it in the encoder's
// loopfilter_ctrl field. Always returns VPX_CODEC_OK.
static vpx_codec_err_t ctrl_set_disable_loopfilter(vpx_codec_alg_priv_t *ctx,
                                                   va_list args) {
  ctx->cpi->loopfilter_ctrl = va_arg(args, unsigned int);
  return VPX_CODEC_OK;
}
// Control handler registered for VP9E_SET_EXTERNAL_RATE_CONTROL.
// Takes the caller's vpx_rc_funcs_t from |args| and, when the encoder is
// running its second pass, builds a vpx_rc_config_t from the current encoder
// state and hands both to vp9_extrc_create(). In any other pass nothing is
// set up. Always returns VPX_CODEC_OK.
static vpx_codec_err_t ctrl_set_external_rate_control(vpx_codec_alg_priv_t *ctx,
                                                      va_list args) {
  vpx_rc_funcs_t funcs = *CAST(VP9E_SET_EXTERNAL_RATE_CONTROL, args);
  VP9_COMP *cpi = ctx->cpi;
  const VP9EncoderConfig *oxcf = &cpi->oxcf;
  vpx_rc_config_t ratectrl_config;

  // TODO(angiebird): Check the possibility of this flag being set at pass == 1
  if (oxcf->pass != 2) return VPX_CODEC_OK;

  ratectrl_config.frame_width = cpi->frame_info.frame_width;
  ratectrl_config.frame_height = cpi->frame_info.frame_height;
  ratectrl_config.show_frame_count = cpi->twopass.first_pass_info.num_frames;

  // TODO(angiebird): Double check whether this is the proper way to set up
  // target_bitrate and frame_rate.
  ratectrl_config.target_bitrate_kbps = (int)(oxcf->target_bandwidth / 1000);
  // The frame rate is the timebase with numerator and denominator swapped.
  ratectrl_config.frame_rate_num = oxcf->g_timebase.den;
  ratectrl_config.frame_rate_den = oxcf->g_timebase.num;

  vp9_extrc_create(funcs, ratectrl_config, &cpi->ext_ratectrl);
  return VPX_CODEC_OK;
}
static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference }, { VP8_COPY_REFERENCE, ctrl_copy_reference },
@ -1747,12 +1804,15 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level },
{ VP9E_SET_ROW_MT, ctrl_set_row_mt }, { VP9E_SET_ROW_MT, ctrl_set_row_mt },
{ VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop }, { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop },
{ VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, ctrl_set_disable_overshoot_maxq_cbr },
{ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
{ VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred }, { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred },
{ VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer }, { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer },
{ VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref }, { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref },
{ VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync }, { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },
{ VP9E_SET_DELTA_Q_UV, ctrl_set_delta_q_uv }, { VP9E_SET_DELTA_Q_UV, ctrl_set_delta_q_uv },
{ VP9E_SET_DISABLE_LOOPFILTER, ctrl_set_disable_loopfilter },
{ VP9E_SET_EXTERNAL_RATE_CONTROL, ctrl_set_external_rate_control },
// Getters // Getters
{ VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
@ -1886,7 +1946,7 @@ static vp9_extracfg get_extra_cfg() {
VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
vpx_rational_t frame_rate, vpx_rational_t frame_rate,
int target_bitrate, int target_bitrate, int encode_speed,
vpx_enc_pass enc_pass) { vpx_enc_pass enc_pass) {
/* This function will generate the same VP9EncoderConfig used by the /* This function will generate the same VP9EncoderConfig used by the
* vpxenc command given below. * vpxenc command given below.
@ -1897,6 +1957,7 @@ VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
* HEIGHT: frame_height * HEIGHT: frame_height
* FPS: frame_rate * FPS: frame_rate
* BITRATE: target_bitrate * BITRATE: target_bitrate
* CPU_USED:encode_speed
* *
* INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig
* *
@ -1908,9 +1969,10 @@ VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
* BITRATE=600 * BITRATE=600
* FPS=30/1 * FPS=30/1
* LIMIT=150 * LIMIT=150
* CPU_USED=0
* ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS
* --lag-in-frames=25 \ * --lag-in-frames=25 \
* --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \ * --codec=vp9 --good --cpu-used=CPU_USED --threads=0 --profile=0 \
* --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \ * --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \
* --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \ * --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \
* --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \ * --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \
@ -1933,49 +1995,50 @@ VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
oxcf.tile_columns = 0; oxcf.tile_columns = 0;
oxcf.frame_parallel_decoding_mode = 0; oxcf.frame_parallel_decoding_mode = 0;
oxcf.two_pass_vbrmax_section = 150; oxcf.two_pass_vbrmax_section = 150;
oxcf.speed = abs(encode_speed);
return oxcf; return oxcf;
} }
#define DUMP_STRUCT_VALUE(struct, value) \ #define DUMP_STRUCT_VALUE(fp, structure, value) \
printf(#value " %" PRId64 "\n", (int64_t)(struct)->value) fprintf(fp, #value " %" PRId64 "\n", (int64_t)(structure)->value)
void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) { void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf, FILE *fp) {
DUMP_STRUCT_VALUE(oxcf, profile); DUMP_STRUCT_VALUE(fp, oxcf, profile);
DUMP_STRUCT_VALUE(oxcf, bit_depth); DUMP_STRUCT_VALUE(fp, oxcf, bit_depth);
DUMP_STRUCT_VALUE(oxcf, width); DUMP_STRUCT_VALUE(fp, oxcf, width);
DUMP_STRUCT_VALUE(oxcf, height); DUMP_STRUCT_VALUE(fp, oxcf, height);
DUMP_STRUCT_VALUE(oxcf, input_bit_depth); DUMP_STRUCT_VALUE(fp, oxcf, input_bit_depth);
DUMP_STRUCT_VALUE(oxcf, init_framerate); DUMP_STRUCT_VALUE(fp, oxcf, init_framerate);
// TODO(angiebird): dump g_timebase // TODO(angiebird): dump g_timebase
// TODO(angiebird): dump g_timebase_in_ts // TODO(angiebird): dump g_timebase_in_ts
DUMP_STRUCT_VALUE(oxcf, target_bandwidth); DUMP_STRUCT_VALUE(fp, oxcf, target_bandwidth);
DUMP_STRUCT_VALUE(oxcf, noise_sensitivity); DUMP_STRUCT_VALUE(fp, oxcf, noise_sensitivity);
DUMP_STRUCT_VALUE(oxcf, sharpness); DUMP_STRUCT_VALUE(fp, oxcf, sharpness);
DUMP_STRUCT_VALUE(oxcf, speed); DUMP_STRUCT_VALUE(fp, oxcf, speed);
DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct); DUMP_STRUCT_VALUE(fp, oxcf, rc_max_intra_bitrate_pct);
DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct); DUMP_STRUCT_VALUE(fp, oxcf, rc_max_inter_bitrate_pct);
DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct); DUMP_STRUCT_VALUE(fp, oxcf, gf_cbr_boost_pct);
DUMP_STRUCT_VALUE(oxcf, mode); DUMP_STRUCT_VALUE(fp, oxcf, mode);
DUMP_STRUCT_VALUE(oxcf, pass); DUMP_STRUCT_VALUE(fp, oxcf, pass);
// Key Framing Operations // Key Framing Operations
DUMP_STRUCT_VALUE(oxcf, auto_key); DUMP_STRUCT_VALUE(fp, oxcf, auto_key);
DUMP_STRUCT_VALUE(oxcf, key_freq); DUMP_STRUCT_VALUE(fp, oxcf, key_freq);
DUMP_STRUCT_VALUE(oxcf, lag_in_frames); DUMP_STRUCT_VALUE(fp, oxcf, lag_in_frames);
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// DATARATE CONTROL OPTIONS // DATARATE CONTROL OPTIONS
// vbr, cbr, constrained quality or constant quality // vbr, cbr, constrained quality or constant quality
DUMP_STRUCT_VALUE(oxcf, rc_mode); DUMP_STRUCT_VALUE(fp, oxcf, rc_mode);
// buffer targeting aggressiveness // buffer targeting aggressiveness
DUMP_STRUCT_VALUE(oxcf, under_shoot_pct); DUMP_STRUCT_VALUE(fp, oxcf, under_shoot_pct);
DUMP_STRUCT_VALUE(oxcf, over_shoot_pct); DUMP_STRUCT_VALUE(fp, oxcf, over_shoot_pct);
// buffering parameters // buffering parameters
// TODO(angiebird): dump starting_buffer_level_ms // TODO(angiebird): dump starting_buffer_level_ms
@ -1983,37 +2046,37 @@ void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
// TODO(angiebird): dump maximum_buffer_size_ms // TODO(angiebird): dump maximum_buffer_size_ms
// Frame drop threshold. // Frame drop threshold.
DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark); DUMP_STRUCT_VALUE(fp, oxcf, drop_frames_water_mark);
// controlling quality // controlling quality
DUMP_STRUCT_VALUE(oxcf, fixed_q); DUMP_STRUCT_VALUE(fp, oxcf, fixed_q);
DUMP_STRUCT_VALUE(oxcf, worst_allowed_q); DUMP_STRUCT_VALUE(fp, oxcf, worst_allowed_q);
DUMP_STRUCT_VALUE(oxcf, best_allowed_q); DUMP_STRUCT_VALUE(fp, oxcf, best_allowed_q);
DUMP_STRUCT_VALUE(oxcf, cq_level); DUMP_STRUCT_VALUE(fp, oxcf, cq_level);
DUMP_STRUCT_VALUE(oxcf, aq_mode); DUMP_STRUCT_VALUE(fp, oxcf, aq_mode);
// Special handling of Adaptive Quantization for AltRef frames // Special handling of Adaptive Quantization for AltRef frames
DUMP_STRUCT_VALUE(oxcf, alt_ref_aq); DUMP_STRUCT_VALUE(fp, oxcf, alt_ref_aq);
// Internal frame size scaling. // Internal frame size scaling.
DUMP_STRUCT_VALUE(oxcf, resize_mode); DUMP_STRUCT_VALUE(fp, oxcf, resize_mode);
DUMP_STRUCT_VALUE(oxcf, scaled_frame_width); DUMP_STRUCT_VALUE(fp, oxcf, scaled_frame_width);
DUMP_STRUCT_VALUE(oxcf, scaled_frame_height); DUMP_STRUCT_VALUE(fp, oxcf, scaled_frame_height);
// Enable feature to reduce the frame quantization every x frames. // Enable feature to reduce the frame quantization every x frames.
DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost); DUMP_STRUCT_VALUE(fp, oxcf, frame_periodic_boost);
// two pass datarate control // two pass datarate control
DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias); DUMP_STRUCT_VALUE(fp, oxcf, two_pass_vbrbias);
DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section); DUMP_STRUCT_VALUE(fp, oxcf, two_pass_vbrmin_section);
DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section); DUMP_STRUCT_VALUE(fp, oxcf, two_pass_vbrmax_section);
DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity); DUMP_STRUCT_VALUE(fp, oxcf, vbr_corpus_complexity);
// END DATARATE CONTROL OPTIONS // END DATARATE CONTROL OPTIONS
// ---------------------------------------------------------------- // ----------------------------------------------------------------
// Spatial and temporal scalability. // Spatial and temporal scalability.
DUMP_STRUCT_VALUE(oxcf, ss_number_layers); DUMP_STRUCT_VALUE(fp, oxcf, ss_number_layers);
DUMP_STRUCT_VALUE(oxcf, ts_number_layers); DUMP_STRUCT_VALUE(fp, oxcf, ts_number_layers);
// Bitrate allocation for spatial layers. // Bitrate allocation for spatial layers.
// TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS] // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS]
@ -2021,25 +2084,25 @@ void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
// TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS] // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS]
// TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS] // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS]
DUMP_STRUCT_VALUE(oxcf, enable_auto_arf); DUMP_STRUCT_VALUE(fp, oxcf, enable_auto_arf);
DUMP_STRUCT_VALUE(oxcf, encode_breakout); DUMP_STRUCT_VALUE(fp, oxcf, encode_breakout);
DUMP_STRUCT_VALUE(oxcf, error_resilient_mode); DUMP_STRUCT_VALUE(fp, oxcf, error_resilient_mode);
DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode); DUMP_STRUCT_VALUE(fp, oxcf, frame_parallel_decoding_mode);
DUMP_STRUCT_VALUE(oxcf, arnr_max_frames); DUMP_STRUCT_VALUE(fp, oxcf, arnr_max_frames);
DUMP_STRUCT_VALUE(oxcf, arnr_strength); DUMP_STRUCT_VALUE(fp, oxcf, arnr_strength);
DUMP_STRUCT_VALUE(oxcf, min_gf_interval); DUMP_STRUCT_VALUE(fp, oxcf, min_gf_interval);
DUMP_STRUCT_VALUE(oxcf, max_gf_interval); DUMP_STRUCT_VALUE(fp, oxcf, max_gf_interval);
DUMP_STRUCT_VALUE(oxcf, tile_columns); DUMP_STRUCT_VALUE(fp, oxcf, tile_columns);
DUMP_STRUCT_VALUE(oxcf, tile_rows); DUMP_STRUCT_VALUE(fp, oxcf, tile_rows);
DUMP_STRUCT_VALUE(oxcf, enable_tpl_model); DUMP_STRUCT_VALUE(fp, oxcf, enable_tpl_model);
DUMP_STRUCT_VALUE(oxcf, max_threads); DUMP_STRUCT_VALUE(fp, oxcf, max_threads);
DUMP_STRUCT_VALUE(oxcf, target_level); DUMP_STRUCT_VALUE(fp, oxcf, target_level);
// TODO(angiebird): dump two_pass_stats_in // TODO(angiebird): dump two_pass_stats_in
@ -2047,19 +2110,19 @@ void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
// TODO(angiebird): dump firstpass_mb_stats_in // TODO(angiebird): dump firstpass_mb_stats_in
#endif #endif
DUMP_STRUCT_VALUE(oxcf, tuning); DUMP_STRUCT_VALUE(fp, oxcf, tuning);
DUMP_STRUCT_VALUE(oxcf, content); DUMP_STRUCT_VALUE(fp, oxcf, content);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DUMP_STRUCT_VALUE(oxcf, use_highbitdepth); DUMP_STRUCT_VALUE(fp, oxcf, use_highbitdepth);
#endif #endif
DUMP_STRUCT_VALUE(oxcf, color_space); DUMP_STRUCT_VALUE(fp, oxcf, color_space);
DUMP_STRUCT_VALUE(oxcf, color_range); DUMP_STRUCT_VALUE(fp, oxcf, color_range);
DUMP_STRUCT_VALUE(oxcf, render_width); DUMP_STRUCT_VALUE(fp, oxcf, render_width);
DUMP_STRUCT_VALUE(oxcf, render_height); DUMP_STRUCT_VALUE(fp, oxcf, render_height);
DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode); DUMP_STRUCT_VALUE(fp, oxcf, temporal_layering_mode);
DUMP_STRUCT_VALUE(oxcf, row_mt); DUMP_STRUCT_VALUE(fp, oxcf, row_mt);
DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test); DUMP_STRUCT_VALUE(fp, oxcf, motion_vector_unit_test);
} }
FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) { FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) {

View file

@ -19,10 +19,10 @@ extern "C" {
VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
vpx_rational_t frame_rate, vpx_rational_t frame_rate,
int target_bitrate, int target_bitrate, int encode_speed,
vpx_enc_pass enc_pass); vpx_enc_pass enc_pass);
void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf); void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf, FILE *fp);
FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf); FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf);

View file

@ -88,8 +88,9 @@ vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->y_width = img->d_w; yv12->y_width = img->d_w;
yv12->y_height = img->d_h; yv12->y_height = img->d_h;
yv12->uv_width = yv12->uv_width = img->x_chroma_shift == 1 || img->fmt == VPX_IMG_FMT_NV12
img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; ? (1 + yv12->y_width) / 2
: yv12->y_width;
yv12->uv_height = yv12->uv_height =
img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height; img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
yv12->uv_crop_width = yv12->uv_width; yv12->uv_crop_width = yv12->uv_width;
@ -127,5 +128,9 @@ vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
yv12->subsampling_x = img->x_chroma_shift; yv12->subsampling_x = img->x_chroma_shift;
yv12->subsampling_y = img->y_chroma_shift; yv12->subsampling_y = img->y_chroma_shift;
// When reading the data, UV are in one plane for NV12 format, thus
// x_chroma_shift is 0. After converting, UV are in separate planes, and
// subsampling_x should be set to 1.
if (img->fmt == VPX_IMG_FMT_NV12) yv12->subsampling_x = 1;
return VPX_CODEC_OK; return VPX_CODEC_OK;
} }

View file

@ -18,9 +18,6 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += vp9_cx_iface.h VP9_CX_SRCS-yes += vp9_cx_iface.h
VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc
VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@ -99,6 +96,8 @@ VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c
VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h
VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c
VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h
VP9_CX_SRCS-yes += encoder/vp9_ext_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_ext_ratectrl.h
ifeq ($(CONFIG_VP9_POSTPROC),yes) ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c

View file

@ -27,13 +27,15 @@
* </pre> * </pre>
* *
* An application instantiates a specific decoder instance by using * An application instantiates a specific decoder instance by using
* vpx_codec_init() and a pointer to the algorithm's interface structure: * vpx_codec_dec_init() and a pointer to the algorithm's interface structure:
* <pre> * <pre>
* my_app.c: * my_app.c:
* extern vpx_codec_iface_t my_codec; * extern vpx_codec_iface_t my_codec;
* { * {
* vpx_codec_ctx_t algo; * vpx_codec_ctx_t algo;
* res = vpx_codec_init(&algo, &my_codec); * int threads = 4;
* vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
* res = vpx_codec_dec_init(&algo, &my_codec, &cfg, 0);
* } * }
* </pre> * </pre>
* *
@ -66,7 +68,7 @@ typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t;
/*!\brief init function pointer prototype /*!\brief init function pointer prototype
* *
* Performs algorithm-specific initialization of the decoder context. This * Performs algorithm-specific initialization of the decoder context. This
* function is called by the generic vpx_codec_init() wrapper function, so * function is called by vpx_codec_dec_init() and vpx_codec_enc_init(), so
* plugins implementing this interface may trust the input parameters to be * plugins implementing this interface may trust the input parameters to be
* properly initialized. * properly initialized.
* *
@ -175,16 +177,15 @@ typedef const struct vpx_codec_ctrl_fn_map {
/*!\brief decode data function pointer prototype /*!\brief decode data function pointer prototype
* *
* Processes a buffer of coded data. If the processing results in a new * Processes a buffer of coded data. If the processing results in a new
* decoded frame becoming available, #VPX_CODEC_CB_PUT_SLICE and * decoded frame becoming available, put_slice and put_frame callbacks
* #VPX_CODEC_CB_PUT_FRAME events are generated as appropriate. This * are invoked as appropriate. This function is called by the generic
* function is called by the generic vpx_codec_decode() wrapper function, * vpx_codec_decode() wrapper function, so plugins implementing this
* so plugins implementing this interface may trust the input parameters * interface may trust the input parameters to be properly initialized.
* to be properly initialized.
* *
* \param[in] ctx Pointer to this instance's context * \param[in] ctx Pointer to this instance's context
* \param[in] data Pointer to this block of new coded data. If * \param[in] data Pointer to this block of new coded data. If
* NULL, a #VPX_CODEC_CB_PUT_FRAME event is posted * NULL, the put_frame callback is invoked for
* for the previously decoded frame. * the previously decoded frame.
* \param[in] data_sz Size of the coded data, in bytes. * \param[in] data_sz Size of the coded data, in bytes.
* *
* \return Returns #VPX_CODEC_OK if the coded data was processed completely * \return Returns #VPX_CODEC_OK if the coded data was processed completely

View file

@ -97,7 +97,7 @@ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, int ctrl_id, ...) {
res = VPX_CODEC_INCAPABLE; res = VPX_CODEC_INCAPABLE;
for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) { for (entry = ctx->iface->ctrl_maps; entry->fn; entry++) {
if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) { if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) {
va_list ap; va_list ap;

View file

@ -138,9 +138,10 @@ vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
if (!ctx || !cb) if (!ctx || !cb)
res = VPX_CODEC_INVALID_PARAM; res = VPX_CODEC_INVALID_PARAM;
else if (!ctx->iface || !ctx->priv || else if (!ctx->iface || !ctx->priv)
!(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
res = VPX_CODEC_ERROR; res = VPX_CODEC_ERROR;
else if (!(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
res = VPX_CODEC_INCAPABLE;
else { else {
ctx->priv->dec.put_frame_cb.u.put_frame = cb; ctx->priv->dec.put_frame_cb.u.put_frame = cb;
ctx->priv->dec.put_frame_cb.user_priv = user_priv; ctx->priv->dec.put_frame_cb.user_priv = user_priv;
@ -157,9 +158,10 @@ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
if (!ctx || !cb) if (!ctx || !cb)
res = VPX_CODEC_INVALID_PARAM; res = VPX_CODEC_INVALID_PARAM;
else if (!ctx->iface || !ctx->priv || else if (!ctx->iface || !ctx->priv)
!(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE))
res = VPX_CODEC_ERROR; res = VPX_CODEC_ERROR;
else if (!(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE))
res = VPX_CODEC_INCAPABLE;
else { else {
ctx->priv->dec.put_slice_cb.u.put_slice = cb; ctx->priv->dec.put_slice_cb.u.put_slice = cb;
ctx->priv->dec.put_slice_cb.user_priv = user_priv; ctx->priv->dec.put_slice_cb.user_priv = user_priv;
@ -176,9 +178,10 @@ vpx_codec_err_t vpx_codec_set_frame_buffer_functions(
if (!ctx || !cb_get || !cb_release) { if (!ctx || !cb_get || !cb_release) {
res = VPX_CODEC_INVALID_PARAM; res = VPX_CODEC_INVALID_PARAM;
} else if (!ctx->iface || !ctx->priv || } else if (!ctx->iface || !ctx->priv) {
!(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
res = VPX_CODEC_ERROR; res = VPX_CODEC_ERROR;
} else if (!(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
res = VPX_CODEC_INCAPABLE;
} else { } else {
res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release, res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release,
cb_priv); cb_priv);

View file

@ -39,7 +39,8 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
/* Get sample size for this format */ /* Get sample size for this format */
switch (fmt) { switch (fmt) {
case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12: bps = 12; break; case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_NV12: bps = 12; break;
case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I422:
case VPX_IMG_FMT_I440: bps = 16; break; case VPX_IMG_FMT_I440: bps = 16; break;
case VPX_IMG_FMT_I444: bps = 24; break; case VPX_IMG_FMT_I444: bps = 24; break;
@ -51,6 +52,8 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
} }
/* Get chroma shift values for this format */ /* Get chroma shift values for this format */
// For VPX_IMG_FMT_NV12, xcs needs to be 0 such that UV data is all read at
// one time.
switch (fmt) { switch (fmt) {
case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_YV12:
@ -62,6 +65,7 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
switch (fmt) { switch (fmt) {
case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_NV12:
case VPX_IMG_FMT_I440: case VPX_IMG_FMT_I440:
case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42016:
@ -173,7 +177,12 @@ int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,
data + x * bytes_per_sample + y * img->stride[VPX_PLANE_Y]; data + x * bytes_per_sample + y * img->stride[VPX_PLANE_Y];
data += img->h * img->stride[VPX_PLANE_Y]; data += img->h * img->stride[VPX_PLANE_Y];
if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) { if (img->fmt == VPX_IMG_FMT_NV12) {
img->planes[VPX_PLANE_U] =
data + (x >> img->x_chroma_shift) +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
img->planes[VPX_PLANE_V] = img->planes[VPX_PLANE_U] + 1;
} else if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) {
img->planes[VPX_PLANE_U] = img->planes[VPX_PLANE_U] =
data + (x >> img->x_chroma_shift) * bytes_per_sample + data + (x >> img->x_chroma_shift) * bytes_per_sample +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];

View file

@ -17,6 +17,7 @@
*/ */
#include "./vp8.h" #include "./vp8.h"
#include "./vpx_encoder.h" #include "./vpx_encoder.h"
#include "./vpx_ext_ratectrl.h"
/*!\file /*!\file
* \brief Provides definitions for using VP8 or VP9 encoder algorithm within the * \brief Provides definitions for using VP8 or VP9 encoder algorithm within the
@ -684,6 +685,33 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9 * Supported in codecs: VP9
*/ */
VP9E_SET_DELTA_Q_UV, VP9E_SET_DELTA_Q_UV,
/*!\brief Codec control function to disable increasing Q on overshoot in CBR.
*
* 0: On (default), 1: Disable.
*
* Supported in codecs: VP9
*/
VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR,
/*!\brief Codec control function to disable loopfilter.
*
* 0: Loopfilter on all frames, 1: Disable on non reference frames.
* 2: Disable on all frames.
*
* Supported in codecs: VP9
*/
VP9E_SET_DISABLE_LOOPFILTER,
/*!\brief Codec control function to enable external rate control library.
*
* args[0]: path of the rate control library
*
* args[1]: private config of the rate control library
*
* Supported in codecs: VP9
*/
VP9E_SET_EXTERNAL_RATE_CONTROL,
}; };
/*!\brief vpx 1-D scaling mode /*!\brief vpx 1-D scaling mode
@ -1034,6 +1062,15 @@ VPX_CTRL_USE_TYPE(VP9E_SET_POSTENCODE_DROP, unsigned int)
VPX_CTRL_USE_TYPE(VP9E_SET_DELTA_Q_UV, int) VPX_CTRL_USE_TYPE(VP9E_SET_DELTA_Q_UV, int)
#define VPX_CTRL_VP9E_SET_DELTA_Q_UV #define VPX_CTRL_VP9E_SET_DELTA_Q_UV
VPX_CTRL_USE_TYPE(VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, int)
#define VPX_CTRL_VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR
VPX_CTRL_USE_TYPE(VP9E_SET_DISABLE_LOOPFILTER, int)
#define VPX_CTRL_VP9E_SET_DISABLE_LOOPFILTER
VPX_CTRL_USE_TYPE(VP9E_SET_EXTERNAL_RATE_CONTROL, vpx_rc_funcs_t *)
#define VPX_CTRL_VP9E_SET_EXTERNAL_RATE_CONTROL
/*!\endcond */ /*!\endcond */
/*! @} - end defgroup vp8_encoder */ /*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -22,13 +22,16 @@
* video codec algorithm. * video codec algorithm.
* *
* An application instantiates a specific codec instance by using * An application instantiates a specific codec instance by using
* vpx_codec_init() and a pointer to the algorithm's interface structure: * vpx_codec_dec_init() or vpx_codec_enc_init() and a pointer to the
* algorithm's interface structure:
* <pre> * <pre>
* my_app.c: * my_app.c:
* extern vpx_codec_iface_t my_codec; * extern vpx_codec_iface_t my_codec;
* { * {
* vpx_codec_ctx_t algo; * vpx_codec_ctx_t algo;
* res = vpx_codec_init(&algo, &my_codec); * int threads = 4;
* vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
* res = vpx_codec_dec_init(&algo, &my_codec, &cfg, 0);
* } * }
* </pre> * </pre>
* *

View file

@ -24,6 +24,7 @@ API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h
API_DOC_SRCS-yes += vpx_codec.h API_DOC_SRCS-yes += vpx_codec.h
API_DOC_SRCS-yes += vpx_decoder.h API_DOC_SRCS-yes += vpx_decoder.h
API_DOC_SRCS-yes += vpx_encoder.h API_DOC_SRCS-yes += vpx_encoder.h
API_DOC_SRCS-yes += vpx_ext_ratectrl.h
API_DOC_SRCS-yes += vpx_frame_buffer.h API_DOC_SRCS-yes += vpx_frame_buffer.h
API_DOC_SRCS-yes += vpx_image.h API_DOC_SRCS-yes += vpx_image.h
@ -39,3 +40,4 @@ API_SRCS-yes += vpx_codec.mk
API_SRCS-yes += vpx_frame_buffer.h API_SRCS-yes += vpx_frame_buffer.h
API_SRCS-yes += vpx_image.h API_SRCS-yes += vpx_image.h
API_SRCS-yes += vpx_integer.h API_SRCS-yes += vpx_integer.h
API_SRCS-yes += vpx_ext_ratectrl.h

View file

@ -58,6 +58,10 @@ extern "C" {
#define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 #define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000
/*!\brief Can receive encoded frames one fragment at a time */ /*!\brief Can receive encoded frames one fragment at a time */
#define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 #define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000
/*!\brief Can support frame-based multi-threading */
#define VPX_CODEC_CAP_FRAME_THREADING 0x200000
/*!\brief Can support external frame buffers */
#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
/*! \brief Initialization-time Feature Enabling /*! \brief Initialization-time Feature Enabling
* *
@ -66,11 +70,6 @@ extern "C" {
* *
* The available flags are specified by VPX_CODEC_USE_* defines. * The available flags are specified by VPX_CODEC_USE_* defines.
*/ */
/*!\brief Can support frame-based multi-threading */
#define VPX_CODEC_CAP_FRAME_THREADING 0x200000
/*!brief Can support external frame buffers */
#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
#define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ #define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
/*!\brief Conceal errors in decoded frames */ /*!\brief Conceal errors in decoded frames */
#define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 #define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000
@ -185,8 +184,8 @@ vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx,
/*!\brief Decode data /*!\brief Decode data
* *
* Processes a buffer of coded data. If the processing results in a new * Processes a buffer of coded data. If the processing results in a new
* decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be * decoded frame becoming available, put_slice and put_frame callbacks may be
* generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode * invoked, as appropriate. Encoded data \ref MUST be passed in DTS (decode
* time stamp) order. Frames produced will always be in PTS (presentation * time stamp) order. Frames produced will always be in PTS (presentation
* time stamp) order. * time stamp) order.
* If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled, * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled,
@ -199,8 +198,8 @@ vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx,
* *
* \param[in] ctx Pointer to this instance's context * \param[in] ctx Pointer to this instance's context
* \param[in] data Pointer to this block of new coded data. If * \param[in] data Pointer to this block of new coded data. If
* NULL, a VPX_CODEC_CB_PUT_FRAME event is posted * NULL, the put_frame callback is invoked for
* for the previously decoded frame. * the previously decoded frame.
* \param[in] data_sz Size of the coded data, in bytes. * \param[in] data_sz Size of the coded data, in bytes.
* \param[in] user_priv Application specific data to associate with * \param[in] user_priv Application specific data to associate with
* this frame. * this frame.
@ -236,11 +235,10 @@ vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter);
/*!\defgroup cap_put_frame Frame-Based Decoding Functions /*!\defgroup cap_put_frame Frame-Based Decoding Functions
* *
* The following functions are required to be implemented for all decoders * The following function is required to be implemented for all decoders
* that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling this
* functions * function for codecs that don't advertise this capability will result in
* for codecs that don't advertise this capability will result in an error * an error code being returned, usually VPX_CODEC_INCAPABLE.
* code being returned, usually VPX_CODEC_ERROR
* @{ * @{
*/ */
@ -264,8 +262,9 @@ typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv,
* \retval #VPX_CODEC_OK * \retval #VPX_CODEC_OK
* Callback successfully registered. * Callback successfully registered.
* \retval #VPX_CODEC_ERROR * \retval #VPX_CODEC_ERROR
* Decoder context not initialized, or algorithm not capable of * Decoder context not initialized.
* posting slice completion. * \retval #VPX_CODEC_INCAPABLE
* Algorithm not capable of posting frame completion.
*/ */
vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
vpx_codec_put_frame_cb_fn_t cb, vpx_codec_put_frame_cb_fn_t cb,
@ -275,18 +274,17 @@ vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx,
/*!\defgroup cap_put_slice Slice-Based Decoding Functions /*!\defgroup cap_put_slice Slice-Based Decoding Functions
* *
* The following functions are required to be implemented for all decoders * The following function is required to be implemented for all decoders
* that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling this
* functions * function for codecs that don't advertise this capability will result in
* for codecs that don't advertise this capability will result in an error * an error code being returned, usually VPX_CODEC_INCAPABLE.
* code being returned, usually VPX_CODEC_ERROR
* @{ * @{
*/ */
/*!\brief put slice callback prototype /*!\brief put slice callback prototype
* *
* This callback is invoked by the decoder to notify the application of * This callback is invoked by the decoder to notify the application of
* the availability of partially decoded image data. The * the availability of partially decoded image data.
*/ */
typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv, typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv,
const vpx_image_t *img, const vpx_image_t *img,
@ -305,8 +303,9 @@ typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv,
* \retval #VPX_CODEC_OK * \retval #VPX_CODEC_OK
* Callback successfully registered. * Callback successfully registered.
* \retval #VPX_CODEC_ERROR * \retval #VPX_CODEC_ERROR
* Decoder context not initialized, or algorithm not capable of * Decoder context not initialized.
* posting slice completion. * \retval #VPX_CODEC_INCAPABLE
* Algorithm not capable of posting slice completion.
*/ */
vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
vpx_codec_put_slice_cb_fn_t cb, vpx_codec_put_slice_cb_fn_t cb,
@ -316,10 +315,10 @@ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
/*!\defgroup cap_external_frame_buffer External Frame Buffer Functions /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
* *
* The following section is required to be implemented for all decoders * The following function is required to be implemented for all decoders
* that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability. * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
* Calling this function for codecs that don't advertise this capability * Calling this function for codecs that don't advertise this capability
* will result in an error code being returned, usually VPX_CODEC_ERROR. * will result in an error code being returned, usually VPX_CODEC_INCAPABLE.
* *
* \note * \note
* Currently this only works with VP9. * Currently this only works with VP9.
@ -344,8 +343,9 @@ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx,
* \retval #VPX_CODEC_INVALID_PARAM * \retval #VPX_CODEC_INVALID_PARAM
* One or more of the callbacks were NULL. * One or more of the callbacks were NULL.
* \retval #VPX_CODEC_ERROR * \retval #VPX_CODEC_ERROR
* Decoder context not initialized, or algorithm not capable of * Decoder context not initialized.
* using external frame buffers. * \retval #VPX_CODEC_INCAPABLE
* Algorithm not capable of using external frame buffers.
* *
* \note * \note
* When decoding VP9, the application may be required to pass in at least * When decoding VP9, the application may be required to pass in at least

View file

@ -705,6 +705,7 @@ typedef struct vpx_svc_parameters {
int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */
int speed_per_layer[VPX_MAX_LAYERS]; /**< Speed setting for each sl */ int speed_per_layer[VPX_MAX_LAYERS]; /**< Speed setting for each sl */
int temporal_layering_mode; /**< Temporal layering mode */ int temporal_layering_mode; /**< Temporal layering mode */
int loopfilter_ctrl[VPX_MAX_LAYERS]; /**< Loopfilter ctrl for each sl */
} vpx_svc_extra_cfg_t; } vpx_svc_extra_cfg_t;
/*!\brief Initialize an encoder instance /*!\brief Initialize an encoder instance

View file

@ -0,0 +1,337 @@
/*
* Copyright (c) 2020 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_VPX_VPX_EXT_RATECTRL_H_
#define VPX_VPX_VPX_EXT_RATECTRL_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./vpx_integer.h"
/*!\brief Abstract rate control model handler
*
* The encoder will receive the model handler from create_model() defined in
* vpx_rc_funcs_t.
*/
typedef void *vpx_rc_model_t;
/*!\brief Encode frame decision made by the external rate control model
*
* The encoder will receive the decision from the external rate control model
* through get_encodeframe_decision() defined in vpx_rc_funcs_t.
*/
typedef struct vpx_rc_encodeframe_decision {
int q_index; /**< Quantizer step index [0..255]*/
} vpx_rc_encodeframe_decision_t;
/*!\brief Information for the frame to be encoded.
*
* The encoder will send the information to external rate control model through
* get_encodeframe_decision() defined in vpx_rc_funcs_t.
*/
typedef struct vpx_rc_encodeframe_info {
/*!
* Type of the frame to be encoded:
* 0: Key frame
* 1: Inter frame
* 2: Alternate reference frame
* 3: Overlay frame
* 4: Golden frame
*/
int frame_type;
int show_index; /**< display index, starts from zero */
int coding_index; /**< coding index, starts from zero */
int ref_frame_coding_indexes[3]; /**< three reference frames' coding indices */
/*!
* The validity of the three reference frames.
* 0: Invalid
* 1: Valid
* NOTE(review): presumably entry i describes ref_frame_coding_indexes[i];
* confirm against the encoder side.
*/
int ref_frame_valid_list[3];
} vpx_rc_encodeframe_info_t;
/*!\brief Frame coding result
*
* The encoder will send the result to the external rate control model through
* update_encodeframe_result() defined in vpx_rc_funcs_t, via that callback's
* encode_frame_result argument.
*/
typedef struct vpx_rc_encodeframe_result {
int64_t sse; /**< sum of squared error of the reconstructed frame */
int64_t bit_count; /**< number of bits spent on coding the frame */
int64_t pixel_count; /**< number of pixels in YUV planes of the frame */
} vpx_rc_encodeframe_result_t;
/*!\brief Status returned by rate control callback functions.
*
* Every callback type declared in this header returns a vpx_rc_status_t.
*/
typedef enum vpx_rc_status {
VPX_RC_OK = 0,
VPX_RC_ERROR = 1,
} vpx_rc_status_t;
/*!\brief First pass frame stats
*
* This is a mirror of vp9's FIRSTPASS_STATS except that spatial_layer_id is
* omitted. Every field is stored as a double.
*/
typedef struct vpx_rc_frame_stats {
/*!
* Frame number in display order, if stats are for a single frame.
* No real meaning for a collection of frames.
*/
double frame;
/*!
* Weight assigned to this frame (or total weight for the collection of
* frames) currently based on intra factor and brightness factor. This is used
* to distribute bits between easier and harder frames.
*/
double weight;
/*!
* Intra prediction error.
*/
double intra_error;
/*!
* Best of intra pred error and inter pred error using last frame as ref.
*/
double coded_error;
/*!
* Best of intra pred error and inter pred error using golden frame as ref.
*/
double sr_coded_error;
/*!
* Estimate the noise energy of the current frame.
*/
double frame_noise_energy;
/*!
* Percentage of blocks with inter pred error < intra pred error.
*/
double pcnt_inter;
/*!
* Percentage of blocks using (inter prediction and) non-zero motion vectors.
*/
double pcnt_motion;
/*!
* Percentage of blocks where golden frame was better than last or intra:
* inter pred error using golden frame < inter pred error using last frame and
* inter pred error using golden frame < intra pred error
*/
double pcnt_second_ref;
/*!
* Percentage of blocks where intra and inter prediction errors were very
* close. Note that this is a 'weighted count', that is, the blocks may be
* weighted by how close the two errors were.
*/
double pcnt_neutral;
/*!
* Percentage of blocks that have intra error < inter error and inter error <
* LOW_I_THRESH
* LOW_I_THRESH = 24000 using bit_depth 8
* LOW_I_THRESH = 24000 << 4 using bit_depth 10
* LOW_I_THRESH = 24000 << 8 using bit_depth 12
*/
double pcnt_intra_low;
/*!
* Percentage of blocks that have intra error < inter error and intra error <
* LOW_I_THRESH but inter error >= LOW_I_THRESH
* LOW_I_THRESH = 24000 using bit_depth 8
* LOW_I_THRESH = 24000 << 4 using bit_depth 10
* LOW_I_THRESH = 24000 << 8 using bit_depth 12
*/
double pcnt_intra_high;
/*!
* Percentage of blocks that have almost no intra error residual
* (i.e. are in effect completely flat and untextured in the intra
* domain). In natural videos this is uncommon, but it is much more
* common in animations, graphics and screen content, so may be used
* as a signal to detect these types of content.
*/
double intra_skip_pct;
/*!
* Percentage of blocks that have intra error < SMOOTH_INTRA_THRESH
* SMOOTH_INTRA_THRESH = 4000 using bit_depth 8
* SMOOTH_INTRA_THRESH = 4000 << 4 using bit_depth 10
* SMOOTH_INTRA_THRESH = 4000 << 8 using bit_depth 12
*/
double intra_smooth_pct;
/*!
* Image mask rows top and bottom.
*/
double inactive_zone_rows;
/*!
* Image mask columns at left and right edges.
*/
double inactive_zone_cols;
/*!
* Average of row motion vectors.
*/
double MVr;
/*!
* Mean of absolute value of row motion vectors.
*/
double mvr_abs;
/*!
* Mean of column motion vectors.
*/
double MVc;
/*!
* Mean of absolute value of column motion vectors.
*/
double mvc_abs;
/*!
* Variance of row motion vectors.
*/
double MVrv;
/*!
* Variance of column motion vectors.
*/
double MVcv;
/*!
* Value in range [-1,1] indicating fraction of row and column motion vectors
* that point inwards (negative MV value) or outwards (positive MV value).
* For example, value of 1 indicates, all row/column MVs are inwards.
*/
double mv_in_out_count;
/*!
* Duration of the frame / collection of frames.
*/
double duration;
/*!
* 1.0 if stats are for a single frame, OR
* Number of frames in this collection for which the stats are accumulated.
*/
double count;
} vpx_rc_frame_stats_t;
/*!\brief Collection of first pass frame stats
*
* The encoder passes this struct to the external rate control model through
* send_firstpass_stats() defined in vpx_rc_funcs_t.
*/
typedef struct vpx_rc_firstpass_stats {
/*!
* Pointer to first pass frame stats.
* The pointed array of vpx_rc_frame_stats_t should have length equal to
* number of show frames in the video.
*/
vpx_rc_frame_stats_t *frame_stats;
/*!
* Number of show frames in the video.
*/
int num_frames;
} vpx_rc_firstpass_stats_t;
/*!\brief Encode config sent to external rate control model
*
* The encoder passes this struct to create_model() defined in vpx_rc_funcs_t
* through the ratectrl_config argument.
*/
typedef struct vpx_rc_config {
int frame_width; /**< frame width */
int frame_height; /**< frame height */
int show_frame_count; /**< number of visible frames in the video */
/*!
* Target bitrate in kilobits per second (kbps)
*/
int target_bitrate_kbps;
int frame_rate_num; /**< numerator of frame rate */
int frame_rate_den; /**< denominator of frame rate */
} vpx_rc_config_t;
/*!\brief Create an external rate control model callback prototype
*
* This callback is invoked by the encoder to create an external rate control
* model.
*
* \param[in] priv Callback's private data
* \param[in] ratectrl_config Pointer to vpx_rc_config_t
* \param[out] rate_ctrl_model_pt Pointer to vpx_rc_model_t
*
* \return Status of the call as a vpx_rc_status_t
*/
typedef vpx_rc_status_t (*vpx_rc_create_model_cb_fn_t)(
void *priv, const vpx_rc_config_t *ratectrl_config,
vpx_rc_model_t *rate_ctrl_model_pt);
/*!\brief Send first pass stats to the external rate control model callback
* prototype
*
* This callback is invoked by the encoder to send first pass stats to the
* external rate control model.
*
* \param[in] rate_ctrl_model rate control model
* \param[in] first_pass_stats first pass stats
*
* \return Status of the call as a vpx_rc_status_t
*/
typedef vpx_rc_status_t (*vpx_rc_send_firstpass_stats_cb_fn_t)(
vpx_rc_model_t rate_ctrl_model,
const vpx_rc_firstpass_stats_t *first_pass_stats);
/*!\brief Receive encode frame decision callback prototype
*
* This callback is invoked by the encoder to receive encode frame decision from
* the external rate control model.
*
* \param[in] rate_ctrl_model rate control model
* \param[in] encode_frame_info information of the coding frame
* \param[out] frame_decision encode decision of the coding frame
*
* \return Status of the call as a vpx_rc_status_t
*/
typedef vpx_rc_status_t (*vpx_rc_get_encodeframe_decision_cb_fn_t)(
vpx_rc_model_t rate_ctrl_model,
const vpx_rc_encodeframe_info_t *encode_frame_info,
vpx_rc_encodeframe_decision_t *frame_decision);
/*!\brief Update encode frame result callback prototype
*
* This callback is invoked by the encoder to update encode frame result to the
* external rate control model.
*
* \param[in] rate_ctrl_model rate control model
* \param[in] encode_frame_result encode result of the coding frame
*
* \return Status of the call as a vpx_rc_status_t
*/
typedef vpx_rc_status_t (*vpx_rc_update_encodeframe_result_cb_fn_t)(
vpx_rc_model_t rate_ctrl_model,
const vpx_rc_encodeframe_result_t *encode_frame_result);
/*!\brief Delete the external rate control model callback prototype
*
* This callback is invoked by the encoder to delete the external rate control
* model.
*
* \param[in] rate_ctrl_model rate control model
*
* \return Status of the call as a vpx_rc_status_t
*/
typedef vpx_rc_status_t (*vpx_rc_delete_model_cb_fn_t)(
vpx_rc_model_t rate_ctrl_model);
/*!\brief Callback function set for external rate control.
*
* The user can enable external rate control by registering
* a set of callback functions with the codec control flag
* VP9E_SET_EXTERNAL_RATE_CONTROL.
*/
typedef struct vpx_rc_funcs {
/*!
* Create an external rate control model.
*/
vpx_rc_create_model_cb_fn_t create_model;
/*!
* Send first pass stats to the external rate control model.
*/
vpx_rc_send_firstpass_stats_cb_fn_t send_firstpass_stats;
/*!
* Get encodeframe decision from the external rate control model.
*/
vpx_rc_get_encodeframe_decision_cb_fn_t get_encodeframe_decision;
/*!
* Update encodeframe result to the external rate control model.
*/
vpx_rc_update_encodeframe_result_cb_fn_t update_encodeframe_result;
/*!
* Delete the external rate control model.
*/
vpx_rc_delete_model_cb_fn_t delete_model;
/*!
* Private data for the external rate control model.
* NOTE(review): presumably this is the value handed to create_model() as its
* priv argument; confirm against the encoder side.
*/
void *priv;
} vpx_rc_funcs_t;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VPX_VPX_VPX_EXT_RATECTRL_H_

View file

@ -43,6 +43,7 @@ typedef enum vpx_img_fmt {
VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5,
VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6,
VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7,
VPX_IMG_FMT_NV12 = VPX_IMG_FMT_PLANAR | 9,
VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH,
VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH,
VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH,

View file

@ -88,10 +88,10 @@
const uint8_t *psrc_lw_m = (const uint8_t *)(psrc); \ const uint8_t *psrc_lw_m = (const uint8_t *)(psrc); \
uint32_t val_lw_m; \ uint32_t val_lw_m; \
\ \
__asm__ __volatile__("ulw %[val_lw_m], %[psrc_lw_m] \n\t" \ __asm__ __volatile__("lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
\ "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
: [val_lw_m] "=r"(val_lw_m) \ : [val_lw_m] "=&r"(val_lw_m) \
: [psrc_lw_m] "m"(*psrc_lw_m)); \ : [psrc_lw_m] "r"(psrc_lw_m)); \
\ \
val_lw_m; \ val_lw_m; \
}) })
@ -102,10 +102,10 @@
const uint8_t *psrc_ld_m = (const uint8_t *)(psrc); \ const uint8_t *psrc_ld_m = (const uint8_t *)(psrc); \
uint64_t val_ld_m = 0; \ uint64_t val_ld_m = 0; \
\ \
__asm__ __volatile__("uld %[val_ld_m], %[psrc_ld_m] \n\t" \ __asm__ __volatile__("ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
\ "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
: [val_ld_m] "=r"(val_ld_m) \ : [val_ld_m] "=&r"(val_ld_m) \
: [psrc_ld_m] "m"(*psrc_ld_m)); \ : [psrc_ld_m] "r"(psrc_ld_m)); \
\ \
val_ld_m; \ val_ld_m; \
}) })

View file

@ -364,8 +364,9 @@ static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_REF_ABS_SUB_64 SAD_SRC_REF_ABS_SUB_64
@ -383,6 +384,7 @@ static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -405,9 +407,11 @@ static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
unsigned int sad; unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
mips_reg l_second_pred = (mips_reg)second_pred;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_AVGREF_ABS_SUB_64 SAD_SRC_AVGREF_ABS_SUB_64
@ -424,11 +428,12 @@ static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref), [src]"+&r"(src), [ref]"+&r"(ref),
[second_pred]"+&r"((mips_reg)second_pred), [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad) [sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -450,8 +455,9 @@ static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_REF_ABS_SUB_32 SAD_SRC_REF_ABS_SUB_32
@ -469,6 +475,7 @@ static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -493,9 +500,11 @@ static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
unsigned int sad; unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
mips_reg l_second_pred = (mips_reg)second_pred;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_AVGREF_ABS_SUB_32 SAD_SRC_AVGREF_ABS_SUB_32
@ -512,11 +521,12 @@ static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref), [src]"+&r"(src), [ref]"+&r"(ref),
[second_pred]"+&r"((mips_reg)second_pred), [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad) [sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -539,8 +549,9 @@ static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_REF_ABS_SUB_16 SAD_SRC_REF_ABS_SUB_16
@ -558,6 +569,7 @@ static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -586,9 +598,11 @@ static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
unsigned int sad; unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter; mips_reg l_counter = counter;
mips_reg l_second_pred = (mips_reg)second_pred;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_AVGREF_ABS_SUB_16 SAD_SRC_AVGREF_ABS_SUB_16
@ -605,11 +619,12 @@ static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref), [src]"+&r"(src), [ref]"+&r"(ref),
[second_pred]"+&r"((mips_reg)second_pred), [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad) [sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -632,8 +647,9 @@ static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3; double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter; mips_reg l_counter = counter;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_REF_ABS_SUB_8 SAD_SRC_REF_ABS_SUB_8
@ -651,6 +667,7 @@ static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -679,9 +696,11 @@ static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
unsigned int sad; unsigned int sad;
double ftmp1, ftmp2, ftmp3; double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter; mips_reg l_counter = counter;
mips_reg l_second_pred = (mips_reg)second_pred;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_AVGREF_ABS_SUB_8 SAD_SRC_AVGREF_ABS_SUB_8
@ -697,11 +716,12 @@ static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
"mfc1 %[sad], %[ftmp3] \n\t" "mfc1 %[sad], %[ftmp3] \n\t"
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
[second_pred]"+&r"((mips_reg)second_pred), [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad) [sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -724,8 +744,9 @@ static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3; double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter; mips_reg l_counter = counter;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_REF_ABS_SUB_4 SAD_SRC_REF_ABS_SUB_4
@ -743,6 +764,7 @@ static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }
@ -767,9 +789,11 @@ static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
unsigned int sad; unsigned int sad;
double ftmp1, ftmp2, ftmp3; double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter; mips_reg l_counter = counter;
mips_reg l_second_pred = (mips_reg)second_pred;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t" "1: \n\t"
// Include two loop body, to reduce loop time. // Include two loop body, to reduce loop time.
SAD_SRC_AVGREF_ABS_SUB_4 SAD_SRC_AVGREF_ABS_SUB_4
@ -785,11 +809,12 @@ static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
"mfc1 %[sad], %[ftmp3] \n\t" "mfc1 %[sad], %[ftmp3] \n\t"
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
[second_pred]"+&r"((mips_reg)second_pred), [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad) [sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride), : [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride) [ref_stride]"r"((mips_reg)ref_stride)
); );
/* clang-format on */
return sad; return sad;
} }

View file

@ -24,7 +24,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
switch (rows) { switch (rows) {
case 4: case 4:
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
#if _MIPS_SIM == _ABIO32 #if _MIPS_SIM == _ABIO32
"ulw %[tmp0], 0x00(%[src]) \n\t" "ulw %[tmp0], 0x00(%[src]) \n\t"
"mtc1 %[tmp0], %[ftmp1] \n\t" "mtc1 %[tmp0], %[ftmp1] \n\t"
@ -118,7 +118,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
break; break;
case 8: case 8:
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"li %[tmp0], 0x02 \n\t" "li %[tmp0], 0x02 \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
@ -206,7 +206,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
break; break;
case 16: case 16:
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"li %[tmp0], 0x08 \n\t" "li %[tmp0], 0x08 \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"

View file

@ -150,7 +150,7 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" \ "psrlh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" \
\ \
/* store: temp2[0] ~ temp2[3] */ \ /* store: temp2[0] ~ temp2[3] */ \
"and %[ftmp2], %[ftmp2], %[mask] \n\t" \ "pand %[ftmp2], %[ftmp2], %[mask] \n\t" \
"packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \
"gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t"
@ -163,7 +163,7 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" \ "psrlh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" \
\ \
/* store: temp2[0] ~ temp2[3] */ \ /* store: temp2[0] ~ temp2[3] */ \
"and %[ftmp4], %[ftmp4], %[mask] \n\t" \ "pand %[ftmp4], %[ftmp4], %[mask] \n\t" \
"packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
"gssdrc1 %[ftmp4], 0x00(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp4], 0x00(%[temp2_ptr]) \n\t"
@ -225,8 +225,8 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp3], %[ftmp3], %[ftmp14] \n\t" \ "psrlh %[ftmp3], %[ftmp3], %[ftmp14] \n\t" \
\ \
/* store: temp2[0] ~ temp2[7] */ \ /* store: temp2[0] ~ temp2[7] */ \
"and %[ftmp2], %[ftmp2], %[mask] \n\t" \ "pand %[ftmp2], %[ftmp2], %[mask] \n\t" \
"and %[ftmp3], %[ftmp3], %[mask] \n\t" \ "pand %[ftmp3], %[ftmp3], %[mask] \n\t" \
"packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \
"gssdlc1 %[ftmp2], 0x07(%[temp2_ptr]) \n\t" \ "gssdlc1 %[ftmp2], 0x07(%[temp2_ptr]) \n\t" \
"gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t"
@ -247,8 +247,8 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp9], %[ftmp9], %[ftmp14] \n\t" \ "psrlh %[ftmp9], %[ftmp9], %[ftmp14] \n\t" \
\ \
/* store: temp2[0] ~ temp2[7] */ \ /* store: temp2[0] ~ temp2[7] */ \
"and %[ftmp8], %[ftmp8], %[mask] \n\t" \ "pand %[ftmp8], %[ftmp8], %[mask] \n\t" \
"and %[ftmp9], %[ftmp9], %[mask] \n\t" \ "pand %[ftmp9], %[ftmp9], %[mask] \n\t" \
"packushb %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "packushb %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"gssdlc1 %[ftmp8], 0x07(%[temp2_ptr]) \n\t" \ "gssdlc1 %[ftmp8], 0x07(%[temp2_ptr]) \n\t" \
"gssdrc1 %[ftmp8], 0x00(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp8], 0x00(%[temp2_ptr]) \n\t"
@ -319,8 +319,8 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp5], %[ftmp5], %[ftmp14] \n\t" \ "psrlh %[ftmp5], %[ftmp5], %[ftmp14] \n\t" \
\ \
/* store: temp2[8] ~ temp2[15] */ \ /* store: temp2[8] ~ temp2[15] */ \
"and %[ftmp4], %[ftmp4], %[mask] \n\t" \ "pand %[ftmp4], %[ftmp4], %[mask] \n\t" \
"and %[ftmp5], %[ftmp5], %[mask] \n\t" \ "pand %[ftmp5], %[ftmp5], %[mask] \n\t" \
"packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
"gssdlc1 %[ftmp4], 0x0f(%[temp2_ptr]) \n\t" \ "gssdlc1 %[ftmp4], 0x0f(%[temp2_ptr]) \n\t" \
"gssdrc1 %[ftmp4], 0x08(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp4], 0x08(%[temp2_ptr]) \n\t"
@ -343,8 +343,8 @@ static const uint8_t bilinear_filters[8][2] = {
"psrlh %[ftmp11], %[ftmp11], %[ftmp14] \n\t" \ "psrlh %[ftmp11], %[ftmp11], %[ftmp14] \n\t" \
\ \
/* store: temp2[8] ~ temp2[15] */ \ /* store: temp2[8] ~ temp2[15] */ \
"and %[ftmp10], %[ftmp10], %[mask] \n\t" \ "pand %[ftmp10], %[ftmp10], %[mask] \n\t" \
"and %[ftmp11], %[ftmp11], %[mask] \n\t" \ "pand %[ftmp11], %[ftmp11], %[mask] \n\t" \
"packushb %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ "packushb %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \
"gssdlc1 %[ftmp10], 0x0f(%[temp2_ptr]) \n\t" \ "gssdlc1 %[ftmp10], 0x0f(%[temp2_ptr]) \n\t" \
"gssdrc1 %[ftmp10], 0x08(%[temp2_ptr]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[temp2_ptr]) \n\t"
@ -414,13 +414,14 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -478,7 +479,7 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
"mfc1 %[tmp1], %[ftmp9] \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t"
"mfhc1 %[tmp2], %[ftmp9] \n\t" "mfhc1 %[tmp2], %[ftmp9] \n\t"
"addu %[sum], %[tmp1], %[tmp2] \n\t" "addu %[sum], %[tmp1], %[tmp2] \n\t"
"dsrl %[ftmp1], %[ftmp10], %[ftmp11] \n\t" "ssrld %[ftmp1], %[ftmp10], %[ftmp11] \n\t"
"paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
"swc1 %[ftmp1], 0x00(%[sse]) \n\t" "swc1 %[ftmp1], 0x00(%[sse]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@ -496,6 +497,7 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse) [high]"r"(&high), [sse]"r"(sse)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / (64 * high)); return *sse - (((int64_t)sum * sum) / (64 * high));
} }
@ -519,13 +521,14 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
"li %[tmp0], 0x40 \n\t" "li %[tmp0], 0x40 \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -559,7 +562,7 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
"mfc1 %[tmp1], %[ftmp9] \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t"
"mfhc1 %[tmp2], %[ftmp9] \n\t" "mfhc1 %[tmp2], %[ftmp9] \n\t"
"addu %[sum], %[tmp1], %[tmp2] \n\t" "addu %[sum], %[tmp1], %[tmp2] \n\t"
"dsrl %[ftmp1], %[ftmp10], %[ftmp11] \n\t" "ssrld %[ftmp1], %[ftmp10], %[ftmp11] \n\t"
"paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
"swc1 %[ftmp1], 0x00(%[sse]) \n\t" "swc1 %[ftmp1], 0x00(%[sse]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@ -577,6 +580,7 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
[sse]"r"(sse) [sse]"r"(sse)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / 2048); return *sse - (((int64_t)sum * sum) / 2048);
} }
@ -590,14 +594,15 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "pxor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -625,7 +630,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "ssrld %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
@ -636,7 +641,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
"paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "ssrld %[ftmp0], %[ftmp3], %[ftmp11] \n\t"
"paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
"swc1 %[ftmp0], 0x00(%[sum]) \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t"
@ -653,6 +658,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / (32 * high)); return *sse - (((int64_t)sum * sum) / (32 * high));
} }
@ -676,14 +682,15 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "pxor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -701,7 +708,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "ssrld %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
@ -712,7 +719,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
"paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "ssrld %[ftmp0], %[ftmp3], %[ftmp11] \n\t"
"paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
"swc1 %[ftmp0], 0x00(%[sum]) \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t"
@ -729,6 +736,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / (16 * high)); return *sse - (((int64_t)sum * sum) / (16 * high));
} }
@ -753,14 +761,15 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "pxor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -773,7 +782,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "ssrld %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
@ -784,7 +793,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
"paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "ssrld %[ftmp0], %[ftmp3], %[ftmp11] \n\t"
"paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
"swc1 %[ftmp0], 0x00(%[sum]) \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t"
@ -801,6 +810,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / (8 * high)); return *sse - (((int64_t)sum * sum) / (8 * high));
} }
@ -825,14 +835,15 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp10] \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
@ -845,7 +856,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp6], %[ftmp10] \n\t" "ssrld %[ftmp9], %[ftmp6], %[ftmp10] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp6] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp6] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
@ -856,7 +867,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
"paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"dsrl %[ftmp0], %[ftmp3], %[ftmp10] \n\t" "ssrld %[ftmp0], %[ftmp3], %[ftmp10] \n\t"
"paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
"swc1 %[ftmp0], 0x00(%[sum]) \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@ -872,6 +883,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory" : "memory"
); );
/* clang-format on */
return *sse - (((int64_t)sum * sum) / (4 * high)); return *sse - (((int64_t)sum * sum) / (4 * high));
} }
@ -894,12 +906,13 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"1: \n\t" "1: \n\t"
VARIANCE_SSE_16 VARIANCE_SSE_16
@ -909,7 +922,7 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "ssrld %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@ -925,6 +938,7 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse) [high]"r"(&high), [sse]"r"(sse)
: "memory" : "memory"
); );
/* clang-format on */
return *sse; return *sse;
} }
@ -947,12 +961,13 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
*sse = 0; *sse = 0;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"li %[tmp0], 0x20 \n\t" "li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t"
MMI_L(%[tmp0], %[high], 0x00) MMI_L(%[tmp0], %[high], 0x00)
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"1: \n\t" "1: \n\t"
VARIANCE_SSE_8 VARIANCE_SSE_8
@ -962,7 +977,7 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t" "bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "ssrld %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
"paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
"swc1 %[ftmp9], 0x00(%[sse]) \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@ -978,6 +993,7 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse) [high]"r"(&high), [sse]"r"(sse)
: "memory" : "memory"
); );
/* clang-format on */
return *sse; return *sse;
} }
@ -1021,22 +1037,39 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
uint8_t *temp2_ptr = temp2; uint8_t *temp2_ptr = temp2;
mips_reg l_counter = counter; mips_reg l_counter = counter;
double ftmp[15]; double ftmp[15];
double ff_ph_40, mask;
double filter_x0, filter_x1, filter_y0, filter_y1;
mips_reg tmp[2]; mips_reg tmp[2];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; uint64_t x0, x1, y0, y1, all;
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset]; const uint8_t *filter_y = bilinear_filters[y_offset];
x0 = (uint64_t)filter_x[0];
x1 = (uint64_t)filter_x[1];
y0 = (uint64_t)filter_y[0];
y1 = (uint64_t)filter_y[1];
all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_MTC1(%[all], %[ftmp14])
"punpcklbh %[ftmp14], %[ftmp14], %[ftmp0] \n\t"
"pshufh %[filter_x0], %[ftmp14], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x10)
MMI_MTC1(%[tmp0], %[mask])
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_x1], %[ftmp14], %[ftmp0] \n\t"
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_y0], %[ftmp14], %[ftmp0] \n\t"
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_y1], %[ftmp14], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07) MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp14]) MMI_MTC1(%[tmp0], %[ftmp14])
"pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x0040004000400040)
"pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[ff_ph_40])
"pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
"pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[15] // fdata3: fdata3[0] ~ fdata3[15]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
@ -1072,15 +1105,13 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
[tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
[counter]"+&r"(l_counter) [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
: [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
[filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
[filter_y0] "f"((uint64_t)filter_y[0]), : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
[filter_y1] "f"((uint64_t)filter_y[1]),
[src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory" : "memory"
); );
/* clang-format on */
} }
#define SUBPIX_VAR16XN(H) \ #define SUBPIX_VAR16XN(H) \
@ -1105,19 +1136,38 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
mips_reg l_counter = counter; mips_reg l_counter = counter;
double ftmp[15]; double ftmp[15];
mips_reg tmp[2]; mips_reg tmp[2];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; double ff_ph_40, mask;
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; uint64_t x0, x1, y0, y1, all;
double filter_x0, filter_x1, filter_y0, filter_y1;
const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset]; const uint8_t *filter_y = bilinear_filters[y_offset];
x0 = (uint64_t)filter_x[0];
x1 = (uint64_t)filter_x[1];
y0 = (uint64_t)filter_y[0];
y1 = (uint64_t)filter_y[1];
all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_MTC1(%[all], %[ftmp14])
"punpcklbh %[ftmp14], %[ftmp14], %[ftmp0] \n\t"
"pshufh %[filter_x0], %[ftmp14], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x10)
MMI_MTC1(%[tmp0], %[mask])
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_x1], %[ftmp14], %[ftmp0] \n\t"
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_y0], %[ftmp14], %[ftmp0] \n\t"
"ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
"pshufh %[filter_y1], %[ftmp14], %[ftmp0] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07) MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp14]) MMI_MTC1(%[tmp0], %[ftmp14])
"pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x0040004000400040)
"pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[ff_ph_40])
"pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
"pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[7] // fdata3: fdata3[0] ~ fdata3[7]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
@ -1154,15 +1204,13 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
[tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
[counter]"+&r"(l_counter) [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
: [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
[filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
[filter_y0] "f"((uint64_t)filter_y[0]), : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
[filter_y1] "f"((uint64_t)filter_y[1]),
[src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory" : "memory"
); );
/* clang-format on */
} }
#define SUBPIX_VAR8XN(H) \ #define SUBPIX_VAR8XN(H) \
@ -1188,19 +1236,38 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
mips_reg l_counter = counter; mips_reg l_counter = counter;
double ftmp[7]; double ftmp[7];
mips_reg tmp[2]; mips_reg tmp[2];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; double ff_ph_40, mask;
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; uint64_t x0, x1, y0, y1, all;
double filter_x0, filter_x1, filter_y0, filter_y1;
const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset]; const uint8_t *filter_y = bilinear_filters[y_offset];
x0 = (uint64_t)filter_x[0];
x1 = (uint64_t)filter_x[1];
y0 = (uint64_t)filter_y[0];
y1 = (uint64_t)filter_y[1];
all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
/* clang-format off */
__asm__ volatile ( __asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_MTC1(%[all], %[ftmp6])
"punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"pshufh %[filter_x0], %[ftmp6], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x10)
MMI_MTC1(%[tmp0], %[mask])
"ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
"pshufh %[filter_x1], %[ftmp6], %[ftmp0] \n\t"
"ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
"pshufh %[filter_y0], %[ftmp6], %[ftmp0] \n\t"
"ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
"pshufh %[filter_y1], %[ftmp6], %[ftmp0] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07) MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp6]) MMI_MTC1(%[tmp0], %[ftmp6])
"pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x0040004000400040)
"pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[ff_ph_40])
"pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
"pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[3] // fdata3: fdata3[0] ~ fdata3[3]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
@ -1232,15 +1299,14 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
: [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
[ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
[ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr),
[temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter),
: [filter_x0] "f"((uint64_t)filter_x[0]), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
[filter_x1] "f"((uint64_t)filter_x[1]), [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
[filter_y0] "f"((uint64_t)filter_y[0]), [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
[filter_y1] "f"((uint64_t)filter_y[1]), : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
[src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory" : "memory"
); );
/* clang-format on */
} }
#define SUBPIX_VAR4XN(H) \ #define SUBPIX_VAR4XN(H) \

View file

@ -105,7 +105,7 @@ static void convolve_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
/* clang-format off */ /* clang-format off */
__asm__ volatile( __asm__ volatile(
"move %[tmp1], %[width] \n\t" "move %[tmp1], %[width] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[filter1], 0x03(%[filter]) \n\t" "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
"gsldrc1 %[filter1], 0x00(%[filter]) \n\t" "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
"gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
@ -178,7 +178,7 @@ static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
(void)y_step_q4; (void)y_step_q4;
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
"gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
@ -271,7 +271,7 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
__asm__ volatile( __asm__ volatile(
"move %[tmp1], %[width] \n\t" "move %[tmp1], %[width] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[filter1], 0x03(%[filter]) \n\t" "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
"gsldrc1 %[filter1], 0x00(%[filter]) \n\t" "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
"gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
@ -354,7 +354,7 @@ static void convolve_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
(void)y_step_q4; (void)y_step_q4;
__asm__ volatile( __asm__ volatile(
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
"gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
@ -467,7 +467,7 @@ void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride,
__asm__ volatile( __asm__ volatile(
"move %[tmp1], %[width] \n\t" "move %[tmp1], %[width] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"li %[tmp0], 0x10001 \n\t" "li %[tmp0], 0x10001 \n\t"
MMI_MTC1(%[tmp0], %[ftmp3]) MMI_MTC1(%[tmp0], %[ftmp3])
"punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"

View file

@ -16,7 +16,7 @@ SECTION .text
;void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise, ;void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise,
; int blackclamp, int whiteclamp, ; int blackclamp, int whiteclamp,
; int width, int height, int pitch) ; int width, int height, int pitch)
global sym(vpx_plane_add_noise_sse2) PRIVATE globalsym(vpx_plane_add_noise_sse2)
sym(vpx_plane_add_noise_sse2): sym(vpx_plane_add_noise_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -95,7 +95,7 @@ SECTION .text
; int *flimits, ; int *flimits,
; int size ; int size
;) ;)
global sym(vpx_post_proc_down_and_across_mb_row_sse2) PRIVATE globalsym(vpx_post_proc_down_and_across_mb_row_sse2)
sym(vpx_post_proc_down_and_across_mb_row_sse2): sym(vpx_post_proc_down_and_across_mb_row_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -235,7 +235,7 @@ sym(vpx_post_proc_down_and_across_mb_row_sse2):
;void vpx_mbpost_proc_across_ip_sse2(unsigned char *src, ;void vpx_mbpost_proc_across_ip_sse2(unsigned char *src,
; int pitch, int rows, int cols,int flimit) ; int pitch, int rows, int cols,int flimit)
global sym(vpx_mbpost_proc_across_ip_sse2) PRIVATE globalsym(vpx_mbpost_proc_across_ip_sse2)
sym(vpx_mbpost_proc_across_ip_sse2): sym(vpx_mbpost_proc_across_ip_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -22,7 +22,7 @@ SECTION .text
; unsigned int * SSE, ; unsigned int * SSE,
; int * Sum ; int * Sum
;) ;)
global sym(vpx_highbd_calc16x16var_sse2) PRIVATE globalsym(vpx_highbd_calc16x16var_sse2)
sym(vpx_highbd_calc16x16var_sse2): sym(vpx_highbd_calc16x16var_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -175,7 +175,7 @@ sym(vpx_highbd_calc16x16var_sse2):
; unsigned int * SSE, ; unsigned int * SSE,
; int * Sum ; int * Sum
;) ;)
global sym(vpx_highbd_calc8x8var_sse2) PRIVATE globalsym(vpx_highbd_calc8x8var_sse2)
sym(vpx_highbd_calc8x8var_sse2): sym(vpx_highbd_calc8x8var_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -173,7 +173,7 @@ SECTION .text
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad16x16x3_sse3) PRIVATE globalsym(vpx_sad16x16x3_sse3)
sym(vpx_sad16x16x3_sse3): sym(vpx_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3
@ -215,7 +215,7 @@ sym(vpx_sad16x16x3_sse3):
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad16x8x3_sse3) PRIVATE globalsym(vpx_sad16x8x3_sse3)
sym(vpx_sad16x8x3_sse3): sym(vpx_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3
@ -253,7 +253,7 @@ sym(vpx_sad16x8x3_sse3):
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad8x16x3_sse3) PRIVATE globalsym(vpx_sad8x16x3_sse3)
sym(vpx_sad8x16x3_sse3): sym(vpx_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3
@ -282,7 +282,7 @@ sym(vpx_sad8x16x3_sse3):
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad8x8x3_sse3) PRIVATE globalsym(vpx_sad8x8x3_sse3)
sym(vpx_sad8x8x3_sse3): sym(vpx_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3
@ -307,7 +307,7 @@ sym(vpx_sad8x8x3_sse3):
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad4x4x3_sse3) PRIVATE globalsym(vpx_sad4x4x3_sse3)
sym(vpx_sad4x4x3_sse3): sym(vpx_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3 STACK_FRAME_CREATE_X3

View file

@ -173,7 +173,7 @@ SECTION .text
; const unsigned char *ref_ptr, ; const unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; unsigned short *sad_array); ; unsigned short *sad_array);
global sym(vpx_sad16x16x8_sse4_1) PRIVATE globalsym(vpx_sad16x16x8_sse4_1)
sym(vpx_sad16x16x8_sse4_1): sym(vpx_sad16x16x8_sse4_1):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -214,7 +214,7 @@ sym(vpx_sad16x16x8_sse4_1):
; int ref_stride, ; int ref_stride,
; unsigned short *sad_array ; unsigned short *sad_array
;); ;);
global sym(vpx_sad16x8x8_sse4_1) PRIVATE globalsym(vpx_sad16x8x8_sse4_1)
sym(vpx_sad16x8x8_sse4_1): sym(vpx_sad16x8x8_sse4_1):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -251,7 +251,7 @@ sym(vpx_sad16x8x8_sse4_1):
; int ref_stride, ; int ref_stride,
; unsigned short *sad_array ; unsigned short *sad_array
;); ;);
global sym(vpx_sad8x8x8_sse4_1) PRIVATE globalsym(vpx_sad8x8x8_sse4_1)
sym(vpx_sad8x8x8_sse4_1): sym(vpx_sad8x8x8_sse4_1):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -288,7 +288,7 @@ sym(vpx_sad8x8x8_sse4_1):
; int ref_stride, ; int ref_stride,
; unsigned short *sad_array ; unsigned short *sad_array
;); ;);
global sym(vpx_sad8x16x8_sse4_1) PRIVATE globalsym(vpx_sad8x16x8_sse4_1)
sym(vpx_sad8x16x8_sse4_1): sym(vpx_sad8x16x8_sse4_1):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -329,7 +329,7 @@ sym(vpx_sad8x16x8_sse4_1):
; int ref_stride, ; int ref_stride,
; unsigned short *sad_array ; unsigned short *sad_array
;); ;);
global sym(vpx_sad4x4x8_sse4_1) PRIVATE globalsym(vpx_sad4x4x8_sse4_1)
sym(vpx_sad4x4x8_sse4_1): sym(vpx_sad4x4x8_sse4_1):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -154,7 +154,7 @@ SECTION .text
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad16x16x3_ssse3) PRIVATE globalsym(vpx_sad16x16x3_ssse3)
sym(vpx_sad16x16x3_ssse3): sym(vpx_sad16x16x3_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -267,7 +267,7 @@ sym(vpx_sad16x16x3_ssse3):
; unsigned char *ref_ptr, ; unsigned char *ref_ptr,
; int ref_stride, ; int ref_stride,
; int *results) ; int *results)
global sym(vpx_sad16x8x3_ssse3) PRIVATE globalsym(vpx_sad16x8x3_ssse3)
sym(vpx_sad16x8x3_ssse3): sym(vpx_sad16x8x3_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -64,7 +64,7 @@ SECTION .text
; or pavgb At this point this is just meant to be first pass for calculating ; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code. ; in mode selection code.
global sym(vpx_ssim_parms_16x16_sse2) PRIVATE globalsym(vpx_ssim_parms_16x16_sse2)
sym(vpx_ssim_parms_16x16_sse2): sym(vpx_ssim_parms_16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -154,7 +154,7 @@ sym(vpx_ssim_parms_16x16_sse2):
; or pavgb At this point this is just meant to be first pass for calculating ; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code. ; in mode selection code.
global sym(vpx_ssim_parms_8x8_sse2) PRIVATE globalsym(vpx_ssim_parms_8x8_sse2)
sym(vpx_ssim_parms_8x8_sse2): sym(vpx_ssim_parms_8x8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -16,7 +16,7 @@
#include "./vpx_config.h" #include "./vpx_config.h"
static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) { static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) {
// Unpack 16 bit elements. Goes from: // Unpack 8 bit elements. Goes from:
// in[0]: 00 01 02 03 // in[0]: 00 01 02 03
// in[1]: 10 11 12 13 // in[1]: 10 11 12 13
// in[2]: 20 21 22 23 // in[2]: 20 21 22 23
@ -27,7 +27,7 @@ static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) {
const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]); const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]); const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
// Unpack 32 bit elements resulting in: // Unpack 16 bit elements resulting in:
// 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
return _mm_unpacklo_epi16(a0, a1); return _mm_unpacklo_epi16(a0, a1);
} }

View file

@ -208,7 +208,7 @@ SECTION .text
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d4_v8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_v8_sse2)
sym(vpx_highbd_filter_block1d4_v8_sse2): sym(vpx_highbd_filter_block1d4_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -278,7 +278,7 @@ sym(vpx_highbd_filter_block1d4_v8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d8_v8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_v8_sse2)
sym(vpx_highbd_filter_block1d8_v8_sse2): sym(vpx_highbd_filter_block1d8_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -337,7 +337,7 @@ sym(vpx_highbd_filter_block1d8_v8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d16_v8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_v8_sse2)
sym(vpx_highbd_filter_block1d16_v8_sse2): sym(vpx_highbd_filter_block1d16_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -391,7 +391,7 @@ sym(vpx_highbd_filter_block1d16_v8_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d4_v8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_v8_avg_sse2)
sym(vpx_highbd_filter_block1d4_v8_avg_sse2): sym(vpx_highbd_filter_block1d4_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -452,7 +452,7 @@ sym(vpx_highbd_filter_block1d4_v8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d8_v8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_v8_avg_sse2)
sym(vpx_highbd_filter_block1d8_v8_avg_sse2): sym(vpx_highbd_filter_block1d8_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -501,7 +501,7 @@ sym(vpx_highbd_filter_block1d8_v8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_v8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_v8_avg_sse2)
sym(vpx_highbd_filter_block1d16_v8_avg_sse2): sym(vpx_highbd_filter_block1d16_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -563,7 +563,7 @@ sym(vpx_highbd_filter_block1d16_v8_avg_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d4_h8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_h8_sse2)
sym(vpx_highbd_filter_block1d4_h8_sse2): sym(vpx_highbd_filter_block1d4_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -638,7 +638,7 @@ sym(vpx_highbd_filter_block1d4_h8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d8_h8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_h8_sse2)
sym(vpx_highbd_filter_block1d8_h8_sse2): sym(vpx_highbd_filter_block1d8_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -704,7 +704,7 @@ sym(vpx_highbd_filter_block1d8_h8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_highbd_filter_block1d16_h8_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_h8_sse2)
sym(vpx_highbd_filter_block1d16_h8_sse2): sym(vpx_highbd_filter_block1d16_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -772,7 +772,7 @@ sym(vpx_highbd_filter_block1d16_h8_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d4_h8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_h8_avg_sse2)
sym(vpx_highbd_filter_block1d4_h8_avg_sse2): sym(vpx_highbd_filter_block1d4_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -838,7 +838,7 @@ sym(vpx_highbd_filter_block1d4_h8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d8_h8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_h8_avg_sse2)
sym(vpx_highbd_filter_block1d8_h8_avg_sse2): sym(vpx_highbd_filter_block1d8_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -895,7 +895,7 @@ sym(vpx_highbd_filter_block1d8_h8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_h8_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_h8_avg_sse2)
sym(vpx_highbd_filter_block1d16_h8_avg_sse2): sym(vpx_highbd_filter_block1d16_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -173,7 +173,7 @@
SECTION .text SECTION .text
global sym(vpx_highbd_filter_block1d4_v2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_v2_sse2)
sym(vpx_highbd_filter_block1d4_v2_sse2): sym(vpx_highbd_filter_block1d4_v2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -198,7 +198,7 @@ sym(vpx_highbd_filter_block1d4_v2_sse2):
ret ret
%if VPX_ARCH_X86_64 %if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_v2_sse2)
sym(vpx_highbd_filter_block1d8_v2_sse2): sym(vpx_highbd_filter_block1d8_v2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -224,7 +224,7 @@ sym(vpx_highbd_filter_block1d8_v2_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_v2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_v2_sse2)
sym(vpx_highbd_filter_block1d16_v2_sse2): sym(vpx_highbd_filter_block1d16_v2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -253,7 +253,7 @@ sym(vpx_highbd_filter_block1d16_v2_sse2):
ret ret
%endif %endif
global sym(vpx_highbd_filter_block1d4_v2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_v2_avg_sse2)
sym(vpx_highbd_filter_block1d4_v2_avg_sse2): sym(vpx_highbd_filter_block1d4_v2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -278,7 +278,7 @@ sym(vpx_highbd_filter_block1d4_v2_avg_sse2):
ret ret
%if VPX_ARCH_X86_64 %if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_v2_avg_sse2)
sym(vpx_highbd_filter_block1d8_v2_avg_sse2): sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -304,7 +304,7 @@ sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_v2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_v2_avg_sse2)
sym(vpx_highbd_filter_block1d16_v2_avg_sse2): sym(vpx_highbd_filter_block1d16_v2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -333,7 +333,7 @@ sym(vpx_highbd_filter_block1d16_v2_avg_sse2):
ret ret
%endif %endif
global sym(vpx_highbd_filter_block1d4_h2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_h2_sse2)
sym(vpx_highbd_filter_block1d4_h2_sse2): sym(vpx_highbd_filter_block1d4_h2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -359,7 +359,7 @@ sym(vpx_highbd_filter_block1d4_h2_sse2):
ret ret
%if VPX_ARCH_X86_64 %if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_h2_sse2)
sym(vpx_highbd_filter_block1d8_h2_sse2): sym(vpx_highbd_filter_block1d8_h2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -385,7 +385,7 @@ sym(vpx_highbd_filter_block1d8_h2_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_h2_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_h2_sse2)
sym(vpx_highbd_filter_block1d16_h2_sse2): sym(vpx_highbd_filter_block1d16_h2_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -414,7 +414,7 @@ sym(vpx_highbd_filter_block1d16_h2_sse2):
ret ret
%endif %endif
global sym(vpx_highbd_filter_block1d4_h2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d4_h2_avg_sse2)
sym(vpx_highbd_filter_block1d4_h2_avg_sse2): sym(vpx_highbd_filter_block1d4_h2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -440,7 +440,7 @@ sym(vpx_highbd_filter_block1d4_h2_avg_sse2):
ret ret
%if VPX_ARCH_X86_64 %if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d8_h2_avg_sse2)
sym(vpx_highbd_filter_block1d8_h2_avg_sse2): sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -466,7 +466,7 @@ sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_highbd_filter_block1d16_h2_avg_sse2) PRIVATE globalsym(vpx_highbd_filter_block1d16_h2_avg_sse2)
sym(vpx_highbd_filter_block1d16_h2_avg_sse2): sym(vpx_highbd_filter_block1d16_h2_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

View file

@ -187,7 +187,7 @@ SECTION .text
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d4_v8_sse2) PRIVATE globalsym(vpx_filter_block1d4_v8_sse2)
sym(vpx_filter_block1d4_v8_sse2): sym(vpx_filter_block1d4_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -254,7 +254,7 @@ sym(vpx_filter_block1d4_v8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d8_v8_sse2) PRIVATE globalsym(vpx_filter_block1d8_v8_sse2)
sym(vpx_filter_block1d8_v8_sse2): sym(vpx_filter_block1d8_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -313,7 +313,7 @@ sym(vpx_filter_block1d8_v8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d16_v8_sse2) PRIVATE globalsym(vpx_filter_block1d16_v8_sse2)
sym(vpx_filter_block1d16_v8_sse2): sym(vpx_filter_block1d16_v8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -367,7 +367,7 @@ sym(vpx_filter_block1d16_v8_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d4_v8_avg_sse2)
sym(vpx_filter_block1d4_v8_avg_sse2): sym(vpx_filter_block1d4_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -425,7 +425,7 @@ sym(vpx_filter_block1d4_v8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d8_v8_avg_sse2)
sym(vpx_filter_block1d8_v8_avg_sse2): sym(vpx_filter_block1d8_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -474,7 +474,7 @@ sym(vpx_filter_block1d8_v8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d16_v8_avg_sse2)
sym(vpx_filter_block1d16_v8_avg_sse2): sym(vpx_filter_block1d16_v8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -536,7 +536,7 @@ sym(vpx_filter_block1d16_v8_avg_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d4_h8_sse2) PRIVATE globalsym(vpx_filter_block1d4_h8_sse2)
sym(vpx_filter_block1d4_h8_sse2): sym(vpx_filter_block1d4_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -610,7 +610,7 @@ sym(vpx_filter_block1d4_h8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d8_h8_sse2) PRIVATE globalsym(vpx_filter_block1d8_h8_sse2)
sym(vpx_filter_block1d8_h8_sse2): sym(vpx_filter_block1d8_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -685,7 +685,7 @@ sym(vpx_filter_block1d8_h8_sse2):
; unsigned int output_height, ; unsigned int output_height,
; short *filter ; short *filter
;) ;)
global sym(vpx_filter_block1d16_h8_sse2) PRIVATE globalsym(vpx_filter_block1d16_h8_sse2)
sym(vpx_filter_block1d16_h8_sse2): sym(vpx_filter_block1d16_h8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -771,7 +771,7 @@ sym(vpx_filter_block1d16_h8_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d4_h8_avg_sse2)
sym(vpx_filter_block1d4_h8_avg_sse2): sym(vpx_filter_block1d4_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -836,7 +836,7 @@ sym(vpx_filter_block1d4_h8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d8_h8_avg_sse2)
sym(vpx_filter_block1d8_h8_avg_sse2): sym(vpx_filter_block1d8_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -902,7 +902,7 @@ sym(vpx_filter_block1d8_h8_avg_sse2):
pop rbp pop rbp
ret ret
global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE globalsym(vpx_filter_block1d16_h8_avg_sse2)
sym(vpx_filter_block1d16_h8_avg_sse2): sym(vpx_filter_block1d16_h8_avg_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp

Some files were not shown because too many files have changed in this diff Show more