Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 15 Jul 2016 01:27:43 +0000 (UTC)
From:      Mikhail Teterin <mi@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r418567 - in head/graphics: . lepton lepton/files
Message-ID:  <201607150127.u6F1RhaS087043@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mi
Date: Fri Jul 15 01:27:43 2016
New Revision: 418567
URL: https://svnweb.freebsd.org/changeset/ports/418567

Log:
  Add port of "lepton" -- a tool for manipulating files in LEP-format,
  which was recently open-sourced by Dropbox.
  
  The patch

Added:
  head/graphics/lepton/
  head/graphics/lepton/Makefile   (contents, props changed)
  head/graphics/lepton/distinfo   (contents, props changed)
  head/graphics/lepton/files/
  head/graphics/lepton/files/patch-base-dependencies   (contents, props changed)
  head/graphics/lepton/files/patch-bsd   (contents, props changed)
  head/graphics/lepton/files/patch-sse2   (contents, props changed)
  head/graphics/lepton/pkg-descr   (contents, props changed)
Modified:
  head/graphics/Makefile

Modified: head/graphics/Makefile
==============================================================================
--- head/graphics/Makefile	Fri Jul 15 01:10:15 2016	(r418566)
+++ head/graphics/Makefile	Fri Jul 15 01:27:43 2016	(r418567)
@@ -460,6 +460,7 @@
     SUBDIR += lcms2
     SUBDIR += leafpak
     SUBDIR += lensfun
+    SUBDIR += lepton
     SUBDIR += leptonica
     SUBDIR += lfview
     SUBDIR += lib3ds

Added: head/graphics/lepton/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/Makefile	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,30 @@
+# Created by: Mikhail T. <mi@aldan.algebra.com>
+# $FreeBSD$
+
+PORTNAME=	lepton
+PORTVERSION=	0.2016.07.16
+CATEGORIES=	graphics devel
+
+MAINTAINER=	mi@aldan.algebra.com
+COMMENT=	Tool for manipulating files in LEP image format
+
+LICENSE=	APACHE20
+
+USE_GITHUB=	yes
+GH_ACCOUNT=	dropbox
+GH_TAGNAME=	a34ee2f4b0a6454eff8ebe334750dd008d57de35
+
+USES=		autoreconf
+GNU_CONFIGURE=	yes
+TEST_TARGET=	check
+CFLAGS+=	-DBSD
+.ifndef WITH_DEBUG
+CFLAGS+=	-DNDEBUG
+.endif
+EXTRACT_AFTER_ARGS=--no-same-owner --no-same-permissions --exclude dependencies
+PLIST_FILES=	bin/lepton
+
+do-install:
+	${INSTALL_PROGRAM} ${WRKSRC}/lepton ${STAGEDIR}${PREFIX}/bin
+
+.include <bsd.port.mk>

Added: head/graphics/lepton/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/distinfo	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1468524984
+SHA256 (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 9e1941c7cc72b50a20f4f7a9495df42f7978d4cd1b9764c74787af997bd386f9
+SIZE (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 50216342

Added: head/graphics/lepton/files/patch-base-dependencies
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/files/patch-base-dependencies	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,99 @@
+Do not use the zlib and md5 implementations bundled by
+the author(s). Use base-system's...
+
+	-mi
+
++++ src/io/ZlibCompression.hh
+@@ -28,7 +28,7 @@
+  */
+ 
+ #include "Reader.hh"
+-#include "../../dependencies/zlib/zlib.h"
++#include <zlib.h>
+ namespace Sirikata{
+ class SIRIKATA_EXPORT ZlibDecoderDecompressionReader : public DecoderReader {
+ protected:
++++ src/lepton/bitops.cc
+@@ -38,7 +38,7 @@ reading and writing of arrays
+ #include <algorithm>
+ #include <assert.h>
+ extern "C" {
+-#include "../../dependencies/md5/md5.h"
++#include <openssl/md5.h>
+ }
+ #include "bitops.hh"
+ 
++++ src/io/ioutil.cc
+@@ -12,5 +12,5 @@
+ #include "Reader.hh"
+ #include "ioutil.hh"
+-#include "../../dependencies/md5/md5.h"
++#include <openssl/md5.h>
+ #ifdef _WIN32
+ #include <Windows.h>
++++ Makefile.am
+@@ -1,13 +1,13 @@
+-includes = -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder -I$(srcdir)/dependencies -I$(srcdir)/dependencies -I$(srcdir)/dependencies/xz/src/liblzma/common -I$(srcdir)/dependencies/xz/src/common -I$(srcdir)/dependencies/xz/src/liblzma/lzma -I$(srcdir)/dependencies/xz/src/liblzma/lz -I$(srcdir)/dependencies/xz/src/liblzma/check -I$(srcdir)/dependencies/xz/src/liblzma/rangecoder -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/dependencies/xz/src/liblzma/simple -I$(srcdir)/dependencies/xz/src/liblzma/delta -I$(srcdir)/dependencies/xz/src/liblzma
++includes = -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder
+ 
+-AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -DGIT_REVISION=\"$(shell git describe --dirty --always)\" $(includes)
++AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) $(includes)
+ 
+ AM_CFLAGS = $(C99_FLAGS) $(PICKY_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS)
+ 
+-noinst_LIBRARIES = liblocalzlib.a libtestdriver.a liblocalmd5.a
++noinst_LIBRARIES = libtestdriver.a
+ 
+ bin_PROGRAMS = lepton test_suite/test_invariants
+ 
+-lepton_LDADD = liblocalmd5.a liblocalzlib.a -lpthread
++lepton_LDADD = -lz -lcrypto -lpthread
+ 
+ lepton_SOURCES = \
+@@ -80,5 +80,4 @@
+    src/vp8/model/model.cc \
+    src/vp8/model/model.hh \
+-   src/vp8/model/numeric.cc \
+    src/vp8/model/numeric.hh \
+    src/vp8/model/jpeg_meta.hh \
+@@ -94,33 +93,4 @@
+    src/vp8/decoder/vpx_bool_reader.hh
+ 
+-liblocalmd5_a_SOURCES = dependencies/md5/md5.c
+-
+-liblocalzlib_a_SOURCES = dependencies/zlib/inflate.c \
+-dependencies/zlib/inflate.h \
+-dependencies/zlib/gzguts.h \
+-dependencies/zlib/infback.c \
+-dependencies/zlib/trees.c \
+-dependencies/zlib/adler32.c \
+-dependencies/zlib/gzclose.c \
+-dependencies/zlib/inftrees.h \
+-dependencies/zlib/zconf.h \
+-dependencies/zlib/compress.c \
+-dependencies/zlib/crc32.c \
+-dependencies/zlib/crc32.h \
+-dependencies/zlib/trees.h \
+-dependencies/zlib/inftrees.c \
+-dependencies/zlib/zutil.c \
+-dependencies/zlib/gzwrite.c \
+-dependencies/zlib/zutil.h \
+-dependencies/zlib/zlib.h \
+-dependencies/zlib/inffixed.h \
+-dependencies/zlib/deflate.c \
+-dependencies/zlib/inffast.h \
+-dependencies/zlib/inffast.c \
+-dependencies/zlib/uncompr.c \
+-dependencies/zlib/gzread.c \
+-dependencies/zlib/deflate.h \
+-dependencies/zlib/gzlib.c
+-
+ libtestdriver_a_SOURCES = test_suite/timing_driver.cc
+ 
+@@ -142,5 +112,5 @@
+    src/io/DecoderPlatform.hh
+ 
+-test_suite_test_invariants_LDADD = liblocalzlib.a liblocalmd5.a
++test_suite_test_invariants_LDADD = -lz -lcrypto
+ 
+ check_PROGRAMS = test_suite/test_recode_memory_bound test_suite/test_truncate_lowmem test_suite/test_android_lowmem test_suite/test_invariants test_suite/test_baseline_ujg test_suite/test_baseline test_suite/test_misc test_suite/test_iphone test_suite/test_phone_outdoor test_suite/test_truncate_ujg test_suite/test_truncate test_suite/test_SLR test_suite/test_progressive_ujg test_suite/test_progressive_disallowed test_suite/test_progressive test_suite/test_arithmetic_failfast test_suite/test_hq test_suite/test_baseline_unjailed test_suite/test_baseline_unjailed_thread test_suite/test_baseline_unjailed_decode test_suite/test_baseline_unjailed_decode_thread test_suite/test_seccomp_encode_main test_suite/test_seccomp_encode_thread  test_suite/test_seccomp_decode_main test_suite/test_seccomp_decode_thread test_suite/test_nofsync test_suite/test_colorswap test_suite/test_odd_rst test_suite/test_trailing_header test_suite/test_trailing_rst test_suite/test_gray2sf test_suite/test_truncated_zero_run test_suite/test_bad_zero_run

Added: head/graphics/lepton/files/patch-bsd
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/files/patch-bsd	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,148 @@
+Make buildable on BSD-systems other than Apple. Mostly this
+simply replaces #ifdef __APPLE__ with #ifdef BSD, but not
+only...
+
+	-mi
+
++++ src/io/DecoderPlatform.hh
+@@ -26,7 +26,7 @@
+  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+-#if defined (__linux) || defined (__APPLE__)
++#if defined (__linux) || defined (BSD)
+ #define SIRIKATA_FUNCTION_EXPORT __attribute__ ((visibility("default")))
+ #define SIRIKATA_EXPORT __attribute__ ((visibility("default")))
+ #define SIRIKATA_PLUGIN_EXPORT __attribute__ ((visibility("default")))
++++ src/io/MemMgrAllocator.cc
+@@ -41,7 +41,7 @@
+ #include <cstdint>
+ #include "DecoderPlatform.hh"
+ #include "MemMgrAllocator.hh"
+-#if (defined(__APPLE__) || __cplusplus <= 199711L) && !defined(_WIN32)
++#if (defined(BSD) || __cplusplus <= 199711L) && !defined(_WIN32)
+ #define THREAD_LOCAL_STORAGE __thread
+ #else
+ #include <atomic>
+@@ -151,7 +151,7 @@ void setup_memmgr(MemMgrState& memmgr, u
+ }
+ void memmgr_init(size_t main_thread_pool_size, size_t worker_thread_pool_size, size_t num_workers, size_t x_min_pool_alloc_quantas, bool needs_huge_pages)
+ {
+-#ifdef __APPLE__
++#ifdef BSD
+     // in apple, the thread_local storage winds up different when destroying the thread
+     num_workers *= 2;
+ #endif
++++ src/lepton/fork_serve.cc
+@@ -10,7 +10,7 @@
+ #include <fcntl.h>
+ #include <unistd.h>
+ #include <algorithm>
+-#ifndef __APPLE__
++#ifndef BSD
+ #include <wait.h>
+ #else
+ #include <sys/wait.h>
++++ src/lepton/socket_serve.cc
+@@ -11,7 +11,7 @@
+ #include <algorithm>
+ #include <netinet/in.h>
+ #include <sys/time.h>
+-#ifndef __APPLE__
++#ifndef BSD
+ #include <sys/signalfd.h>
+ #include <wait.h>
+ #else
+@@ -127,7 +127,7 @@ int should_wait_bitmask(size_t children_
+ 
+ int make_sigchld_fd() {
+     int fd = -1;
+-#ifndef __APPLE__
++#ifndef BSD
+     sigset_t sigset;
+     int err = sigemptyset(&sigset);
+     always_assert(err == 0);
+@@ -233,7 +233,7 @@ void serving_loop(int unix_domain_socket
+             if (fds[i].revents & POLLIN) {
+                 fds[i].revents = 0;
+                 if (fds[i].fd == sigchild_fd) {
+-#ifndef __APPLE__
++#ifndef BSD
+                     struct signalfd_siginfo info;
+                     ssize_t ignore = read(fds[i].fd, &info, sizeof(info));
+                     (void)ignore;
++++ src/vp8/model/jpeg_meta.hh
+@@ -3,7 +3,7 @@
+ 
+ #include <vector>
+ #include <type_traits>
+-#ifndef __APPLE__
++#ifndef BSD
+ #ifndef _WIN32
+ #include <endian.h>
+ #endif
++++ src/vp8/util/memory.cc
+@@ -13,7 +13,7 @@
+ #ifdef _WIN32
+ #define USE_STANDARD_MEMORY_ALLOCATORS
+ #endif
+-#if defined(__APPLE__) || (__cplusplus <= 199711L && !defined(_WIN32))
++#if defined(BSD) || (__cplusplus <= 199711L && !defined(_WIN32))
+ #define THREAD_LOCAL_STORAGE __thread
+ #else
+ #define THREAD_LOCAL_STORAGE thread_local
++++ src/vp8/util/vpx_config.hh
+@@ -44,6 +44,6 @@
+ #ifndef _BSD_SOURCE
+ #define _BSD_SOURCE       /* See feature_test_macros(7) */
+ #endif
+-#include <endian.h>
++#include <sys/endian.h>
+ #endif
+ #endif
++++ test_suite/timing_driver.cc
+@@ -6,11 +6,13 @@
+ #include <unistd.h>
+ #include <assert.h>
+ #include <errno.h>
++#include <stdlib.h>
+ #include <string.h>
+ #include <sys/types.h>
+ #include <sys/wait.h>
+ #include <fcntl.h>
+ #include <sys/select.h>
++#include <sys/signal.h>
+ #include <sys/time.h>
+ #include "smalljpg.hh"
+ 
++++ src/lepton/jpgcoder.cc
+@@ -77,11 +77,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
+ int g_argc = 0;
+ const char** g_argv = NULL;
+ #ifndef GIT_REVISION
+-#include "version.hh"
+-#ifndef GIT_REVISION
+ #define GIT_REVISION "unknown"
+ #endif
+-#endif
+ bool fast_exit = true;
+ #ifdef SKIP_VALIDATION
+ bool g_skip_validation = true;
++++ src/io/ioutil.cc
+@@ -325,13 +325,13 @@
+     int input_tee_flags = 0;
+     int copy_to_storage_flags = 0;
+-#ifndef __APPLE__
++#ifndef BSD
+     input_tee_flags = fcntl(input_tee, F_GETFL, 0);
+ #endif
+     fcntl(input_tee, F_SETFL, input_tee_flags | O_NONBLOCK);
+-#ifndef __APPLE__
++#ifndef BSD
+     copy_to_input_tee_flags = fcntl(copy_to_input_tee, F_GETFL, 0);
+ #endif
+     fcntl(copy_to_input_tee, F_SETFL, copy_to_input_tee_flags | O_NONBLOCK);
+-#ifndef __APPLE__
++#ifndef BSD
+     copy_to_storage_flags = fcntl(copy_to_storage, F_GETFL, 0);
+ #endif

Added: head/graphics/lepton/files/patch-sse2
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/files/patch-sse2	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,209 @@
+Make the code work -- or at least attempt to -- on CPUs with only
+the SSE2 instruction set...
+
+	-mi
+
+--- src/lepton/idct.cc
++++ src/lepton/idct.cc
+@@ -1,8 +1,6 @@
+ /* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+-#include <emmintrin.h>
+-#include <smmintrin.h>
+-#include <immintrin.h>
+ #include "../vp8/util/aligned_block.hh"
++#include "../vp8/util/mm_mullo_epi32.hh"
+ 
+ namespace idct_local{
+ enum {
+@@ -23,7 +21,10 @@ enum {
+     r2 = 181 // 256/sqrt(2)
+ };
+ }
+-void idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) {
++
++#ifndef __SSE2__
++static void
++idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) {
+     int32_t intermed[64];
+     using namespace idct_local;
+     // Horizontal 1-D IDCT.
+@@ -149,6 +150,8 @@ void idct_scalar(const AlignedBlock &blo
+         //outp[i]>>=3;
+     }
+ }
++#else /* At least SSE2 is available { */
++
+ template<int which_vec, int offset, int stride> __m128i vget_raster(const AlignedBlock&block) {
+     return _mm_set_epi32(block.coefficients_raster(which_vec + 3 * stride + offset),
+                          block.coefficients_raster(which_vec + 2 * stride + offset),
+@@ -162,8 +165,8 @@ template<int offset, int stride> __m128i
+                                               q[which_vec + offset]));
+ }
+ 
+-
+-__m128i epi32l_to_epi16(__m128i lowvec) {
++static __m128i
++epi32l_to_epi16(__m128i lowvec) {
+     return _mm_shuffle_epi8(lowvec, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+                                                  0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+ }
+@@ -181,9 +184,8 @@ __m128i epi32l_to_epi16(__m128i lowvec) 
+     }while(0)
+ 
+ 
+-
+-
+-void idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
++static void
++idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+     
+     char vintermed_storage[64 * sizeof(int32_t) + 16];
+     // align intermediate storage to 16 bytes
+@@ -202,7 +204,12 @@ void idct_sse(const AlignedBlock &block,
+             xv6 = vget_raster<0, 5, 8>(block);
+             xv7 = vget_raster<0, 3, 8>(block);
+             if (__builtin_expect(ignore_dc, true)) {
++#ifdef __SSE4_1__
+                 xv0 = _mm_insert_epi32(xv0, 0, 0);
++#else
++// See http://stackoverflow.com/questions/38384520/is-there-a-sse2-equivalent-for-mm-insert-epi32
++                xv0 = _mm_insert_epi16(_mm_insert_epi16(xv0, 0, 0), 0, 1);
++#endif
+             }
+         } else {
+             xv0 = vget_raster<32, 0, 8>(block);
+@@ -378,7 +385,8 @@ __m128i m256_to_epi16(__m256i vec) {
+ 
+     }*/
+ #if __AVX2__
+-void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
++static void
++idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+     // align intermediate storage to 16 bytes
+     using namespace idct_local;
+     // Horizontal 1-D IDCT.
+@@ -589,11 +597,16 @@ void idct_avx(const AlignedBlock &block,
+ #endif
+     }
+ }
+-#else
+-void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+-    idct_sse(block, q, voutp, ignore_dc);
+-}
+ #endif
+-void idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
++#endif /* } SSE2 or higher is available */
++
++void
++idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
++#ifdef __AVX2__
+     idct_avx(block, q, voutp, ignore_dc);
++#elif __SSE2__
++    idct_sse(block, q, voutp, ignore_dc);
++#else
++    idct_scalar(block, q, voutp, ignore_dc);
++#endif
+ }
+--- src/lepton/vp8_encoder.cc
++++ src/lepton/vp8_encoder.cc
+@@ -150,29 +150,34 @@ void VP8ComponentEncoder::process_row(Pr
+     }
+ }
+ uint32_t aligned_block_cost(const AlignedBlock &block) {
+-    uint32_t cost = 16; // .25 cost for zeros
+-    if (VECTORIZE) {
+-        for (int i = 0; i < 64; i+= 8) {
+-            __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i)));
+-            __m128i v_cost = _mm_set1_epi16(0);
+-            while (!_mm_test_all_zeros(val, val)) {
+-                __m128i mask = _mm_cmpgt_epi16(val, _mm_setzero_si128());
+-                v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2)));
+-                val = _mm_srli_epi16(val, 1);
+-            }
+-            __m128i sum = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8));
+-            sum = _mm_add_epi16(sum ,_mm_srli_si128(sum, 4));
+-            sum = _mm_add_epi16(sum, _mm_srli_si128(sum, 2));
+-            cost += _mm_extract_epi16(sum, 0);
+-        }
+-    } else {
+-        uint32_t scost = 0;
+-        for (int i = 0; i < 64; ++i) {
+-            scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i]));
++#ifdef __SSE2__ /* SSE2 or higher instruction set available { */
++    const __m128i zero = _mm_setzero_si128();
++     __m128i v_cost;
++    for (int i = 0; i < 64; i+= 8) {
++        __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i)));
++        v_cost = _mm_set1_epi16(0);
++#ifndef __SSE4_1__
++        while (_mm_movemask_epi8(_mm_cmpeq_epi32(val, zero)) != 0xFFFF)
++#else
++        while (!_mm_test_all_zeros(val, val))
++#endif
++        {
++            __m128i mask = _mm_cmpgt_epi16(val, zero);
++            v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2)));
++            val = _mm_srli_epi16(val, 1);
+         }
+-        cost = scost;
++        v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8));
++        v_cost = _mm_add_epi16(v_cost ,_mm_srli_si128(v_cost, 4));
++        v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 2));
+     }
+-    return cost;
++    return 16 + _mm_extract_epi16(v_cost, 0);
++#else /* } No SSE2 instructions { */
++    uint32_t scost = 0;
++    for (int i = 0; i < 64; ++i) {
++        scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i]));
++    }
++    return scost;
++#endif /* } */
+ }
+ 
+ #ifdef ALLOW_FOUR_COLORS
+--- src/vp8/model/model.hh
++++ src/vp8/model/model.hh
+@@ -11,9 +11,7 @@
+ #include "branch.hh"
+ #include "../util/aligned_block.hh"
+ #include "../util/block_based_image.hh"
+-#include <smmintrin.h>
+-#include <immintrin.h>
+-#include <emmintrin.h>
++#include "../util/mm_mullo_epi32.hh"
+ 
+ class BoolEncoder;
+ constexpr bool advanced_dc_prediction = true;
+--- src/vp8/model/numeric.hh
++++ src/vp8/model/numeric.hh
+@@ -8,8 +8,8 @@
+ // for std::min
+ #include <algorithm>
+ #include <assert.h>
+-#include <smmintrin.h>
+-#include <emmintrin.h>
++#include <immintrin.h>
++#include "../util/mm_mullo_epi32.hh"
+ 
+ #ifdef _WIN32
+ #include <intrin.h>
+--- src/vp8/util/mm_mullo_epi32.hh
++++ src/vp8/util/mm_mullo_epi32.hh
+@@ -0,0 +1,16 @@
++#if defined(__SSE2__) && !defined(__SSE4_1__) && !defined(MM_MULLO_EPI32_H)
++#define MM_MULLO_EPI32_H
++#include <immintrin.h>
++// See:	http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers
++// and	https://software.intel.com/en-us/forums/intel-c-compiler/topic/288768
++static inline __m128i
++_mm_mullo_epi32(const __m128i &a, const __m128i &b)
++{
++	__m128i tmp1 = _mm_mul_epu32(a,b); /* mul 2,0*/
++	__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a,4),
++	    _mm_srli_si128(b,4)); /* mul 3,1 */
++	return _mm_unpacklo_epi32( /* shuffle results to [63..0] and pack */
++	    _mm_shuffle_epi32(tmp1, _MM_SHUFFLE (0,0,2,0)),
++	    _mm_shuffle_epi32(tmp2, _MM_SHUFFLE (0,0,2,0)));
++}
++#endif

Added: head/graphics/lepton/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/graphics/lepton/pkg-descr	Fri Jul 15 01:27:43 2016	(r418567)
@@ -0,0 +1,9 @@
+A tool for creating and manipulating images in Lepton format.
+
+Developed by Dropbox, it is currently available under the Apache-2.0
+license.
+
+Compared to JPEG, the format averages 22% lower file-sizes without
+any (additional) loss of quality.
+
+WWW: https://github.com/dropbox/lepton/



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607150127.u6F1RhaS087043>