Revision 4bd4ee07 target-arm/neon_helper.c
b/target-arm/neon_helper.c | ||
---|---|---|
558 | 558 |
}} while (0) |
559 | 559 |
NEON_VOP(rshl_s8, neon_s8, 4) |
560 | 560 |
NEON_VOP(rshl_s16, neon_s16, 2) |
561 |
NEON_VOP(rshl_s32, neon_s32, 1) |
|
562 | 561 |
#undef NEON_FN |
563 | 562 |
|
563 |
/* The addition of the rounding constant may overflow, so we use an |
|
564 |
* intermediate 64 bits accumulator. */ |
|
565 |
uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) |
|
566 |
{ |
|
567 |
int32_t dest; |
|
568 |
int32_t val = (int32_t)valop; |
|
569 |
int8_t shift = (int8_t)shiftop; |
|
570 |
if ((shift >= 32) || (shift <= -32)) { |
|
571 |
dest = 0; |
|
572 |
} else if (shift < 0) { |
|
573 |
int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
|
574 |
dest = big_dest >> -shift; |
|
575 |
} else { |
|
576 |
dest = val << shift; |
|
577 |
} |
|
578 |
return dest; |
|
579 |
} |
|
580 |
|
|
581 |
/* Handling addition overflow with 64-bit input values is |
|
582 |
* trickier than with 32-bit values. */ |
|
564 | 583 |
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
565 | 584 |
{ |
566 | 585 |
int8_t shift = (int8_t)shiftop; |
... | ... | |
574 | 593 |
val++; |
575 | 594 |
val >>= 1; |
576 | 595 |
} else if (shift < 0) { |
577 |
val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; |
|
596 |
val >>= (-shift - 1); |
|
597 |
if (val == INT64_MAX) { |
|
598 |
/* In this case, it means that the rounding constant is 1, |
|
599 |
* and the addition would overflow. Return the actual |
|
600 |
* result directly. */ |
|
601 |
val = 0x4000000000000000LL; |
|
602 |
} else { |
|
603 |
val++; |
|
604 |
val >>= 1; |
|
605 |
} |
|
578 | 606 |
} else { |
579 | 607 |
val <<= shift; |
580 | 608 |
} |
... | ... | |
596 | 624 |
}} while (0) |
597 | 625 |
NEON_VOP(rshl_u8, neon_u8, 4) |
598 | 626 |
NEON_VOP(rshl_u16, neon_u16, 2) |
599 |
NEON_VOP(rshl_u32, neon_u32, 1) |
|
600 | 627 |
#undef NEON_FN |
601 | 628 |
|
629 |
/* The addition of the rounding constant may overflow, so we use an |
|
630 |
* intermediate 64 bits accumulator. */ |
|
631 |
uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) |
|
632 |
{ |
|
633 |
uint32_t dest; |
|
634 |
int8_t shift = (int8_t)shiftop; |
|
635 |
if (shift >= 32 || shift < -32) { |
|
636 |
dest = 0; |
|
637 |
} else if (shift == -32) { |
|
638 |
dest = val >> 31; |
|
639 |
} else if (shift < 0) { |
|
640 |
uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
|
641 |
dest = big_dest >> -shift; |
|
642 |
} else { |
|
643 |
dest = val << shift; |
|
644 |
} |
|
645 |
return dest; |
|
646 |
} |
|
647 |
|
|
648 |
/* Handling addition overflow with 64-bit input values is |
|
649 |
* trickier than with 32-bit values. */ |
|
602 | 650 |
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
603 | 651 |
{ |
604 | 652 |
int8_t shift = (uint8_t)shiftop; |
... | ... | |
607 | 655 |
} else if (shift == -64) { |
608 | 656 |
/* Rounding a 1-bit result just preserves that bit. */ |
609 | 657 |
val >>= 63; |
610 |
} if (shift < 0) { |
|
611 |
val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; |
|
612 |
val >>= -shift; |
|
658 |
} else if (shift < 0) { |
|
659 |
val >>= (-shift - 1); |
|
660 |
if (val == UINT64_MAX) { |
|
661 |
/* In this case, it means that the rounding constant is 1, |
|
662 |
* and the addition would overflow. Return the actual |
|
663 |
* result directly. */ |
|
664 |
val = 0x8000000000000000ULL; |
|
665 |
} else { |
|
666 |
val++; |
|
667 |
val >>= 1; |
|
668 |
} |
|
613 | 669 |
} else { |
614 | 670 |
val <<= shift; |
615 | 671 |
} |
... | ... | |
784 | 840 |
}} while (0) |
785 | 841 |
NEON_VOP_ENV(qrshl_u8, neon_u8, 4) |
786 | 842 |
NEON_VOP_ENV(qrshl_u16, neon_u16, 2) |
787 |
NEON_VOP_ENV(qrshl_u32, neon_u32, 1) |
|
788 | 843 |
#undef NEON_FN |
789 | 844 |
|
845 |
/* The addition of the rounding constant may overflow, so we use an |
|
846 |
* intermediate 64 bits accumulator. */ |
|
847 |
uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop) |
|
848 |
{ |
|
849 |
uint32_t dest; |
|
850 |
int8_t shift = (int8_t)shiftop; |
|
851 |
if (shift < 0) { |
|
852 |
uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
|
853 |
dest = big_dest >> -shift; |
|
854 |
} else { |
|
855 |
dest = val << shift; |
|
856 |
if ((dest >> shift) != val) { |
|
857 |
SET_QC(); |
|
858 |
dest = ~0; |
|
859 |
} |
|
860 |
} |
|
861 |
return dest; |
|
862 |
} |
|
863 |
|
|
864 |
/* Handling addition overflow with 64-bit input values is |
|
865 |
* trickier than with 32-bit values. */ |
|
790 | 866 |
uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
791 | 867 |
{ |
792 | 868 |
int8_t shift = (int8_t)shiftop; |
793 | 869 |
if (shift < 0) { |
794 |
val = (val + (1 << (-1 - shift))) >> -shift; |
|
870 |
val >>= (-shift - 1); |
|
871 |
if (val == UINT64_MAX) { |
|
872 |
/* In this case, it means that the rounding constant is 1, |
|
873 |
* and the addition would overflow. Return the actual |
|
874 |
* result directly. */ |
|
875 |
val = 0x8000000000000000ULL; |
|
876 |
} else { |
|
877 |
val++; |
|
878 |
val >>= 1; |
|
879 |
} |
|
795 | 880 |
} else { \ |
796 | 881 |
uint64_t tmp = val; |
797 | 882 |
val <<= shift; |
... | ... | |
817 | 902 |
}} while (0) |
818 | 903 |
NEON_VOP_ENV(qrshl_s8, neon_s8, 4) |
819 | 904 |
NEON_VOP_ENV(qrshl_s16, neon_s16, 2) |
820 |
NEON_VOP_ENV(qrshl_s32, neon_s32, 1) |
|
821 | 905 |
#undef NEON_FN |
822 | 906 |
|
907 |
/* The addition of the rounding constant may overflow, so we use an |
|
908 |
* intermediate 64 bits accumulator. */ |
|
909 |
uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
|
910 |
{ |
|
911 |
int32_t dest; |
|
912 |
int32_t val = (int32_t)valop; |
|
913 |
int8_t shift = (int8_t)shiftop; |
|
914 |
if (shift < 0) { |
|
915 |
int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
|
916 |
dest = big_dest >> -shift; |
|
917 |
} else { |
|
918 |
dest = val << shift; |
|
919 |
if ((dest >> shift) != val) { |
|
920 |
SET_QC(); |
|
921 |
dest = (val >> 31) ^ ~SIGNBIT; |
|
922 |
} |
|
923 |
} |
|
924 |
return dest; |
|
925 |
} |
|
926 |
|
|
927 |
/* Handling addition overflow with 64-bit input values is |
|
928 |
* trickier than with 32-bit values. */ |
|
823 | 929 |
uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
824 | 930 |
{ |
825 | 931 |
int8_t shift = (uint8_t)shiftop; |
826 | 932 |
int64_t val = valop; |
827 | 933 |
|
828 | 934 |
if (shift < 0) { |
829 |
val = (val + (1 << (-1 - shift))) >> -shift; |
|
935 |
val >>= (-shift - 1); |
|
936 |
if (val == INT64_MAX) { |
|
937 |
/* In this case, it means that the rounding constant is 1, |
|
938 |
* and the addition would overflow. Return the actual |
|
939 |
* result directly. */ |
|
940 |
val = 0x4000000000000000ULL; |
|
941 |
} else { |
|
942 |
val++; |
|
943 |
val >>= 1; |
|
944 |
} |
|
830 | 945 |
} else { |
831 |
int64_t tmp = val;;
|
|
946 |
int64_t tmp = val; |
|
832 | 947 |
val <<= shift; |
833 | 948 |
if ((val >> shift) != tmp) { |
834 | 949 |
SET_QC(); |
835 |
val = tmp >> 31;
|
|
950 |
val = (tmp >> 63) ^ ~SIGNBIT64;
|
|
836 | 951 |
} |
837 | 952 |
} |
838 | 953 |
return val; |
Also available in: Unified diff