@@ -508,19 +508,80 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
508508 (* index )++ ;
509509}
510510
/* Run-length cutoff for canonical reordering: runs of combining characters
 * shorter than this are sorted with insertion sort, while runs of at least
 * this many code points go through counting sort instead.
 */
#define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
513+
514+ static void
515+ canonical_ordering_sort_insertion (int kind , void * data ,
516+ Py_ssize_t start , Py_ssize_t end )
517+ {
518+ for (Py_ssize_t i = start + 1 ; i < end ; i ++ ) {
519+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
520+ unsigned char combining = _getrecord_ex (code )-> combining ;
521+ Py_ssize_t j = i ;
522+
523+ while (j > start ) {
524+ Py_UCS4 previous = PyUnicode_READ (kind , data , j - 1 );
525+ if (_getrecord_ex (previous )-> combining <= combining ) {
526+ break ;
527+ }
528+ PyUnicode_WRITE (kind , data , j , previous );
529+ j -- ;
530+ }
531+ if (j != i ) {
532+ PyUnicode_WRITE (kind , data , j , code );
533+ }
534+ }
535+ }
536+
537+ static void
538+ canonical_ordering_sort_counting (int kind , void * data ,
539+ Py_ssize_t start , Py_ssize_t end ,
540+ Py_UCS4 * sortbuf )
541+ {
542+ Py_ssize_t counts [256 ] = {0 };
543+ Py_ssize_t run_length = end - start ;
544+ Py_ssize_t total = 0 ;
545+
546+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
547+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
548+ unsigned char combining = _getrecord_ex (code )-> combining ;
549+ counts [combining ]++ ;
550+ }
551+
552+ for (size_t i = 0 ; i < Py_ARRAY_LENGTH (counts ); i ++ ) {
553+ Py_ssize_t count = counts [i ];
554+ counts [i ] = total ;
555+ total += count ;
556+ }
557+
558+ /* Reuse counts[] as the next output slot for each CCC. */
559+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
560+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
561+ unsigned char combining = _getrecord_ex (code )-> combining ;
562+ sortbuf [counts [combining ]++ ] = code ;
563+ }
564+ for (Py_ssize_t i = 0 ; i < run_length ; i ++ ) {
565+ PyUnicode_WRITE (kind , data , start + i , sortbuf [i ]);
566+ }
567+ }
568+
511569static PyObject *
512570nfd_nfkd (PyObject * self , PyObject * input , int k )
513571{
514572 PyObject * result ;
515573 Py_UCS4 * output ;
516574 Py_ssize_t i , o , osize ;
517- int kind ;
518- const void * data ;
575+ int input_kind , result_kind ;
576+ const void * input_data ;
577+ void * result_data ;
519578 /* Longest decomposition in Unicode 3.2: U+FDFA */
520579 Py_UCS4 stack [20 ];
521580 Py_ssize_t space , isize ;
522581 int index , prefix , count , stackptr ;
523582 unsigned char prev , cur ;
583+ Py_UCS4 * sortbuf = NULL ;
584+ Py_ssize_t sortbuflen = 0 ;
524585
525586 stackptr = 0 ;
526587 isize = PyUnicode_GET_LENGTH (input );
@@ -540,11 +601,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
540601 return NULL ;
541602 }
542603 i = o = 0 ;
543- kind = PyUnicode_KIND (input );
544- data = PyUnicode_DATA (input );
604+ input_kind = PyUnicode_KIND (input );
605+ input_data = PyUnicode_DATA (input );
545606
546607 while (i < isize ) {
547- stack [stackptr ++ ] = PyUnicode_READ (kind , data , i ++ );
608+ stack [stackptr ++ ] = PyUnicode_READ (input_kind , input_data , i ++ );
548609 while (stackptr ) {
549610 Py_UCS4 code = stack [-- stackptr ];
550611 /* Hangul Decomposition adds three characters in
@@ -611,35 +672,66 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
611672 PyMem_Free (output );
612673 if (!result )
613674 return NULL ;
675+
614676 /* result is guaranteed to be ready, as it is compact. */
615- kind = PyUnicode_KIND (result );
616- data = PyUnicode_DATA (result );
677+ result_kind = PyUnicode_KIND (result );
678+ result_data = PyUnicode_DATA (result );
617679
618- /* Sort canonically. */
680+ /* Sort each consecutive combining-character run canonically. */
619681 i = 0 ;
620- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
621- for (i ++ ; i < PyUnicode_GET_LENGTH (result ); i ++ ) {
622- cur = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
623- if (prev == 0 || cur == 0 || prev <= cur ) {
624- prev = cur ;
682+ while (i < o ) {
683+ Py_ssize_t run_length , run_start ;
684+ int needs_sort = 0 ;
685+
686+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
687+ prev = _getrecord_ex (ch )-> combining ;
688+ if (prev == 0 ) {
689+ i ++ ;
625690 continue ;
626691 }
627- /* Non-canonical order. Need to switch *i with previous. */
628- o = i - 1 ;
629- while (1 ) {
630- Py_UCS4 tmp = PyUnicode_READ (kind , data , o + 1 );
631- PyUnicode_WRITE (kind , data , o + 1 ,
632- PyUnicode_READ (kind , data , o ));
633- PyUnicode_WRITE (kind , data , o , tmp );
634- o -- ;
635- if (o < 0 )
636- break ;
637- prev = _getrecord_ex (PyUnicode_READ (kind , data , o ))-> combining ;
638- if (prev == 0 || prev <= cur )
692+
693+ run_start = i ++ ;
694+ while (i < o ) {
695+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
696+ cur = _getrecord_ex (ch )-> combining ;
697+ if (cur == 0 ) {
639698 break ;
699+ }
700+ if (prev > cur ) {
701+ needs_sort = 1 ;
702+ }
703+ prev = cur ;
704+ i ++ ;
705+ }
706+ if (!needs_sort ) {
707+ continue ;
708+ }
709+
710+ run_length = i - run_start ;
711+ if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD ) {
712+ canonical_ordering_sort_insertion (result_kind , result_data ,
713+ run_start , i );
714+ continue ;
640715 }
641- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
716+
717+ if (run_length > sortbuflen ) {
718+ Py_UCS4 * new_sortbuf = PyMem_Resize (sortbuf ,
719+ Py_UCS4 ,
720+ run_length );
721+ if (new_sortbuf == NULL ) {
722+ PyErr_NoMemory ();
723+ PyMem_Free (sortbuf );
724+ Py_DECREF (result );
725+ return NULL ;
726+ }
727+ sortbuf = new_sortbuf ;
728+ sortbuflen = run_length ;
729+ }
730+
731+ canonical_ordering_sort_counting (result_kind , result_data ,
732+ run_start , i , sortbuf );
642733 }
734+ PyMem_Free (sortbuf );
643735 return result ;
644736}
645737
0 commit comments