; p256-x86_64-asm.asm (80 KB)
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; --- Assembler setup -------------------------------------------------
  ; Memory operands are RIP-relative by default. XMMWORD/YMMWORD/ZMMWORD are
  ; defined as empty macros, so they expand to nothing wherever they appear.
  3. default rel
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  ; Optional symbol-prefix include, pulled in only when BORINGSSL_PREFIX is defined.
  7. %ifdef BORINGSSL_PREFIX
  8. %include "boringssl_prefix_symbols_nasm.inc"
  9. %endif
  10. section .text code align=64
  ; External capability vector; the ord_mul/ord_sqr entry points below test the
  ; dword at offset 8 of it to choose between two code paths.
  11. EXTERN OPENSSL_ia32cap_P
  ; --- Constant tables (stored in this section, 64-byte aligned) -------
  12. ALIGN 64
  ; Four 64-bit limbs, least-significant limb first. Read as a 256-bit integer
  ; this is 2^256 - 2^224 + 2^192 + 2^96 - 1 (the NIST P-256 field prime).
  ; ecp_nistz256_neg adds it back after negating.
  13. $L$poly:
  14. DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
  ; Eight dwords each of 1, 2 and 3. Not referenced in the portion of the file
  ; reviewed here.
  15. $L$One:
  16. DD 1,1,1,1,1,1,1,1
  17. $L$Two:
  18. DD 2,2,2,2,2,2,2,2
  19. $L$Three:
  20. DD 3,3,3,3,3,3,3,3
  ; Numerically equal to 2^256 - $L$poly (same limb order), i.e. the value 1
  ; in Montgomery form for that modulus. Not referenced in the portion reviewed here.
  21. $L$ONE_mont:
  22. DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
  ; Modulus used by the ecp_nistz256_ord_* routines, least-significant limb first.
  ; Its two upper limbs are 0xffffffffffffffff and 0xffffffff00000000.
  23. $L$ord:
  24. DQ 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
  ; Per-limb reduction multiplier used by the ord_* routines (imul with the low
  ; accumulator limb); presumably -($L$ord)^-1 mod 2^64 -- TODO confirm.
  ; LAYOUT DEPENDENCY: ecp_nistz256_ord_sqr_mont reads this value as
  ; QWORD[32+rsi] with rsi = $L$ord, so it must stay immediately after the
  ; 32 bytes of $L$ord.
  25. $L$ordK:
  26. DQ 0xccd1c8aaee00bc4f
  ; ---------------------------------------------------------------------
  ; ecp_nistz256_neg -- negate a 4-limb (256-bit) value modulo $L$poly.
  ; Calling convention: Win64 (arguments arrive in rcx, rdx).
  ;   rcx = destination pointer (four qwords are stored)
  ;   rdx = source pointer      (four qwords are loaded)
  ; Result: 0 when the input is zero, otherwise ($L$poly - input) mod 2^256.
  ; The choice is made with cmov, not a branch. Presumably the input is
  ; expected to be already reduced below $L$poly -- TODO confirm with callers.
  ; Preserves rdi, rsi (spilled to the caller-provided slots at [8+rsp] and
  ; [16+rsp]) and r12, r13 (pushed). Overwrites rax, rcx, rdx, r8-r11, flags.
  ; ---------------------------------------------------------------------
  27. global ecp_nistz256_neg
  28. ALIGN 32
  29. ecp_nistz256_neg:
  30. mov QWORD[8+rsp],rdi ;WIN64 prologue
  31. mov QWORD[16+rsp],rsi
  ; rax = entry rsp; it is overwritten below before being read again.
  ; NOTE(review): presumably kept for unwind metadata defined elsewhere.
  32. mov rax,rsp
  33. $L$SEH_begin_ecp_nistz256_neg:
  ; Move the Win64 argument registers into rdi/rsi, which the body uses.
  34. mov rdi,rcx
  35. mov rsi,rdx
  36. push r12
  37. push r13
  38. $L$neg_body:
  ; r11:r10:r9:r8 = 0 - input, with the final borrow captured in r13
  ; (r13 = 0 if the input was zero, all-ones otherwise).
  39. xor r8,r8
  40. xor r9,r9
  41. xor r10,r10
  42. xor r11,r11
  43. xor r13,r13
  44. sub r8,QWORD[rsi]
  45. sbb r9,QWORD[8+rsi]
  46. sbb r10,QWORD[16+rsi]
  ; Copies of the raw difference are kept in rax, rdx, rcx, r12 (mov does not
  ; disturb the carry chain).
  47. mov rax,r8
  48. sbb r11,QWORD[24+rsi]
  ; rsi is repurposed: from here on it points at the modulus.
  49. lea rsi,[$L$poly]
  50. mov rdx,r9
  51. sbb r13,0
  ; r11:r10:r9:r8 = raw difference + $L$poly.
  52. add r8,QWORD[rsi]
  53. mov rcx,r10
  54. adc r9,QWORD[8+rsi]
  55. adc r10,QWORD[16+rsi]
  56. mov r12,r11
  57. adc r11,QWORD[24+rsi]
  ; No borrow (input was zero): take the raw difference (zero) instead of the
  ; sum. Stores to the destination are interleaved with the selects.
  58. test r13,r13
  59. cmovz r8,rax
  60. cmovz r9,rdx
  61. mov QWORD[rdi],r8
  62. cmovz r10,rcx
  63. mov QWORD[8+rdi],r9
  64. cmovz r11,r12
  65. mov QWORD[16+rdi],r10
  66. mov QWORD[24+rdi],r11
  ; Restore r13/r12 from the two pushed slots and release them.
  67. mov r13,QWORD[rsp]
  68. mov r12,QWORD[8+rsp]
  69. lea rsp,[16+rsp]
  70. $L$neg_epilogue:
  71. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  72. mov rsi,QWORD[16+rsp]
  73. DB 0F3h,0C3h ;repret
  74. $L$SEH_end_ecp_nistz256_neg:
  ; ---------------------------------------------------------------------
  ; ecp_nistz256_ord_mul_mont -- 4-limb Montgomery multiplication modulo $L$ord.
  ; Calling convention: Win64.
  ;   rcx = destination pointer       (four qwords are stored)
  ;   rdx = pointer to first operand  (four qwords, read through rsi)
  ;   r8  = pointer to second operand (four qwords, read through rbx)
  ; Dispatch: if (dword at OPENSSL_ia32cap_P+8) & 0x80100 == 0x80100, control
  ; jumps to $L$ecp_nistz256_ord_mul_montx, which uses mulx/adcx/adox (so the
  ; two tested bits are presumably the BMI2 and ADX feature flags).
  ; Otherwise the mul-based code below runs: four rounds, one per limb of the
  ; second operand, each followed by one reduction step, then a final
  ; conditional subtraction of $L$ord selected with cmovc.
  ; Preserves rdi, rsi (spilled to [8+rsp]/[16+rsp]) and rbp, rbx, r12-r15
  ; (pushed). Overwrites rax, rcx, rdx, r8-r11 and flags.
  ; ---------------------------------------------------------------------
  75. global ecp_nistz256_ord_mul_mont
  76. ALIGN 32
  77. ecp_nistz256_ord_mul_mont:
  78. mov QWORD[8+rsp],rdi ;WIN64 prologue
  79. mov QWORD[16+rsp],rsi
  80. mov rax,rsp
  81. $L$SEH_begin_ecp_nistz256_ord_mul_mont:
  ; Remap Win64 arguments: rdi = destination, rsi = first operand,
  ; rdx = second operand.
  82. mov rdi,rcx
  83. mov rsi,rdx
  84. mov rdx,r8
  ; Capability test; the jump is taken before anything is pushed, and the
  ; target label sits just ahead of the other routine's own pushes.
  85. lea rcx,[OPENSSL_ia32cap_P]
  86. mov rcx,QWORD[8+rcx]
  87. and ecx,0x80100
  88. cmp ecx,0x80100
  89. je NEAR $L$ecp_nistz256_ord_mul_montx
  90. push rbp
  91. push rbx
  92. push r12
  93. push r13
  94. push r14
  95. push r15
  96. $L$ord_mul_body:
  ; Long-lived registers: rbx = second-operand pointer, r14 = &$L$ord,
  ; r15 = $L$ordK, rsi = first-operand pointer.
  ; ---- Round 0: accumulate first_operand * second_operand[0] into r8..r12 ----
  97. mov rax,QWORD[rdx]
  98. mov rbx,rdx
  99. lea r14,[$L$ord]
  100. mov r15,QWORD[$L$ordK]
  101. mov rcx,rax
  102. mul QWORD[rsi]
  103. mov r8,rax
  104. mov rax,rcx
  105. mov r9,rdx
  106. mul QWORD[8+rsi]
  107. add r9,rax
  108. mov rax,rcx
  109. adc rdx,0
  110. mov r10,rdx
  111. mul QWORD[16+rsi]
  112. add r10,rax
  113. mov rax,rcx
  114. adc rdx,0
  ; r13 keeps the low accumulator limb; r8 becomes the reduction factor
  ; (low limb * $L$ordK, truncated to 64 bits).
  115. mov r13,r8
  116. imul r8,r15
  117. mov r11,rdx
  118. mul QWORD[24+rsi]
  119. add r11,rax
  120. mov rax,r8
  121. adc rdx,0
  122. mov r12,rdx
  ; Reduction step 0. Only the two low limbs of $L$ord are multiplied with mul;
  ; the upper limbs (0xffffffffffffffff and 0xffffffff00000000) appear to be
  ; folded in using sub/sbb and 32-bit shifts of the factor instead.
  123. mul QWORD[r14]
  124. mov rbp,r8
  125. add r13,rax
  126. mov rax,r8
  127. adc rdx,0
  128. mov rcx,rdx
  129. sub r10,r8
  130. sbb r8,0
  131. mul QWORD[8+r14]
  132. add r9,rcx
  133. adc rdx,0
  134. add r9,rax
  135. mov rax,rbp
  136. adc r10,rdx
  137. mov rdx,rbp
  138. adc r8,0
  139. shl rax,32
  140. shr rdx,32
  141. sub r11,rax
  ; The load of the next second-operand limb is interleaved with the tail of
  ; the reduction (mov does not affect the flags used by the following sbb).
  142. mov rax,QWORD[8+rbx]
  143. sbb rbp,rdx
  144. add r11,r8
  145. adc r12,rbp
  146. adc r13,0
  ; ---- Round 1: add first_operand * second_operand[1]; accumulator is now
  ; r9..r13 with r8 as the new top limb ----
  147. mov rcx,rax
  148. mul QWORD[rsi]
  149. add r9,rax
  150. mov rax,rcx
  151. adc rdx,0
  152. mov rbp,rdx
  153. mul QWORD[8+rsi]
  154. add r10,rbp
  155. adc rdx,0
  156. add r10,rax
  157. mov rax,rcx
  158. adc rdx,0
  159. mov rbp,rdx
  160. mul QWORD[16+rsi]
  161. add r11,rbp
  162. adc rdx,0
  163. add r11,rax
  164. mov rax,rcx
  165. adc rdx,0
  ; rcx keeps the low accumulator limb; r9 becomes the reduction factor.
  166. mov rcx,r9
  167. imul r9,r15
  168. mov rbp,rdx
  169. mul QWORD[24+rsi]
  170. add r12,rbp
  171. adc rdx,0
  172. xor r8,r8
  173. add r12,rax
  174. mov rax,r9
  175. adc r13,rdx
  176. adc r8,0
  ; Reduction step 1 (same pattern as step 0, registers rotated by one).
  177. mul QWORD[r14]
  178. mov rbp,r9
  179. add rcx,rax
  180. mov rax,r9
  181. adc rcx,rdx
  182. sub r11,r9
  183. sbb r9,0
  184. mul QWORD[8+r14]
  185. add r10,rcx
  186. adc rdx,0
  187. add r10,rax
  188. mov rax,rbp
  189. adc r11,rdx
  190. mov rdx,rbp
  191. adc r9,0
  192. shl rax,32
  193. shr rdx,32
  194. sub r12,rax
  195. mov rax,QWORD[16+rbx]
  196. sbb rbp,rdx
  197. add r12,r9
  198. adc r13,rbp
  199. adc r8,0
  ; ---- Round 2: add first_operand * second_operand[2]; accumulator is now
  ; r10..r13, r8 with r9 as the new top limb ----
  200. mov rcx,rax
  201. mul QWORD[rsi]
  202. add r10,rax
  203. mov rax,rcx
  204. adc rdx,0
  205. mov rbp,rdx
  206. mul QWORD[8+rsi]
  207. add r11,rbp
  208. adc rdx,0
  209. add r11,rax
  210. mov rax,rcx
  211. adc rdx,0
  212. mov rbp,rdx
  213. mul QWORD[16+rsi]
  214. add r12,rbp
  215. adc rdx,0
  216. add r12,rax
  217. mov rax,rcx
  218. adc rdx,0
  ; rcx keeps the low accumulator limb; r10 becomes the reduction factor.
  219. mov rcx,r10
  220. imul r10,r15
  221. mov rbp,rdx
  222. mul QWORD[24+rsi]
  223. add r13,rbp
  224. adc rdx,0
  225. xor r9,r9
  226. add r13,rax
  227. mov rax,r10
  228. adc r8,rdx
  229. adc r9,0
  ; Reduction step 2.
  230. mul QWORD[r14]
  231. mov rbp,r10
  232. add rcx,rax
  233. mov rax,r10
  234. adc rcx,rdx
  235. sub r12,r10
  236. sbb r10,0
  237. mul QWORD[8+r14]
  238. add r11,rcx
  239. adc rdx,0
  240. add r11,rax
  241. mov rax,rbp
  242. adc r12,rdx
  243. mov rdx,rbp
  244. adc r10,0
  245. shl rax,32
  246. shr rdx,32
  247. sub r13,rax
  248. mov rax,QWORD[24+rbx]
  249. sbb rbp,rdx
  250. add r13,r10
  251. adc r8,rbp
  252. adc r9,0
  ; ---- Round 3: add first_operand * second_operand[3]; accumulator is now
  ; r11..r13, r8, r9 with r10 as the new top limb ----
  253. mov rcx,rax
  254. mul QWORD[rsi]
  255. add r11,rax
  256. mov rax,rcx
  257. adc rdx,0
  258. mov rbp,rdx
  259. mul QWORD[8+rsi]
  260. add r12,rbp
  261. adc rdx,0
  262. add r12,rax
  263. mov rax,rcx
  264. adc rdx,0
  265. mov rbp,rdx
  266. mul QWORD[16+rsi]
  267. add r13,rbp
  268. adc rdx,0
  269. add r13,rax
  270. mov rax,rcx
  271. adc rdx,0
  ; rcx keeps the low accumulator limb; r11 becomes the reduction factor.
  272. mov rcx,r11
  273. imul r11,r15
  274. mov rbp,rdx
  275. mul QWORD[24+rsi]
  276. add r8,rbp
  277. adc rdx,0
  278. xor r10,r10
  279. add r8,rax
  280. mov rax,r11
  281. adc r9,rdx
  282. adc r10,0
  ; Reduction step 3.
  283. mul QWORD[r14]
  284. mov rbp,r11
  285. add rcx,rax
  286. mov rax,r11
  287. adc rcx,rdx
  288. sub r13,r11
  289. sbb r11,0
  290. mul QWORD[8+r14]
  291. add r12,rcx
  292. adc rdx,0
  293. add r12,rax
  294. mov rax,rbp
  295. adc r13,rdx
  296. mov rdx,rbp
  297. adc r11,0
  298. shl rax,32
  299. shr rdx,32
  300. sub r8,rax
  301. sbb rbp,rdx
  302. add r8,r11
  303. adc r9,rbp
  304. adc r10,0
  ; ---- Final correction: result candidate is r9:r8:r13:r12 with r10 as the
  ; extra top limb. Subtract $L$ord, keeping the unsubtracted limbs in
  ; rsi, r11, rcx, rbp; on borrow (CF set) the cmovc's put them back. ----
  305. mov rsi,r12
  306. sub r12,QWORD[r14]
  307. mov r11,r13
  308. sbb r13,QWORD[8+r14]
  309. mov rcx,r8
  310. sbb r8,QWORD[16+r14]
  311. mov rbp,r9
  312. sbb r9,QWORD[24+r14]
  313. sbb r10,0
  314. cmovc r12,rsi
  315. cmovc r13,r11
  316. cmovc r8,rcx
  317. cmovc r9,rbp
  ; Store the four result limbs, least-significant first.
  318. mov QWORD[rdi],r12
  319. mov QWORD[8+rdi],r13
  320. mov QWORD[16+rdi],r8
  321. mov QWORD[24+rdi],r9
  ; Reload the six pushed registers (reverse push order) and drop the 48 bytes.
  322. mov r15,QWORD[rsp]
  323. mov r14,QWORD[8+rsp]
  324. mov r13,QWORD[16+rsp]
  325. mov r12,QWORD[24+rsp]
  326. mov rbx,QWORD[32+rsp]
  327. mov rbp,QWORD[40+rsp]
  328. lea rsp,[48+rsp]
  329. $L$ord_mul_epilogue:
  330. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  331. mov rsi,QWORD[16+rsp]
  332. DB 0F3h,0C3h ;repret
  333. $L$SEH_end_ecp_nistz256_ord_mul_mont:
  ; ---------------------------------------------------------------------
  ; ecp_nistz256_ord_sqr_mont -- repeated 4-limb Montgomery squaring modulo $L$ord.
  ; Calling convention: Win64.
  ;   rcx = destination pointer (four qwords are stored)
  ;   rdx = source pointer      (four qwords are loaded once, before the loop)
  ;   r8  = repetition count (number of successive squarings)
  ; The loop is bottom-tested (dec rbx / jnz), so the body always runs at least
  ; once; a count of 0 would wrap around rather than return the input.
  ; Dispatch: if (dword at OPENSSL_ia32cap_P+8) & 0x80100 == 0x80100, control
  ; jumps to $L$ecp_nistz256_ord_sqr_montx (defined outside the portion of the
  ; file reviewed here).
  ; Preserves rdi, rsi (spilled to [8+rsp]/[16+rsp]) and rbp, rbx, r12-r15
  ; (pushed). Overwrites rax, rcx, rdx, r8-r11, flags, and xmm1-xmm3 (which
  ; are zeroed before returning).
  ; ---------------------------------------------------------------------
  334. global ecp_nistz256_ord_sqr_mont
  335. ALIGN 32
  336. ecp_nistz256_ord_sqr_mont:
  337. mov QWORD[8+rsp],rdi ;WIN64 prologue
  338. mov QWORD[16+rsp],rsi
  339. mov rax,rsp
  340. $L$SEH_begin_ecp_nistz256_ord_sqr_mont:
  ; Remap Win64 arguments: rdi = destination, rsi = source, rdx = count.
  341. mov rdi,rcx
  342. mov rsi,rdx
  343. mov rdx,r8
  344. lea rcx,[OPENSSL_ia32cap_P]
  345. mov rcx,QWORD[8+rcx]
  346. and ecx,0x80100
  347. cmp ecx,0x80100
  348. je NEAR $L$ecp_nistz256_ord_sqr_montx
  349. push rbp
  350. push rbx
  351. push r12
  352. push r13
  353. push r14
  354. push r15
  355. $L$ord_sqr_body:
  ; Load the operand into r8 (limb 0), rax (limb 1), r14 (limb 2), r15 (limb 3).
  ; The loop leaves its result in these same four registers for the next pass.
  ; rsi is repurposed to point at $L$ord; rbx = remaining repetition count.
  356. mov r8,QWORD[rsi]
  357. mov rax,QWORD[8+rsi]
  358. mov r14,QWORD[16+rsi]
  359. mov r15,QWORD[24+rsi]
  360. lea rsi,[$L$ord]
  361. mov rbx,rdx
  362. jmp NEAR $L$oop_ord_sqr
  363. ALIGN 32
  364. $L$oop_ord_sqr:
  ; ---- Cross products limb[i]*limb[j], i<j, accumulated in r9..r14 ----
  ; Limbs 1, 2, 3 are also copied into xmm1, xmm2, xmm3 because rbp/r14/r15
  ; are reused as accumulators before the squares are computed.
  365. mov rbp,rax
  366. mul r8
  367. mov r9,rax
  368. DB 102,72,15,110,205 ; movq xmm1,rbp
  369. mov rax,r14
  370. mov r10,rdx
  371. mul r8
  372. add r10,rax
  373. mov rax,r15
  374. DB 102,73,15,110,214 ; movq xmm2,r14
  375. adc rdx,0
  376. mov r11,rdx
  377. mul r8
  378. add r11,rax
  379. mov rax,r15
  380. DB 102,73,15,110,223 ; movq xmm3,r15
  381. adc rdx,0
  382. mov r12,rdx
  383. mul r14
  384. mov r13,rax
  385. mov rax,r14
  386. mov r14,rdx
  387. mul rbp
  388. add r11,rax
  389. mov rax,r15
  390. adc rdx,0
  391. mov r15,rdx
  392. mul rbp
  393. add r12,rax
  394. adc rdx,0
  395. add r12,r15
  396. adc r13,rdx
  397. adc r14,0
  ; ---- Double the cross-product sum (r9..r14), carry-out into r15 ----
  398. xor r15,r15
  399. mov rax,r8
  400. add r9,r9
  401. adc r10,r10
  402. adc r11,r11
  403. adc r12,r12
  404. adc r13,r13
  405. adc r14,r14
  406. adc r15,0
  ; ---- Add the squares limb[i]^2; limbs 1..3 come back from xmm1..xmm3.
  ; The full 8-limb square ends up in r8 (lowest), r9..r15. ----
  407. mul rax
  408. mov r8,rax
  409. DB 102,72,15,126,200 ; movq rax,xmm1
  410. mov rbp,rdx
  411. mul rax
  412. add r9,rbp
  413. adc r10,rax
  414. DB 102,72,15,126,208 ; movq rax,xmm2
  415. adc rdx,0
  416. mov rbp,rdx
  417. mul rax
  418. add r11,rbp
  419. adc r12,rax
  420. DB 102,72,15,126,216 ; movq rax,xmm3
  421. adc rdx,0
  422. mov rbp,rdx
  ; Start of reduction step 0, interleaved with the last square:
  ; rcx = low limb, r8 = low limb * QWORD[32+rsi] (that address is $L$ordK,
  ; which sits directly after the four limbs of $L$ord).
  423. mov rcx,r8
  424. imul r8,QWORD[32+rsi]
  425. mul rax
  426. add r13,rbp
  427. adc r14,rax
  428. mov rax,QWORD[rsi]
  429. adc r15,rdx
  ; ---- Reduction step 0 (factor in r8). As in the multiplication routine,
  ; only the two low limbs of $L$ord go through mul; the upper two appear to
  ; be folded in with sub/sbb and 32-bit shifts of the factor. ----
  430. mul r8
  431. mov rbp,r8
  432. add rcx,rax
  433. mov rax,QWORD[8+rsi]
  434. adc rcx,rdx
  435. sub r10,r8
  436. sbb rbp,0
  437. mul r8
  438. add r9,rcx
  439. adc rdx,0
  440. add r9,rax
  441. mov rax,r8
  442. adc r10,rdx
  443. mov rdx,r8
  444. adc rbp,0
  ; The factor for the next step is computed early (imul is placed ahead of
  ; the shifts that finish the current step).
  445. mov rcx,r9
  446. imul r9,QWORD[32+rsi]
  447. shl rax,32
  448. shr rdx,32
  449. sub r11,rax
  450. mov rax,QWORD[rsi]
  451. sbb r8,rdx
  452. add r11,rbp
  453. adc r8,0
  ; ---- Reduction step 1 (factor in r9) ----
  454. mul r9
  455. mov rbp,r9
  456. add rcx,rax
  457. mov rax,QWORD[8+rsi]
  458. adc rcx,rdx
  459. sub r11,r9
  460. sbb rbp,0
  461. mul r9
  462. add r10,rcx
  463. adc rdx,0
  464. add r10,rax
  465. mov rax,r9
  466. adc r11,rdx
  467. mov rdx,r9
  468. adc rbp,0
  469. mov rcx,r10
  470. imul r10,QWORD[32+rsi]
  471. shl rax,32
  472. shr rdx,32
  473. sub r8,rax
  474. mov rax,QWORD[rsi]
  475. sbb r9,rdx
  476. add r8,rbp
  477. adc r9,0
  ; ---- Reduction step 2 (factor in r10) ----
  478. mul r10
  479. mov rbp,r10
  480. add rcx,rax
  481. mov rax,QWORD[8+rsi]
  482. adc rcx,rdx
  483. sub r8,r10
  484. sbb rbp,0
  485. mul r10
  486. add r11,rcx
  487. adc rdx,0
  488. add r11,rax
  489. mov rax,r10
  490. adc r8,rdx
  491. mov rdx,r10
  492. adc rbp,0
  493. mov rcx,r11
  494. imul r11,QWORD[32+rsi]
  495. shl rax,32
  496. shr rdx,32
  497. sub r9,rax
  498. mov rax,QWORD[rsi]
  499. sbb r10,rdx
  500. add r9,rbp
  501. adc r10,0
  ; ---- Reduction step 3 (factor in r11) ----
  502. mul r11
  503. mov rbp,r11
  504. add rcx,rax
  505. mov rax,QWORD[8+rsi]
  506. adc rcx,rdx
  507. sub r9,r11
  508. sbb rbp,0
  509. mul r11
  510. add r8,rcx
  511. adc rdx,0
  512. add r8,rax
  513. mov rax,r11
  514. adc r9,rdx
  515. mov rdx,r11
  516. adc rbp,0
  517. shl rax,32
  518. shr rdx,32
  519. sub r10,rax
  520. sbb r11,rdx
  521. add r10,rbp
  522. adc r11,0
  ; ---- Add the upper half of the square (r12..r15) to the reduced lower half
  ; (r8..r11); rdx collects the carry-out. Copies of the sum are kept in
  ; r12, rax, r14, r15 for the conditional subtraction below. ----
  523. xor rdx,rdx
  524. add r8,r12
  525. adc r9,r13
  526. mov r12,r8
  527. adc r10,r14
  528. adc r11,r15
  529. mov rax,r9
  530. adc rdx,0
  ; ---- Conditional subtraction of $L$ord. On borrow (CF set) the
  ; unsubtracted copy is used, otherwise the difference. Either way the result
  ; lands in r8, rax, r14, r15 -- the registers the loop expects on entry. ----
  531. sub r8,QWORD[rsi]
  532. mov r14,r10
  533. sbb r9,QWORD[8+rsi]
  534. sbb r10,QWORD[16+rsi]
  535. mov r15,r11
  536. sbb r11,QWORD[24+rsi]
  537. sbb rdx,0
  538. cmovc r8,r12
  539. cmovnc rax,r9
  540. cmovnc r14,r10
  541. cmovnc r15,r11
  542. dec rbx
  543. jnz NEAR $L$oop_ord_sqr
  ; Store the result; the pxor's zero xmm1-xmm3, which held copies of operand
  ; limbs during the loop.
  544. mov QWORD[rdi],r8
  545. mov QWORD[8+rdi],rax
  546. pxor xmm1,xmm1
  547. mov QWORD[16+rdi],r14
  548. pxor xmm2,xmm2
  549. mov QWORD[24+rdi],r15
  550. pxor xmm3,xmm3
  ; Reload the six pushed registers (reverse push order) and drop the 48 bytes.
  551. mov r15,QWORD[rsp]
  552. mov r14,QWORD[8+rsp]
  553. mov r13,QWORD[16+rsp]
  554. mov r12,QWORD[24+rsp]
  555. mov rbx,QWORD[32+rsp]
  556. mov rbp,QWORD[40+rsp]
  557. lea rsp,[48+rsp]
  558. $L$ord_sqr_epilogue:
  559. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  560. mov rsi,QWORD[16+rsp]
  561. DB 0F3h,0C3h ;repret
  562. $L$SEH_end_ecp_nistz256_ord_sqr_mont:
;-----------------------------------------------------------------------
; ecp_nistz256_ord_mul_montx  (mulx/adcx/adox code path)
; ABI:   Win64 entry. Arguments arrive in rcx, rdx, r8 and are moved to
;        rdi, rsi, rdx before the body runs.
; In:    rdi = result pointer (4 qwords are written)
;        rsi = pointer to first 4-qword operand
;        rdx = pointer to second 4-qword operand (kept in rbx)
; Does:  four multiplication rows, each followed by a reduction row that
;        uses the four words at $L$ord and the constant $L$ordK, then one
;        conditional subtraction of $L$ord.
;        NOTE(review): going by the name this is presumably Montgomery
;        multiplication modulo the group order -- confirm against the
;        $L$ord / $L$ordK data definitions.
; Saves: rbp, rbx, r12-r15 (pushed), rdi/rsi (caller's home slots).
;-----------------------------------------------------------------------
563. ALIGN 32
564. ecp_nistz256_ord_mul_montx:
565. mov QWORD[8+rsp],rdi ;WIN64 prologue
566. mov QWORD[16+rsp],rsi
567. mov rax,rsp
568. $L$SEH_begin_ecp_nistz256_ord_mul_montx:
; Shuffle the three Win64 argument registers into rdi/rsi/rdx.
569. mov rdi,rcx
570. mov rsi,rdx
571. mov rdx,r8
572. $L$ecp_nistz256_ord_mul_montx:
573. push rbp
574. push rbx
575. push r12
576. push r13
577. push r14
578. push r15
579. $L$ord_mulx_body:
; rbx = second operand pointer, rdx = its word 0 (implicit mulx source),
; r9..r12 = the four words of the first operand.
580. mov rbx,rdx
581. mov rdx,QWORD[rdx]
582. mov r9,QWORD[rsi]
583. mov r10,QWORD[8+rsi]
584. mov r11,QWORD[16+rsi]
585. mov r12,QWORD[24+rsi]
; rsi and r14 are biased by -128; every later access adds 128 back.
586. lea rsi,[((-128))+rsi]
587. lea r14,[(($L$ord-128))]
588. mov r15,QWORD[$L$ordK]
; Row 0: r12:r11:r10:r9:r8 = first operand * word 0 of second operand.
589. mulx r9,r8,r9
590. mulx r10,rcx,r10
591. mulx r11,rbp,r11
592. add r9,rcx
593. mulx r12,rcx,r12
; rdx = low 64 bits of (r8 * $L$ordK): multiplier for the reduction row.
; The high half lands in rax and is not used.
594. mov rdx,r8
595. mulx rax,rdx,r15
596. adc r10,rbp
597. adc r11,rcx
598. adc r12,0
; Reduction 0: add rdx * $L$ord[0..3] into r8..r12 on two independent
; carry chains (adcx uses CF, adox uses OF). The xor zeroes r13 and
; clears both flags so the chains start clean.
599. xor r13,r13
600. mulx rbp,rcx,QWORD[((0+128))+r14]
601. adcx r8,rcx
602. adox r9,rbp
603. mulx rbp,rcx,QWORD[((8+128))+r14]
604. adcx r9,rcx
605. adox r10,rbp
606. mulx rbp,rcx,QWORD[((16+128))+r14]
607. adcx r10,rcx
608. adox r11,rbp
609. mulx rbp,rcx,QWORD[((24+128))+r14]
; Load word 1 of the second operand for the next row (mov leaves flags alone).
610. mov rdx,QWORD[8+rbx]
611. adcx r11,rcx
612. adox r12,rbp
; r8 is the addend used to fold the pending CF/OF carries into r12/r13.
; NOTE(review): this is a pure carry fold only if r8 became zero in the
; adcx above, which is what a correct reduction constant would give -- confirm.
613. adcx r12,r8
614. adox r13,r8
615. adc r13,0
; Row 1: add first operand * word 1 into r9..r13.
616. mulx rbp,rcx,QWORD[((0+128))+rsi]
617. adcx r9,rcx
618. adox r10,rbp
619. mulx rbp,rcx,QWORD[((8+128))+rsi]
620. adcx r10,rcx
621. adox r11,rbp
622. mulx rbp,rcx,QWORD[((16+128))+rsi]
623. adcx r11,rcx
624. adox r12,rbp
625. mulx rbp,rcx,QWORD[((24+128))+rsi]
; rdx = low 64 bits of (r9 * $L$ordK) for reduction 1.
626. mov rdx,r9
627. mulx rax,rdx,r15
628. adcx r12,rcx
629. adox r13,rbp
; Fold the remaining carries; r8 becomes the new top word.
630. adcx r13,r8
631. adox r8,r8
632. adc r8,0
; Reduction 1: add rdx * $L$ord[0..3] into r9..r13, carry into r8.
633. mulx rbp,rcx,QWORD[((0+128))+r14]
634. adcx r9,rcx
635. adox r10,rbp
636. mulx rbp,rcx,QWORD[((8+128))+r14]
637. adcx r10,rcx
638. adox r11,rbp
639. mulx rbp,rcx,QWORD[((16+128))+r14]
640. adcx r11,rcx
641. adox r12,rbp
642. mulx rbp,rcx,QWORD[((24+128))+r14]
; Load word 2 of the second operand for the next row.
643. mov rdx,QWORD[16+rbx]
644. adcx r12,rcx
645. adox r13,rbp
; Same carry-fold pattern as after reduction 0, now with r9 as the addend.
646. adcx r13,r9
647. adox r8,r9
648. adc r8,0
; Row 2: add first operand * word 2 into r10..r13,r8.
649. mulx rbp,rcx,QWORD[((0+128))+rsi]
650. adcx r10,rcx
651. adox r11,rbp
652. mulx rbp,rcx,QWORD[((8+128))+rsi]
653. adcx r11,rcx
654. adox r12,rbp
655. mulx rbp,rcx,QWORD[((16+128))+rsi]
656. adcx r12,rcx
657. adox r13,rbp
658. mulx rbp,rcx,QWORD[((24+128))+rsi]
; rdx = low 64 bits of (r10 * $L$ordK) for reduction 2.
659. mov rdx,r10
660. mulx rax,rdx,r15
661. adcx r13,rcx
662. adox r8,rbp
663. adcx r8,r9
664. adox r9,r9
665. adc r9,0
; Reduction 2: add rdx * $L$ord[0..3] into r10..r13,r8, carry into r9.
666. mulx rbp,rcx,QWORD[((0+128))+r14]
667. adcx r10,rcx
668. adox r11,rbp
669. mulx rbp,rcx,QWORD[((8+128))+r14]
670. adcx r11,rcx
671. adox r12,rbp
672. mulx rbp,rcx,QWORD[((16+128))+r14]
673. adcx r12,rcx
674. adox r13,rbp
675. mulx rbp,rcx,QWORD[((24+128))+r14]
; Load word 3 of the second operand for the last row.
676. mov rdx,QWORD[24+rbx]
677. adcx r13,rcx
678. adox r8,rbp
679. adcx r8,r10
680. adox r9,r10
681. adc r9,0
; Row 3: add first operand * word 3 into r11..r13,r8,r9.
682. mulx rbp,rcx,QWORD[((0+128))+rsi]
683. adcx r11,rcx
684. adox r12,rbp
685. mulx rbp,rcx,QWORD[((8+128))+rsi]
686. adcx r12,rcx
687. adox r13,rbp
688. mulx rbp,rcx,QWORD[((16+128))+rsi]
689. adcx r13,rcx
690. adox r8,rbp
691. mulx rbp,rcx,QWORD[((24+128))+rsi]
; rdx = low 64 bits of (r11 * $L$ordK) for reduction 3.
692. mov rdx,r11
693. mulx rax,rdx,r15
694. adcx r8,rcx
695. adox r9,rbp
696. adcx r9,r10
697. adox r10,r10
698. adc r10,0
; Reduction 3: add rdx * $L$ord[0..3]; the result ends up in
; r12,r13,r8,r9 with the top carry in r10.
699. mulx rbp,rcx,QWORD[((0+128))+r14]
700. adcx r11,rcx
701. adox r12,rbp
702. mulx rbp,rcx,QWORD[((8+128))+r14]
703. adcx r12,rcx
704. adox r13,rbp
705. mulx rbp,rcx,QWORD[((16+128))+r14]
706. adcx r13,rcx
707. adox r8,rbp
708. mulx rbp,rcx,QWORD[((24+128))+r14]
; Remove the -128 bias: r14 now points at $L$ord. rbx, rdx, rcx and rbp
; take copies of the unreduced result words (lea and mov keep the flags).
709. lea r14,[128+r14]
710. mov rbx,r12
711. adcx r8,rcx
712. adox r9,rbp
713. mov rdx,r13
714. adcx r9,r11
715. adox r10,r11
716. adc r10,0
; Trial subtraction of $L$ord from r10:r9:r8:r13:r12.
717. mov rcx,r8
718. sub r12,QWORD[r14]
719. sbb r13,QWORD[8+r14]
720. sbb r8,QWORD[16+r14]
721. mov rbp,r9
722. sbb r9,QWORD[24+r14]
723. sbb r10,0
; A borrow means the value was already below $L$ord: put the saved copies
; back with cmovc, so no branch depends on the data.
724. cmovc r12,rbx
725. cmovc r13,rdx
726. cmovc r8,rcx
727. cmovc r9,rbp
728. mov QWORD[rdi],r12
729. mov QWORD[8+rdi],r13
730. mov QWORD[16+rdi],r8
731. mov QWORD[24+rdi],r9
; Restore the six pushed registers (reverse push order) and drop 48 bytes.
732. mov r15,QWORD[rsp]
733. mov r14,QWORD[8+rsp]
734. mov r13,QWORD[16+rsp]
735. mov r12,QWORD[24+rsp]
736. mov rbx,QWORD[32+rsp]
737. mov rbp,QWORD[40+rsp]
738. lea rsp,[48+rsp]
739. $L$ord_mulx_epilogue:
740. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
741. mov rsi,QWORD[16+rsp]
742. DB 0F3h,0C3h ;repret
743. $L$SEH_end_ecp_nistz256_ord_mul_montx:
;-----------------------------------------------------------------------
; ecp_nistz256_ord_sqr_montx  (mulx/adcx/adox code path)
; ABI:   Win64 entry. Arguments arrive in rcx, rdx, r8 and are moved to
;        rdi, rsi, rdx before the body runs.
; In:    rdi = result pointer (4 qwords are written)
;        rsi = pointer to the 4-qword input
;        rdx = repetition count (copied to rbx, counted down with dec/jnz).
;              The loop body runs before the count is tested, so a count
;              of 0 is not handled as "no iterations".
; Does:  squares-and-reduces the value rbx times; each iteration's result
;        feeds the next. Reduction uses the words at $L$ord.
;        NOTE(review): by name, presumably repeated Montgomery squaring
;        modulo the group order -- confirm against the data section.
; Saves: rbp, rbx, r12-r15 (pushed), rdi/rsi (caller's home slots).
; Uses xmm1-xmm3 as scratch and zeroes them before returning.
;-----------------------------------------------------------------------
744. ALIGN 32
745. ecp_nistz256_ord_sqr_montx:
746. mov QWORD[8+rsp],rdi ;WIN64 prologue
747. mov QWORD[16+rsp],rsi
748. mov rax,rsp
749. $L$SEH_begin_ecp_nistz256_ord_sqr_montx:
750. mov rdi,rcx
751. mov rsi,rdx
752. mov rdx,r8
753. $L$ecp_nistz256_ord_sqr_montx:
754. push rbp
755. push rbx
756. push r12
757. push r13
758. push r14
759. push r15
760. $L$ord_sqrx_body:
; rbx = loop counter; rdx,r14,r15,r8 = input words 0..3.
; rsi is then repurposed to point at $L$ord.
761. mov rbx,rdx
762. mov rdx,QWORD[rsi]
763. mov r14,QWORD[8+rsi]
764. mov r15,QWORD[16+rsi]
765. mov r8,QWORD[24+rsi]
766. lea rsi,[$L$ord]
767. jmp NEAR $L$oop_ord_sqrx
768. ALIGN 32
; Loop invariant at the top: rdx,r14,r15,r8 hold words 0..3 of the value.
769. $L$oop_ord_sqrx:
; Off-diagonal products word0*word1, word0*word2, word0*word3.
770. mulx r10,r9,r14
771. mulx r11,rcx,r15
772. mov rax,rdx
; Encoded bytes: movq xmm1,r14 (keeps a copy of word 1).
773. DB 102,73,15,110,206
774. mulx r12,rbp,r8
775. mov rdx,r14
776. add r10,rcx
; Encoded bytes: movq xmm2,r15 (keeps a copy of word 2).
777. DB 102,73,15,110,215
778. adc r11,rbp
779. adc r12,0
; Off-diagonal products word1*word2, word1*word3; the xor clears CF/OF.
780. xor r13,r13
781. mulx rbp,rcx,r15
782. adcx r11,rcx
783. adox r12,rbp
784. mulx rbp,rcx,r8
785. mov rdx,r15
786. adcx r12,rcx
787. adox r13,rbp
788. adc r13,0
; Off-diagonal product word2*word3.
789. mulx r14,rcx,r8
790. mov rdx,rax
; Encoded bytes: movq xmm3,r8 (keeps a copy of word 3).
791. DB 102,73,15,110,216
; Double the off-diagonal sum on the CF chain (adcx x,x) while the OF
; chain (adox) adds in the squares word[i]^2.
792. xor r15,r15
793. adcx r9,r9
794. adox r13,rcx
795. adcx r10,r10
796. adox r14,r15
; word0^2 -> rbp:r8
797. mulx rbp,r8,rdx
; Encoded bytes: movq rdx,xmm1 (word 1).
798. DB 102,72,15,126,202
799. adcx r11,r11
800. adox r9,rbp
801. adcx r12,r12
; word1^2 -> rax:rcx
802. mulx rax,rcx,rdx
; Encoded bytes: movq rdx,xmm2 (word 2).
803. DB 102,72,15,126,210
804. adcx r13,r13
805. adox r10,rcx
806. adcx r14,r14
; word2^2 -> rbp:rcx
807. mulx rbp,rcx,rdx
; 0x67 is a prefix byte applied to the encoded instruction that follows.
; NOTE(review): presumably padding only -- confirm.
808. DB 0x67
; Encoded bytes: movq rdx,xmm3 (word 3).
809. DB 102,72,15,126,218
810. adox r11,rax
811. adcx r15,r15
812. adox r12,rcx
813. adox r13,rbp
; word3^2 -> rax:rcx; the 512-bit square is now in r15..r8.
814. mulx rax,rcx,rdx
815. adox r14,rcx
816. adox r15,rax
; Reduction round 1: rdx = low 64 bits of r8 * qword at [32+rsi].
; NOTE(review): [32+rsi] is the qword right after the four $L$ord words,
; presumably $L$ordK -- confirm against the data layout.
817. mov rdx,r8
818. mulx rcx,rdx,QWORD[32+rsi]
; rax = 0 is the zero addend for the closing carry folds; the xor also
; clears CF and OF.
819. xor rax,rax
820. mulx rbp,rcx,QWORD[rsi]
821. adcx r8,rcx
822. adox r9,rbp
823. mulx rbp,rcx,QWORD[8+rsi]
824. adcx r9,rcx
825. adox r10,rbp
826. mulx rbp,rcx,QWORD[16+rsi]
827. adcx r10,rcx
828. adox r11,rbp
829. mulx rbp,rcx,QWORD[24+rsi]
830. adcx r11,rcx
831. adox r8,rbp
832. adcx r8,rax
; Reduction round 2, driven by r9. The CF and OF chains swap roles
; relative to round 1; the flags carry over between rounds.
833. mov rdx,r9
834. mulx rcx,rdx,QWORD[32+rsi]
835. mulx rbp,rcx,QWORD[rsi]
836. adox r9,rcx
837. adcx r10,rbp
838. mulx rbp,rcx,QWORD[8+rsi]
839. adox r10,rcx
840. adcx r11,rbp
841. mulx rbp,rcx,QWORD[16+rsi]
842. adox r11,rcx
843. adcx r8,rbp
844. mulx rbp,rcx,QWORD[24+rsi]
845. adox r8,rcx
846. adcx r9,rbp
847. adox r9,rax
; Reduction round 3, driven by r10.
848. mov rdx,r10
849. mulx rcx,rdx,QWORD[32+rsi]
850. mulx rbp,rcx,QWORD[rsi]
851. adcx r10,rcx
852. adox r11,rbp
853. mulx rbp,rcx,QWORD[8+rsi]
854. adcx r11,rcx
855. adox r8,rbp
856. mulx rbp,rcx,QWORD[16+rsi]
857. adcx r8,rcx
858. adox r9,rbp
859. mulx rbp,rcx,QWORD[24+rsi]
860. adcx r9,rcx
861. adox r10,rbp
862. adcx r10,rax
; Reduction round 4, driven by r11.
863. mov rdx,r11
864. mulx rcx,rdx,QWORD[32+rsi]
865. mulx rbp,rcx,QWORD[rsi]
866. adox r11,rcx
867. adcx r8,rbp
868. mulx rbp,rcx,QWORD[8+rsi]
869. adox r8,rcx
870. adcx r9,rbp
871. mulx rbp,rcx,QWORD[16+rsi]
872. adox r9,rcx
873. adcx r10,rbp
874. mulx rbp,rcx,QWORD[24+rsi]
875. adox r10,rcx
876. adcx r11,rbp
877. adox r11,rax
; Add the upper half of the square (r12..r15) to the reduced lower half.
; The sum is in r12,r9,r10,r11 with the carry in rax; rdx,r14,r15,r8
; receive copies of it.
878. add r12,r8
879. adc r9,r13
880. mov rdx,r12
881. adc r10,r14
882. adc r11,r15
883. mov r14,r9
884. adc rax,0
; Trial subtraction of $L$ord.
885. sub r12,QWORD[rsi]
886. mov r15,r10
887. sbb r9,QWORD[8+rsi]
888. sbb r10,QWORD[16+rsi]
889. mov r8,r11
890. sbb r11,QWORD[24+rsi]
891. sbb rax,0
; No borrow: keep the subtracted value. The result is left in
; rdx,r14,r15,r8, which is the input layout of the next iteration.
892. cmovnc rdx,r12
893. cmovnc r14,r9
894. cmovnc r15,r10
895. cmovnc r8,r11
896. dec rbx
897. jnz NEAR $L$oop_ord_sqrx
; Store the final value; the interleaved pxor's wipe the xmm copies of the
; operand words.
898. mov QWORD[rdi],rdx
899. mov QWORD[8+rdi],r14
900. pxor xmm1,xmm1
901. mov QWORD[16+rdi],r15
902. pxor xmm2,xmm2
903. mov QWORD[24+rdi],r8
904. pxor xmm3,xmm3
; Restore the six pushed registers (reverse push order) and drop 48 bytes.
905. mov r15,QWORD[rsp]
906. mov r14,QWORD[8+rsp]
907. mov r13,QWORD[16+rsp]
908. mov r12,QWORD[24+rsp]
909. mov rbx,QWORD[32+rsp]
910. mov rbp,QWORD[40+rsp]
911. lea rsp,[48+rsp]
912. $L$ord_sqrx_epilogue:
913. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
914. mov rsi,QWORD[16+rsp]
915. DB 0F3h,0C3h ;repret
916. $L$SEH_end_ecp_nistz256_ord_sqr_montx:
;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont  (exported)
; ABI:   Win64 entry. Arguments arrive in rcx, rdx, r8 and are moved to
;        rdi, rsi, rdx before the body runs.
; In:    rdi = result pointer, rsi = first operand pointer,
;        rdx = second operand pointer (4 qwords each).
; Does:  loads the operands into the registers the worker routines expect
;        and dispatches to __ecp_nistz256_mul_montx when both bits of mask
;        0x80100 are set in the dword at OPENSSL_ia32cap_P+8, otherwise to
;        __ecp_nistz256_mul_montq. The worker stores the result at [rdi].
;        NOTE(review): 0x80100 presumably selects the BMI2 and ADX feature
;        bits -- confirm against the OPENSSL_ia32cap documentation.
; Saves: rbp, rbx, r12-r15 (pushed), rdi/rsi (caller's home slots).
;-----------------------------------------------------------------------
917. global ecp_nistz256_mul_mont
918. ALIGN 32
919. ecp_nistz256_mul_mont:
920. mov QWORD[8+rsp],rdi ;WIN64 prologue
921. mov QWORD[16+rsp],rsi
922. mov rax,rsp
923. $L$SEH_begin_ecp_nistz256_mul_mont:
924. mov rdi,rcx
925. mov rsi,rdx
926. mov rdx,r8
; ecx = capability bits masked with 0x80100; compared after the pushes.
927. lea rcx,[OPENSSL_ia32cap_P]
928. mov rcx,QWORD[8+rcx]
929. and ecx,0x80100
930. $L$mul_mont:
931. push rbp
932. push rbx
933. push r12
934. push r13
935. push r14
936. push r15
937. $L$mul_body:
938. cmp ecx,0x80100
939. je NEAR $L$mul_montx
; mul path: rbx = second operand pointer, rax = its word 0,
; r9..r12 = words 0..3 of the first operand.
940. mov rbx,rdx
941. mov rax,QWORD[rdx]
942. mov r9,QWORD[rsi]
943. mov r10,QWORD[8+rsi]
944. mov r11,QWORD[16+rsi]
945. mov r12,QWORD[24+rsi]
946. call __ecp_nistz256_mul_montq
947. jmp NEAR $L$mul_mont_done
948. ALIGN 32
949. $L$mul_montx:
; mulx path: same layout, but word 0 of the second operand goes in rdx
; and rsi is biased by -128 (the worker adds 128 back in its addressing).
950. mov rbx,rdx
951. mov rdx,QWORD[rdx]
952. mov r9,QWORD[rsi]
953. mov r10,QWORD[8+rsi]
954. mov r11,QWORD[16+rsi]
955. mov r12,QWORD[24+rsi]
956. lea rsi,[((-128))+rsi]
957. call __ecp_nistz256_mul_montx
958. $L$mul_mont_done:
; Restore the six pushed registers (reverse push order) and drop 48 bytes.
959. mov r15,QWORD[rsp]
960. mov r14,QWORD[8+rsp]
961. mov r13,QWORD[16+rsp]
962. mov r12,QWORD[24+rsp]
963. mov rbx,QWORD[32+rsp]
964. mov rbp,QWORD[40+rsp]
965. lea rsp,[48+rsp]
966. $L$mul_epilogue:
967. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
968. mov rsi,QWORD[16+rsp]
969. DB 0F3h,0C3h ;repret
970. $L$SEH_end_ecp_nistz256_mul_mont:
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq  (internal worker, mul/adc code path)
; In:    rax = word 0 of the second operand, rbx = pointer to it
;        r9,r10,r11,r12 = words 0..3 of the first operand
;        rsi = pointer to the first operand, rdi = result pointer
; Out:   result in r12,r13,r8,r9 and stored as 4 qwords at [rdi]
; Clobb: rax, rbx, rcx, rdx, rbp, r8-r15, flags
; Does:  four multiply rows interleaved with four reduction steps, then a
;        conditional subtraction. The reduction and the final subtraction
;        treat the modulus as the words {-1, [$L$poly+8], 0, [$L$poly+24]}.
;        NOTE(review): by name this is Montgomery multiplication modulo
;        the P-256 prime; the shift-by-32 reduction presumably relies on
;        that prime's special form -- confirm against $L$poly.
;-----------------------------------------------------------------------
971. ALIGN 32
972. __ecp_nistz256_mul_montq:
; Row 0: r12:r11:r10:r9:r8 = first operand * word 0 (kept in rbp).
973. mov rbp,rax
974. mul r9
975. mov r14,QWORD[(($L$poly+8))]
976. mov r8,rax
977. mov rax,rbp
978. mov r9,rdx
979. mul r10
980. mov r15,QWORD[(($L$poly+24))]
981. add r9,rax
982. mov rax,rbp
983. adc rdx,0
984. mov r10,rdx
985. mul r11
986. add r10,rax
987. mov rax,rbp
988. adc rdx,0
989. mov r11,rdx
990. mul r12
991. add r11,rax
992. mov rax,r8
993. adc rdx,0
994. xor r13,r13
995. mov r12,rdx
; Reduction step 0: add (r8 << 32) at word 1, (r8 >> 32) at word 2 and
; r8 * r15 at words 3..4; r13 catches the carry.
996. mov rbp,r8
997. shl r8,32
998. mul r15
999. shr rbp,32
1000. add r9,r8
1001. adc r10,rbp
1002. adc r11,rax
; Load word 1 of the second operand (mov leaves the carry chain intact).
1003. mov rax,QWORD[8+rbx]
1004. adc r12,rdx
1005. adc r13,0
1006. xor r8,r8
; Row 1: add first operand * word 1 into r9..r13; r8 takes the top carry.
1007. mov rbp,rax
1008. mul QWORD[rsi]
1009. add r9,rax
1010. mov rax,rbp
1011. adc rdx,0
1012. mov rcx,rdx
1013. mul QWORD[8+rsi]
1014. add r10,rcx
1015. adc rdx,0
1016. add r10,rax
1017. mov rax,rbp
1018. adc rdx,0
1019. mov rcx,rdx
1020. mul QWORD[16+rsi]
1021. add r11,rcx
1022. adc rdx,0
1023. add r11,rax
1024. mov rax,rbp
1025. adc rdx,0
1026. mov rcx,rdx
1027. mul QWORD[24+rsi]
1028. add r12,rcx
1029. adc rdx,0
1030. add r12,rax
1031. mov rax,r9
1032. adc r13,rdx
1033. adc r8,0
; Reduction step 1, driven by r9.
1034. mov rbp,r9
1035. shl r9,32
1036. mul r15
1037. shr rbp,32
1038. add r10,r9
1039. adc r11,rbp
1040. adc r12,rax
; Load word 2 of the second operand.
1041. mov rax,QWORD[16+rbx]
1042. adc r13,rdx
1043. adc r8,0
1044. xor r9,r9
; Row 2: add first operand * word 2 into r10..r13,r8; r9 takes the carry.
1045. mov rbp,rax
1046. mul QWORD[rsi]
1047. add r10,rax
1048. mov rax,rbp
1049. adc rdx,0
1050. mov rcx,rdx
1051. mul QWORD[8+rsi]
1052. add r11,rcx
1053. adc rdx,0
1054. add r11,rax
1055. mov rax,rbp
1056. adc rdx,0
1057. mov rcx,rdx
1058. mul QWORD[16+rsi]
1059. add r12,rcx
1060. adc rdx,0
1061. add r12,rax
1062. mov rax,rbp
1063. adc rdx,0
1064. mov rcx,rdx
1065. mul QWORD[24+rsi]
1066. add r13,rcx
1067. adc rdx,0
1068. add r13,rax
1069. mov rax,r10
1070. adc r8,rdx
1071. adc r9,0
; Reduction step 2, driven by r10.
1072. mov rbp,r10
1073. shl r10,32
1074. mul r15
1075. shr rbp,32
1076. add r11,r10
1077. adc r12,rbp
1078. adc r13,rax
; Load word 3 of the second operand.
1079. mov rax,QWORD[24+rbx]
1080. adc r8,rdx
1081. adc r9,0
1082. xor r10,r10
; Row 3: add first operand * word 3 into r11..r13,r8,r9; r10 takes the carry.
1083. mov rbp,rax
1084. mul QWORD[rsi]
1085. add r11,rax
1086. mov rax,rbp
1087. adc rdx,0
1088. mov rcx,rdx
1089. mul QWORD[8+rsi]
1090. add r12,rcx
1091. adc rdx,0
1092. add r12,rax
1093. mov rax,rbp
1094. adc rdx,0
1095. mov rcx,rdx
1096. mul QWORD[16+rsi]
1097. add r13,rcx
1098. adc rdx,0
1099. add r13,rax
1100. mov rax,rbp
1101. adc rdx,0
1102. mov rcx,rdx
1103. mul QWORD[24+rsi]
1104. add r8,rcx
1105. adc rdx,0
1106. add r8,rax
1107. mov rax,r11
1108. adc r9,rdx
1109. adc r10,0
; Reduction step 3, driven by r11. Copies of the unreduced result go to
; rcx, rbp, rbx, rdx along the way.
1110. mov rbp,r11
1111. shl r11,32
1112. mul r15
1113. shr rbp,32
1114. add r12,r11
1115. adc r13,rbp
1116. mov rcx,r12
1117. adc r8,rax
1118. adc r9,rdx
1119. mov rbp,r13
1120. adc r10,0
; Trial subtraction of the modulus {-1, r14, 0, r15} from r10:r9:r8:r13:r12.
1121. sub r12,-1
1122. mov rbx,r8
1123. sbb r13,r14
1124. sbb r8,0
1125. mov rdx,r9
1126. sbb r9,r15
1127. sbb r10,0
; Borrow => the value was already below the modulus: restore the copies.
1128. cmovc r12,rcx
1129. cmovc r13,rbp
1130. mov QWORD[rdi],r12
1131. cmovc r8,rbx
1132. mov QWORD[8+rdi],r13
1133. cmovc r9,rdx
1134. mov QWORD[16+rdi],r8
1135. mov QWORD[24+rdi],r9
1136. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont  (exported)
; ABI:   Win64 entry. Arguments arrive in rcx, rdx and are moved to
;        rdi, rsi before the body runs.
; In:    rdi = result pointer, rsi = input pointer (4 qwords each).
; Does:  loads the input words into the registers the worker routines
;        expect and dispatches to __ecp_nistz256_sqr_montx when both bits
;        of mask 0x80100 are set in the dword at OPENSSL_ia32cap_P+8,
;        otherwise to __ecp_nistz256_sqr_montq. The worker stores the
;        result at [rdi].
;        NOTE(review): 0x80100 presumably selects the BMI2 and ADX feature
;        bits -- confirm against the OPENSSL_ia32cap documentation.
; Saves: rbp, rbx, r12-r15 (pushed), rdi/rsi (caller's home slots).
;-----------------------------------------------------------------------
1137. global ecp_nistz256_sqr_mont
1138. ALIGN 32
1139. ecp_nistz256_sqr_mont:
1140. mov QWORD[8+rsp],rdi ;WIN64 prologue
1141. mov QWORD[16+rsp],rsi
1142. mov rax,rsp
1143. $L$SEH_begin_ecp_nistz256_sqr_mont:
1144. mov rdi,rcx
1145. mov rsi,rdx
; ecx = capability bits masked with 0x80100; compared after the pushes.
1146. lea rcx,[OPENSSL_ia32cap_P]
1147. mov rcx,QWORD[8+rcx]
1148. and ecx,0x80100
1149. push rbp
1150. push rbx
1151. push r12
1152. push r13
1153. push r14
1154. push r15
1155. $L$sqr_body:
1156. cmp ecx,0x80100
1157. je NEAR $L$sqr_montx
; mul path: rax,r14,r15,r8 = input words 0..3.
1158. mov rax,QWORD[rsi]
1159. mov r14,QWORD[8+rsi]
1160. mov r15,QWORD[16+rsi]
1161. mov r8,QWORD[24+rsi]
1162. call __ecp_nistz256_sqr_montq
1163. jmp NEAR $L$sqr_mont_done
1164. ALIGN 32
1165. $L$sqr_montx:
; mulx path: word 0 goes in rdx instead of rax, and rsi is biased by -128
; (the worker adds 128 back in its addressing).
1166. mov rdx,QWORD[rsi]
1167. mov r14,QWORD[8+rsi]
1168. mov r15,QWORD[16+rsi]
1169. mov r8,QWORD[24+rsi]
1170. lea rsi,[((-128))+rsi]
1171. call __ecp_nistz256_sqr_montx
1172. $L$sqr_mont_done:
; Restore the six pushed registers (reverse push order) and drop 48 bytes.
1173. mov r15,QWORD[rsp]
1174. mov r14,QWORD[8+rsp]
1175. mov r13,QWORD[16+rsp]
1176. mov r12,QWORD[24+rsp]
1177. mov rbx,QWORD[32+rsp]
1178. mov rbp,QWORD[40+rsp]
1179. lea rsp,[48+rsp]
1180. $L$sqr_epilogue:
1181. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1182. mov rsi,QWORD[16+rsp]
1183. DB 0F3h,0C3h ;repret
1184. $L$SEH_end_ecp_nistz256_sqr_mont:
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq  (internal worker, mul/adc code path)
; In:    rax,r14,r15,r8 = input words 0..3
;        rsi = pointer to the same input, rdi = result pointer
; Out:   result in r12,r13,r14,r15 and stored as 4 qwords at [rdi]
; Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags (rsi is overwritten)
; Does:  computes the off-diagonal products once, doubles them, adds the
;        squares of the individual words, runs four reduction steps and
;        one conditional subtraction. The modulus is treated as the words
;        {-1, [$L$poly+8], 0, [$L$poly+24]}.
;        NOTE(review): by name, Montgomery squaring modulo the P-256
;        prime -- confirm against $L$poly.
;-----------------------------------------------------------------------
1185. ALIGN 32
1186. __ecp_nistz256_sqr_montq:
; Off-diagonal products; r13 holds word 0.
; word1*word0 -> r10:r9
1187. mov r13,rax
1188. mul r14
1189. mov r9,rax
1190. mov rax,r15
1191. mov r10,rdx
; word2*word0
1192. mul r13
1193. add r10,rax
1194. mov rax,r8
1195. adc rdx,0
1196. mov r11,rdx
; word3*word0
1197. mul r13
1198. add r11,rax
1199. mov rax,r15
1200. adc rdx,0
1201. mov r12,rdx
; word2*word1
1202. mul r14
1203. add r11,rax
1204. mov rax,r8
1205. adc rdx,0
1206. mov rbp,rdx
; word3*word1
1207. mul r14
1208. add r12,rax
1209. mov rax,r8
1210. adc rdx,0
1211. add r12,rbp
1212. mov r13,rdx
1213. adc r13,0
; word3*word2; r15 is zeroed to collect the carry out of the doubling.
1214. mul r15
1215. xor r15,r15
1216. add r13,rax
1217. mov rax,QWORD[rsi]
1218. mov r14,rdx
1219. adc r14,0
; Double the off-diagonal sum r14..r9.
1220. add r9,r9
1221. adc r10,r10
1222. adc r11,r11
1223. adc r12,r12
1224. adc r13,r13
1225. adc r14,r14
1226. adc r15,0
; Add the squares word[i]^2, reloading each word from [rsi].
1227. mul rax
1228. mov r8,rax
1229. mov rax,QWORD[8+rsi]
1230. mov rcx,rdx
1231. mul rax
1232. add r9,rcx
1233. adc r10,rax
1234. mov rax,QWORD[16+rsi]
1235. adc rdx,0
1236. mov rcx,rdx
1237. mul rax
1238. add r11,rcx
1239. adc r12,rax
1240. mov rax,QWORD[24+rsi]
1241. adc rdx,0
1242. mov rcx,rdx
1243. mul rax
1244. add r13,rcx
1245. adc r14,rax
1246. mov rax,r8
1247. adc r15,rdx
; rsi and rbp now hold modulus words 1 and 3; the input pointer is gone.
1248. mov rsi,QWORD[(($L$poly+8))]
1249. mov rbp,QWORD[(($L$poly+24))]
; Reduction step 1, driven by r8: add (r8 << 32), (r8 >> 32) and r8 * rbp
; into the next words.
1250. mov rcx,r8
1251. shl r8,32
1252. mul rbp
1253. shr rcx,32
1254. add r9,r8
1255. adc r10,rcx
1256. adc r11,rax
1257. mov rax,r9
1258. adc rdx,0
; Reduction step 2, driven by r9.
1259. mov rcx,r9
1260. shl r9,32
1261. mov r8,rdx
1262. mul rbp
1263. shr rcx,32
1264. add r10,r9
1265. adc r11,rcx
1266. adc r8,rax
1267. mov rax,r10
1268. adc rdx,0
; Reduction step 3, driven by r10.
1269. mov rcx,r10
1270. shl r10,32
1271. mov r9,rdx
1272. mul rbp
1273. shr rcx,32
1274. add r11,r10
1275. adc r8,rcx
1276. adc r9,rax
1277. mov rax,r11
1278. adc rdx,0
; Reduction step 4, driven by r11.
1279. mov rcx,r11
1280. shl r11,32
1281. mov r10,rdx
1282. mul rbp
1283. shr rcx,32
1284. add r8,r11
1285. adc r9,rcx
1286. adc r10,rax
1287. adc rdx,0
; Add the reduced lower half (r8,r9,r10,rdx) to the upper half r12..r15;
; r11 takes the carry. r8,r9,r10,rcx receive copies of the sum.
1288. xor r11,r11
1289. add r12,r8
1290. adc r13,r9
1291. mov r8,r12
1292. adc r14,r10
1293. adc r15,rdx
1294. mov r9,r13
1295. adc r11,0
; Trial subtraction of the modulus {-1, rsi, 0, rbp}.
1296. sub r12,-1
1297. mov r10,r14
1298. sbb r13,rsi
1299. sbb r14,0
1300. mov rcx,r15
1301. sbb r15,rbp
1302. sbb r11,0
; Borrow => the value was already below the modulus: restore the copies.
1303. cmovc r12,r8
1304. cmovc r13,r9
1305. mov QWORD[rdi],r12
1306. cmovc r14,r10
1307. mov QWORD[8+rdi],r13
1308. cmovc r15,rcx
1309. mov QWORD[16+rdi],r14
1310. mov QWORD[24+rdi],r15
1311. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montx  (internal worker, mulx/adcx/adox code path)
; In:    rdx = word 0 of the second operand, rbx = pointer to it
;        r9,r10,r11,r12 = words 0..3 of the first operand
;        rsi = pointer to the first operand MINUS 128 (accesses add 128)
;        rdi = result pointer
; Out:   result in r12,r13,r8,r9 and stored as 4 qwords at [rdi]
; Clobb: rax, rbx, rcx, rdx, rbp, r8-r15, flags
; Does:  four multiply rows interleaved with four reduction steps, then a
;        conditional subtraction. The modulus is treated as the words
;        {-1, [$L$poly+8], 0, [$L$poly+24]}.
;        NOTE(review): by name, Montgomery multiplication modulo the
;        P-256 prime -- confirm against $L$poly.
;-----------------------------------------------------------------------
1312. ALIGN 32
1313. __ecp_nistz256_mul_montx:
; Row 0: first operand * word 0. r14 = 32 is the shift count for shlx and
; shrx. The xor zeroes r13 and clears CF, so the first adc adds no stray
; carry (mulx and mov do not touch flags).
1314. mulx r9,r8,r9
1315. mulx r10,rcx,r10
1316. mov r14,32
1317. xor r13,r13
1318. mulx r11,rbp,r11
1319. mov r15,QWORD[(($L$poly+24))]
1320. adc r9,rcx
1321. mulx r12,rcx,r12
1322. mov rdx,r8
1323. adc r10,rbp
1324. shlx rbp,r8,r14
1325. adc r11,rcx
1326. shrx rcx,r8,r14
1327. adc r12,0
; Reduction step 0: add (r8 << 32), (r8 >> 32) and r8 * r15.
1328. add r9,rbp
1329. adc r10,rcx
1330. mulx rbp,rcx,r15
; Load word 1 of the second operand.
1331. mov rdx,QWORD[8+rbx]
1332. adc r11,rcx
1333. adc r12,rbp
1334. adc r13,0
; Row 1: two carry chains (adcx on CF, adox on OF); the xor clears both.
1335. xor r8,r8
1336. mulx rbp,rcx,QWORD[((0+128))+rsi]
1337. adcx r9,rcx
1338. adox r10,rbp
1339. mulx rbp,rcx,QWORD[((8+128))+rsi]
1340. adcx r10,rcx
1341. adox r11,rbp
1342. mulx rbp,rcx,QWORD[((16+128))+rsi]
1343. adcx r11,rcx
1344. adox r12,rbp
1345. mulx rbp,rcx,QWORD[((24+128))+rsi]
1346. mov rdx,r9
1347. adcx r12,rcx
1348. shlx rcx,r9,r14
1349. adox r13,rbp
1350. shrx rbp,r9,r14
; r8 is zero here, so these three lines only fold the outstanding CF/OF
; into r13 and r8.
1351. adcx r13,r8
1352. adox r8,r8
1353. adc r8,0
; Reduction step 1, driven by r9.
1354. add r10,rcx
1355. adc r11,rbp
1356. mulx rbp,rcx,r15
; Load word 2 of the second operand.
1357. mov rdx,QWORD[16+rbx]
1358. adc r12,rcx
1359. adc r13,rbp
1360. adc r8,0
; Row 2.
1361. xor r9,r9
1362. mulx rbp,rcx,QWORD[((0+128))+rsi]
1363. adcx r10,rcx
1364. adox r11,rbp
1365. mulx rbp,rcx,QWORD[((8+128))+rsi]
1366. adcx r11,rcx
1367. adox r12,rbp
1368. mulx rbp,rcx,QWORD[((16+128))+rsi]
1369. adcx r12,rcx
1370. adox r13,rbp
1371. mulx rbp,rcx,QWORD[((24+128))+rsi]
1372. mov rdx,r10
1373. adcx r13,rcx
1374. shlx rcx,r10,r14
1375. adox r8,rbp
1376. shrx rbp,r10,r14
1377. adcx r8,r9
1378. adox r9,r9
1379. adc r9,0
; Reduction step 2, driven by r10.
1380. add r11,rcx
1381. adc r12,rbp
1382. mulx rbp,rcx,r15
; Load word 3 of the second operand.
1383. mov rdx,QWORD[24+rbx]
1384. adc r13,rcx
1385. adc r8,rbp
1386. adc r9,0
; Row 3.
1387. xor r10,r10
1388. mulx rbp,rcx,QWORD[((0+128))+rsi]
1389. adcx r11,rcx
1390. adox r12,rbp
1391. mulx rbp,rcx,QWORD[((8+128))+rsi]
1392. adcx r12,rcx
1393. adox r13,rbp
1394. mulx rbp,rcx,QWORD[((16+128))+rsi]
1395. adcx r13,rcx
1396. adox r8,rbp
1397. mulx rbp,rcx,QWORD[((24+128))+rsi]
1398. mov rdx,r11
1399. adcx r8,rcx
1400. shlx rcx,r11,r14
1401. adox r9,rbp
1402. shrx rbp,r11,r14
1403. adcx r9,r10
1404. adox r10,r10
1405. adc r10,0
; Reduction step 3, driven by r11. r14 is reloaded with modulus word 1
; (the shift count is no longer needed); rbx, rdx, rcx, rbp take copies
; of the unreduced result.
1406. add r12,rcx
1407. adc r13,rbp
1408. mulx rbp,rcx,r15
1409. mov rbx,r12
1410. mov r14,QWORD[(($L$poly+8))]
1411. adc r8,rcx
1412. mov rdx,r13
1413. adc r9,rbp
1414. adc r10,0
; Trial subtraction of {-1, r14, 0, r15}. "xor eax,eax" clears CF so the
; leading sbb behaves like a plain sub.
1415. xor eax,eax
1416. mov rcx,r8
1417. sbb r12,-1
1418. sbb r13,r14
1419. sbb r8,0
1420. mov rbp,r9
1421. sbb r9,r15
1422. sbb r10,0
; Borrow => the value was already below the modulus: restore the copies.
1423. cmovc r12,rbx
1424. cmovc r13,rdx
1425. mov QWORD[rdi],r12
1426. cmovc r8,rcx
1427. mov QWORD[8+rdi],r13
1428. cmovc r9,rbp
1429. mov QWORD[16+rdi],r8
1430. mov QWORD[24+rdi],r9
1431. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montx  (internal worker, mulx/adcx/adox code path)
; In:    rdx,r14,r15,r8 = input words 0..3
;        rsi = pointer to the same input MINUS 128 (accesses add 128)
;        rdi = result pointer
; Out:   result in r12,r13,r14,r15 and stored as 4 qwords at [rdi]
; Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags (rsi is overwritten)
; Does:  off-diagonal products, doubling merged with the per-word squares,
;        four reduction steps and one conditional subtraction. The modulus
;        is treated as the words {-1, [$L$poly+8], 0, [$L$poly+24]}.
;        NOTE(review): by name, Montgomery squaring modulo the P-256
;        prime -- confirm against $L$poly.
;-----------------------------------------------------------------------
1432. ALIGN 32
1433. __ecp_nistz256_sqr_montx:
; Off-diagonal products word0*{word1,word2,word3}. "xor eax,eax" clears
; CF ahead of the adc chain.
1434. mulx r10,r9,r14
1435. mulx r11,rcx,r15
1436. xor eax,eax
1437. adc r10,rcx
1438. mulx r12,rbp,r8
1439. mov rdx,r14
1440. adc r11,rbp
1441. adc r12,0
; word1*{word2,word3}; the xor clears CF/OF for the adcx/adox chains.
1442. xor r13,r13
1443. mulx rbp,rcx,r15
1444. adcx r11,rcx
1445. adox r12,rbp
1446. mulx rbp,rcx,r8
1447. mov rdx,r15
1448. adcx r12,rcx
1449. adox r13,rbp
1450. adc r13,0
; word2*word3, then reload word 0 from memory for its square.
1451. mulx r14,rcx,r8
1452. mov rdx,QWORD[((0+128))+rsi]
; Double the off-diagonal sum on the CF chain (adcx x,x) while the OF
; chain (adox) adds in the squares word[i]^2.
1453. xor r15,r15
1454. adcx r9,r9
1455. adox r13,rcx
1456. adcx r10,r10
1457. adox r14,r15
; word0^2 -> rbp:r8
1458. mulx rbp,r8,rdx
1459. mov rdx,QWORD[((8+128))+rsi]
1460. adcx r11,r11
1461. adox r9,rbp
1462. adcx r12,r12
; word1^2 -> rax:rcx
1463. mulx rax,rcx,rdx
1464. mov rdx,QWORD[((16+128))+rsi]
1465. adcx r13,r13
1466. adox r10,rcx
1467. adcx r14,r14
; 0x67 is a prefix byte applied to the mulx that follows.
; NOTE(review): that mulx has no memory operand, so this is presumably
; padding only -- confirm.
1468. DB 0x67
; word2^2 -> rbp:rcx
1469. mulx rbp,rcx,rdx
1470. mov rdx,QWORD[((24+128))+rsi]
1471. adox r11,rax
1472. adcx r15,r15
1473. adox r12,rcx
; rsi becomes the shift count 32; the input pointer is gone after this.
1474. mov rsi,32
1475. adox r13,rbp
; Two more 0x67 prefix bytes ahead of the next mulx (see the note above).
1476. DB 0x67,0x67
; word3^2 -> rax:rcx; the 512-bit square is now in r15..r8.
1477. mulx rax,rcx,rdx
; rdx (and rbp) = modulus word 3, the implicit mulx source for reduction.
1478. mov rdx,QWORD[(($L$poly+24))]
1479. adox r14,rcx
1480. shlx rcx,r8,rsi
1481. adox r15,rax
1482. shrx rax,r8,rsi
1483. mov rbp,rdx
; Reduction step 1, driven by r8: add (r8 << 32), (r8 >> 32), r8 * rdx.
1484. add r9,rcx
1485. adc r10,rax
1486. mulx r8,rcx,r8
1487. adc r11,rcx
1488. shlx rcx,r9,rsi
1489. adc r8,0
1490. shrx rax,r9,rsi
; Reduction step 2, driven by r9.
1491. add r10,rcx
1492. adc r11,rax
1493. mulx r9,rcx,r9
1494. adc r8,rcx
1495. shlx rcx,r10,rsi
1496. adc r9,0
1497. shrx rax,r10,rsi
; Reduction step 3, driven by r10.
1498. add r11,rcx
1499. adc r8,rax
1500. mulx r10,rcx,r10
1501. adc r9,rcx
1502. shlx rcx,r11,rsi
1503. adc r10,0
1504. shrx rax,r11,rsi
; Reduction step 4, driven by r11.
1505. add r8,rcx
1506. adc r9,rax
1507. mulx r11,rcx,r11
1508. adc r10,rcx
1509. adc r11,0
; Add the reduced lower half r8..r11 to the upper half r12..r15; rdx
; takes the carry and rsi is reloaded with modulus word 1. r8..r11
; receive copies of the sum.
1510. xor rdx,rdx
1511. add r12,r8
1512. mov rsi,QWORD[(($L$poly+8))]
1513. adc r13,r9
1514. mov r8,r12
1515. adc r14,r10
1516. adc r15,r11
1517. mov r9,r13
1518. adc rdx,0
; Trial subtraction of the modulus {-1, rsi, 0, rbp}.
1519. sub r12,-1
1520. mov r10,r14
1521. sbb r13,rsi
1522. sbb r14,0
1523. mov r11,r15
1524. sbb r15,rbp
1525. sbb rdx,0
; Borrow => the value was already below the modulus: restore the copies.
1526. cmovc r12,r8
1527. cmovc r13,r9
1528. mov QWORD[rdi],r12
1529. cmovc r14,r10
1530. mov QWORD[8+rdi],r13
1531. cmovc r15,r11
1532. mov QWORD[16+rdi],r14
1533. mov QWORD[24+rdi],r15
1534. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; ecp_nistz256_select_w5  (exported, SSE2 path)
; ABI:   Win64, arguments used directly from rcx, rdx, r8d.
; In:    rcx = output pointer (96 bytes written, unaligned stores)
;        rdx = table pointer: 16 entries of 96 bytes, read with movdqa,
;              so the table must be 16-byte aligned
;        r8d = index to select
; Does:  jumps to the AVX2 variant when bit 5 (mask 32) of the dword at
;        OPENSSL_ia32cap_P+8 is set. Otherwise walks all 16 entries and
;        ORs each one into the accumulators under an all-ones/all-zero
;        mask obtained by comparing a running counter with the index.
;        Every entry is loaded regardless of the index; no address or
;        branch depends on it.
;        NOTE(review): the counter starts at $L$One and steps by $L$One,
;        so presumably indices are 1..16 and any other value yields an
;        all-zero output -- confirm against the $L$One definition.
; Saves: xmm6-xmm15 in a 168-byte stack frame.
;-----------------------------------------------------------------------
1535. global ecp_nistz256_select_w5
1536. ALIGN 32
1537. ecp_nistz256_select_w5:
1538. lea rax,[OPENSSL_ia32cap_P]
1539. mov rax,QWORD[8+rax]
1540. test eax,32
1541. jnz NEAR $L$avx2_select_w5
1542. lea rax,[((-136))+rsp]
1543. $L$SEH_begin_ecp_nistz256_select_w5:
; Byte-encoded prologue (rax = entry rsp - 136):
;   lea rsp,[rax-32]  => rsp = entry rsp - 168
;   movaps [rax-32],xmm6 ... movaps [rax+112],xmm15
; i.e. xmm6..xmm15 end up at [rsp+0] .. [rsp+144], matching the epilogue.
1544. DB 0x48,0x8d,0x60,0xe0
1545. DB 0x0f,0x29,0x70,0xe0
1546. DB 0x0f,0x29,0x78,0xf0
1547. DB 0x44,0x0f,0x29,0x00
1548. DB 0x44,0x0f,0x29,0x48,0x10
1549. DB 0x44,0x0f,0x29,0x50,0x20
1550. DB 0x44,0x0f,0x29,0x58,0x30
1551. DB 0x44,0x0f,0x29,0x60,0x40
1552. DB 0x44,0x0f,0x29,0x68,0x50
1553. DB 0x44,0x0f,0x29,0x70,0x60
1554. DB 0x44,0x0f,0x29,0x78,0x70
; xmm0 = counter step, xmm8 = running counter, xmm1 = index broadcast to
; all four dwords, xmm2..xmm7 = zeroed 96-byte accumulator.
1555. movdqa xmm0,XMMWORD[$L$One]
1556. movd xmm1,r8d
1557. pxor xmm2,xmm2
1558. pxor xmm3,xmm3
1559. pxor xmm4,xmm4
1560. pxor xmm5,xmm5
1561. pxor xmm6,xmm6
1562. pxor xmm7,xmm7
1563. movdqa xmm8,xmm0
1564. pshufd xmm1,xmm1,0
1565. mov rax,16
1566. $L$select_loop_sse_w5:
; xmm15 = mask for this entry: all ones where counter == index.
1567. movdqa xmm15,xmm8
1568. paddd xmm8,xmm0
1569. pcmpeqd xmm15,xmm1
1570. movdqa xmm9,XMMWORD[rdx]
1571. movdqa xmm10,XMMWORD[16+rdx]
1572. movdqa xmm11,XMMWORD[32+rdx]
1573. movdqa xmm12,XMMWORD[48+rdx]
1574. movdqa xmm13,XMMWORD[64+rdx]
1575. movdqa xmm14,XMMWORD[80+rdx]
1576. lea rdx,[96+rdx]
; Mask the entry and merge it into the accumulators.
1577. pand xmm9,xmm15
1578. pand xmm10,xmm15
1579. por xmm2,xmm9
1580. pand xmm11,xmm15
1581. por xmm3,xmm10
1582. pand xmm12,xmm15
1583. por xmm4,xmm11
1584. pand xmm13,xmm15
1585. por xmm5,xmm12
1586. pand xmm14,xmm15
1587. por xmm6,xmm13
1588. por xmm7,xmm14
1589. dec rax
1590. jnz NEAR $L$select_loop_sse_w5
1591. movdqu XMMWORD[rcx],xmm2
1592. movdqu XMMWORD[16+rcx],xmm3
1593. movdqu XMMWORD[32+rcx],xmm4
1594. movdqu XMMWORD[48+rcx],xmm5
1595. movdqu XMMWORD[64+rcx],xmm6
1596. movdqu XMMWORD[80+rcx],xmm7
; Restore xmm6-xmm15 and release the 168-byte frame.
1597. movaps xmm6,XMMWORD[rsp]
1598. movaps xmm7,XMMWORD[16+rsp]
1599. movaps xmm8,XMMWORD[32+rsp]
1600. movaps xmm9,XMMWORD[48+rsp]
1601. movaps xmm10,XMMWORD[64+rsp]
1602. movaps xmm11,XMMWORD[80+rsp]
1603. movaps xmm12,XMMWORD[96+rsp]
1604. movaps xmm13,XMMWORD[112+rsp]
1605. movaps xmm14,XMMWORD[128+rsp]
1606. movaps xmm15,XMMWORD[144+rsp]
1607. lea rsp,[168+rsp]
1608. DB 0F3h,0C3h ;repret
1609. $L$SEH_end_ecp_nistz256_select_w5:
;-----------------------------------------------------------------------
; ecp_nistz256_select_w7  (exported, SSE2 path)
; ABI:   Win64, arguments used directly from rcx, rdx, r8d.
; In:    rcx = output pointer (64 bytes written, unaligned stores)
;        rdx = table pointer: 64 entries of 64 bytes, read with movdqa,
;              so the table must be 16-byte aligned
;        r8d = index to select
; Does:  jumps to the AVX2 variant when bit 5 (mask 32) of the dword at
;        OPENSSL_ia32cap_P+8 is set. Otherwise walks all 64 entries and
;        ORs each one into the accumulators under a mask obtained by
;        comparing a running counter with the index. Every entry is
;        loaded regardless of the index.
;        NOTE(review): the counter starts at $L$One and steps by $L$One,
;        so presumably indices are 1..64 and any other value yields an
;        all-zero output -- confirm against the $L$One definition.
; Saves: xmm6-xmm15 in a 168-byte stack frame.
;-----------------------------------------------------------------------
1610. global ecp_nistz256_select_w7
1611. ALIGN 32
1612. ecp_nistz256_select_w7:
1613. lea rax,[OPENSSL_ia32cap_P]
1614. mov rax,QWORD[8+rax]
1615. test eax,32
1616. jnz NEAR $L$avx2_select_w7
1617. lea rax,[((-136))+rsp]
1618. $L$SEH_begin_ecp_nistz256_select_w7:
; Byte-encoded prologue (rax = entry rsp - 136):
;   lea rsp,[rax-32]  => rsp = entry rsp - 168
;   movaps [rax-32],xmm6 ... movaps [rax+112],xmm15
; i.e. xmm6..xmm15 end up at [rsp+0] .. [rsp+144], matching the epilogue.
1619. DB 0x48,0x8d,0x60,0xe0
1620. DB 0x0f,0x29,0x70,0xe0
1621. DB 0x0f,0x29,0x78,0xf0
1622. DB 0x44,0x0f,0x29,0x00
1623. DB 0x44,0x0f,0x29,0x48,0x10
1624. DB 0x44,0x0f,0x29,0x50,0x20
1625. DB 0x44,0x0f,0x29,0x58,0x30
1626. DB 0x44,0x0f,0x29,0x60,0x40
1627. DB 0x44,0x0f,0x29,0x68,0x50
1628. DB 0x44,0x0f,0x29,0x70,0x60
1629. DB 0x44,0x0f,0x29,0x78,0x70
; xmm8 = running counter, xmm0 = counter step, xmm1 = index broadcast to
; all four dwords, xmm2..xmm5 = zeroed 64-byte accumulator.
1630. movdqa xmm8,XMMWORD[$L$One]
1631. movd xmm1,r8d
1632. pxor xmm2,xmm2
1633. pxor xmm3,xmm3
1634. pxor xmm4,xmm4
1635. pxor xmm5,xmm5
1636. movdqa xmm0,xmm8
1637. pshufd xmm1,xmm1,0
1638. mov rax,64
1639. $L$select_loop_sse_w7:
; xmm15 = mask for this entry: all ones where counter == index.
1640. movdqa xmm15,xmm8
1641. paddd xmm8,xmm0
1642. movdqa xmm9,XMMWORD[rdx]
1643. movdqa xmm10,XMMWORD[16+rdx]
1644. pcmpeqd xmm15,xmm1
1645. movdqa xmm11,XMMWORD[32+rdx]
1646. movdqa xmm12,XMMWORD[48+rdx]
1647. lea rdx,[64+rdx]
; Mask the entry and merge it into the accumulators.
1648. pand xmm9,xmm15
1649. pand xmm10,xmm15
1650. por xmm2,xmm9
1651. pand xmm11,xmm15
1652. por xmm3,xmm10
1653. pand xmm12,xmm15
1654. por xmm4,xmm11
; Prefetch 255 bytes past the already-advanced table pointer.
1655. prefetcht0 [255+rdx]
1656. por xmm5,xmm12
1657. dec rax
1658. jnz NEAR $L$select_loop_sse_w7
1659. movdqu XMMWORD[rcx],xmm2
1660. movdqu XMMWORD[16+rcx],xmm3
1661. movdqu XMMWORD[32+rcx],xmm4
1662. movdqu XMMWORD[48+rcx],xmm5
; Restore xmm6-xmm15 and release the 168-byte frame.
1663. movaps xmm6,XMMWORD[rsp]
1664. movaps xmm7,XMMWORD[16+rsp]
1665. movaps xmm8,XMMWORD[32+rsp]
1666. movaps xmm9,XMMWORD[48+rsp]
1667. movaps xmm10,XMMWORD[64+rsp]
1668. movaps xmm11,XMMWORD[80+rsp]
1669. movaps xmm12,XMMWORD[96+rsp]
1670. movaps xmm13,XMMWORD[112+rsp]
1671. movaps xmm14,XMMWORD[128+rsp]
1672. movaps xmm15,XMMWORD[144+rsp]
1673. lea rsp,[168+rsp]
1674. DB 0F3h,0C3h ;repret
1675. $L$SEH_end_ecp_nistz256_select_w7:
;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w5  (AVX2 path, reached from
; ecp_nistz256_select_w5 through $L$avx2_select_w5; no "global" here)
; ABI:   Win64, arguments used directly from rcx, rdx, r8d.
; In:    rcx = output pointer (96 bytes written, unaligned stores)
;        rdx = table pointer: 16 entries of 96 bytes, read with vmovdqa
;              ymm loads, so the table must be 32-byte aligned
;        r8d = index to select
; Does:  8 iterations, two 96-byte entries per iteration. Each entry is
;        masked by (its counter == index) and XORed into the accumulators
;        ymm2-ymm4. Every entry is loaded regardless of the index.
;        NOTE(review): the counters start at $L$One / $L$Two and step by
;        $L$Two, so presumably indices are 1..16 and any other value
;        yields an all-zero output -- confirm against the constants.
; Saves: xmm6-xmm15 on the stack; the entry rsp is kept in r11.
;-----------------------------------------------------------------------
1676. ALIGN 32
1677. ecp_nistz256_avx2_select_w5:
1678. $L$avx2_select_w5:
1679. vzeroupper
1680. lea rax,[((-136))+rsp]
1681. mov r11,rsp
1682. $L$SEH_begin_ecp_nistz256_avx2_select_w5:
; Byte-encoded prologue (rax = entry rsp - 136):
;   lea rsp,[rax-32]  => rsp = entry rsp - 168
;   vmovaps [rax-32],xmm6 ... vmovaps [rax+112],xmm15
; i.e. xmm6..xmm15 end up at [rsp+0] .. [rsp+144], matching the epilogue.
1683. DB 0x48,0x8d,0x60,0xe0
1684. DB 0xc5,0xf8,0x29,0x70,0xe0
1685. DB 0xc5,0xf8,0x29,0x78,0xf0
1686. DB 0xc5,0x78,0x29,0x40,0x00
1687. DB 0xc5,0x78,0x29,0x48,0x10
1688. DB 0xc5,0x78,0x29,0x50,0x20
1689. DB 0xc5,0x78,0x29,0x58,0x30
1690. DB 0xc5,0x78,0x29,0x60,0x40
1691. DB 0xc5,0x78,0x29,0x68,0x50
1692. DB 0xc5,0x78,0x29,0x70,0x60
1693. DB 0xc5,0x78,0x29,0x78,0x70
; ymm0 = counter step; ymm2..ymm4 = zeroed accumulator;
; ymm5 / ymm10 = counters for the first / second entry of each pair.
1694. vmovdqa ymm0,YMMWORD[$L$Two]
1695. vpxor ymm2,ymm2,ymm2
1696. vpxor ymm3,ymm3,ymm3
1697. vpxor ymm4,ymm4,ymm4
1698. vmovdqa ymm5,YMMWORD[$L$One]
1699. vmovdqa ymm10,YMMWORD[$L$Two]
; Broadcast the index: vpermd with the all-zero index vector ymm2 copies
; dword 0 of ymm1 into every lane.
1700. vmovd xmm1,r8d
1701. vpermd ymm1,ymm2,ymm1
1702. mov rax,8
1703. $L$select_loop_avx2_w5:
; Load two consecutive entries (3 ymm each).
1704. vmovdqa ymm6,YMMWORD[rdx]
1705. vmovdqa ymm7,YMMWORD[32+rdx]
1706. vmovdqa ymm8,YMMWORD[64+rdx]
1707. vmovdqa ymm11,YMMWORD[96+rdx]
1708. vmovdqa ymm12,YMMWORD[128+rdx]
1709. vmovdqa ymm13,YMMWORD[160+rdx]
; ymm9 / ymm14 = masks for the first / second entry; advance both counters.
1710. vpcmpeqd ymm9,ymm5,ymm1
1711. vpcmpeqd ymm14,ymm10,ymm1
1712. vpaddd ymm5,ymm5,ymm0
1713. vpaddd ymm10,ymm10,ymm0
1714. lea rdx,[192+rdx]
1715. vpand ymm6,ymm6,ymm9
1716. vpand ymm7,ymm7,ymm9
1717. vpand ymm8,ymm8,ymm9
1718. vpand ymm11,ymm11,ymm14
1719. vpand ymm12,ymm12,ymm14
1720. vpand ymm13,ymm13,ymm14
; Merge both masked entries into the accumulators.
1721. vpxor ymm2,ymm2,ymm6
1722. vpxor ymm3,ymm3,ymm7
1723. vpxor ymm4,ymm4,ymm8
1724. vpxor ymm2,ymm2,ymm11
1725. vpxor ymm3,ymm3,ymm12
1726. vpxor ymm4,ymm4,ymm13
1727. dec rax
1728. jnz NEAR $L$select_loop_avx2_w5
1729. vmovdqu YMMWORD[rcx],ymm2
1730. vmovdqu YMMWORD[32+rcx],ymm3
1731. vmovdqu YMMWORD[64+rcx],ymm4
; Clear the upper ymm halves before the legacy-SSE restores that follow.
1732. vzeroupper
1733. movaps xmm6,XMMWORD[rsp]
1734. movaps xmm7,XMMWORD[16+rsp]
1735. movaps xmm8,XMMWORD[32+rsp]
1736. movaps xmm9,XMMWORD[48+rsp]
1737. movaps xmm10,XMMWORD[64+rsp]
1738. movaps xmm11,XMMWORD[80+rsp]
1739. movaps xmm12,XMMWORD[96+rsp]
1740. movaps xmm13,XMMWORD[112+rsp]
1741. movaps xmm14,XMMWORD[128+rsp]
1742. movaps xmm15,XMMWORD[144+rsp]
; Put back the entry stack pointer saved in r11.
1743. lea rsp,[r11]
1744. DB 0F3h,0C3h ;repret
1745. $L$SEH_end_ecp_nistz256_avx2_select_w5:
;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7  (exported; also reached from
; ecp_nistz256_select_w7 through $L$avx2_select_w7)
; ABI:   Win64, arguments used directly from rcx, rdx, r8d.
; In:    rcx = output pointer (64 bytes written, unaligned stores)
;        rdx = table pointer: 64 entries of 64 bytes, read with vmovdqa
;              ymm loads, so the table must be 32-byte aligned
;        r8d = index to select
; Does:  21 iterations of three 64-byte entries each, plus one trailing
;        entry after the loop (21*3 + 1 = 64). Each entry is masked by
;        (its counter == index) and XORed into the accumulators ymm2/ymm3.
;        Every entry is loaded regardless of the index.
;        NOTE(review): the counters start at $L$One / $L$Two / $L$Three
;        and step by $L$Three, so presumably indices are 1..64 and any
;        other value yields an all-zero output -- confirm against the
;        constants.
; Saves: xmm6-xmm15 on the stack; the entry rsp is kept in r11.
;-----------------------------------------------------------------------
1746. global ecp_nistz256_avx2_select_w7
1747. ALIGN 32
1748. ecp_nistz256_avx2_select_w7:
1749. $L$avx2_select_w7:
1750. vzeroupper
1751. mov r11,rsp
1752. lea rax,[((-136))+rsp]
1753. $L$SEH_begin_ecp_nistz256_avx2_select_w7:
; Byte-encoded prologue (rax = entry rsp - 136):
;   lea rsp,[rax-32]  => rsp = entry rsp - 168
;   vmovaps [rax-32],xmm6 ... vmovaps [rax+112],xmm15
; i.e. xmm6..xmm15 end up at [rsp+0] .. [rsp+144], matching the epilogue.
1754. DB 0x48,0x8d,0x60,0xe0
1755. DB 0xc5,0xf8,0x29,0x70,0xe0
1756. DB 0xc5,0xf8,0x29,0x78,0xf0
1757. DB 0xc5,0x78,0x29,0x40,0x00
1758. DB 0xc5,0x78,0x29,0x48,0x10
1759. DB 0xc5,0x78,0x29,0x50,0x20
1760. DB 0xc5,0x78,0x29,0x58,0x30
1761. DB 0xc5,0x78,0x29,0x60,0x40
1762. DB 0xc5,0x78,0x29,0x68,0x50
1763. DB 0xc5,0x78,0x29,0x70,0x60
1764. DB 0xc5,0x78,0x29,0x78,0x70
; ymm0 = counter step; ymm2/ymm3 = zeroed accumulator;
; ymm4 / ymm8 / ymm12 = counters for entries 1 / 2 / 3 of each triple.
1765. vmovdqa ymm0,YMMWORD[$L$Three]
1766. vpxor ymm2,ymm2,ymm2
1767. vpxor ymm3,ymm3,ymm3
1768. vmovdqa ymm4,YMMWORD[$L$One]
1769. vmovdqa ymm8,YMMWORD[$L$Two]
1770. vmovdqa ymm12,YMMWORD[$L$Three]
; Broadcast the index: vpermd with the all-zero index vector ymm2 copies
; dword 0 of ymm1 into every lane.
1771. vmovd xmm1,r8d
1772. vpermd ymm1,ymm2,ymm1
1773. mov rax,21
1774. $L$select_loop_avx2_w7:
; Load three consecutive entries (2 ymm each).
1775. vmovdqa ymm5,YMMWORD[rdx]
1776. vmovdqa ymm6,YMMWORD[32+rdx]
1777. vmovdqa ymm9,YMMWORD[64+rdx]
1778. vmovdqa ymm10,YMMWORD[96+rdx]
1779. vmovdqa ymm13,YMMWORD[128+rdx]
1780. vmovdqa ymm14,YMMWORD[160+rdx]
; ymm7 / ymm11 / ymm15 = per-entry masks; advance all three counters.
1781. vpcmpeqd ymm7,ymm4,ymm1
1782. vpcmpeqd ymm11,ymm8,ymm1
1783. vpcmpeqd ymm15,ymm12,ymm1
1784. vpaddd ymm4,ymm4,ymm0
1785. vpaddd ymm8,ymm8,ymm0
1786. vpaddd ymm12,ymm12,ymm0
1787. lea rdx,[192+rdx]
1788. vpand ymm5,ymm5,ymm7
1789. vpand ymm6,ymm6,ymm7
1790. vpand ymm9,ymm9,ymm11
1791. vpand ymm10,ymm10,ymm11
1792. vpand ymm13,ymm13,ymm15
1793. vpand ymm14,ymm14,ymm15
; Merge the three masked entries into the accumulators.
1794. vpxor ymm2,ymm2,ymm5
1795. vpxor ymm3,ymm3,ymm6
1796. vpxor ymm2,ymm2,ymm9
1797. vpxor ymm3,ymm3,ymm10
1798. vpxor ymm2,ymm2,ymm13
1799. vpxor ymm3,ymm3,ymm14
1800. dec rax
1801. jnz NEAR $L$select_loop_avx2_w7
; Trailing entry: the one left over after 21 triples, compared against
; the first counter (ymm4) as it stands after the loop.
1802. vmovdqa ymm5,YMMWORD[rdx]
1803. vmovdqa ymm6,YMMWORD[32+rdx]
1804. vpcmpeqd ymm7,ymm4,ymm1
1805. vpand ymm5,ymm5,ymm7
1806. vpand ymm6,ymm6,ymm7
1807. vpxor ymm2,ymm2,ymm5
1808. vpxor ymm3,ymm3,ymm6
1809. vmovdqu YMMWORD[rcx],ymm2
1810. vmovdqu YMMWORD[32+rcx],ymm3
; Clear the upper ymm halves before the legacy-SSE restores that follow.
1811. vzeroupper
1812. movaps xmm6,XMMWORD[rsp]
1813. movaps xmm7,XMMWORD[16+rsp]
1814. movaps xmm8,XMMWORD[32+rsp]
1815. movaps xmm9,XMMWORD[48+rsp]
1816. movaps xmm10,XMMWORD[64+rsp]
1817. movaps xmm11,XMMWORD[80+rsp]
1818. movaps xmm12,XMMWORD[96+rsp]
1819. movaps xmm13,XMMWORD[112+rsp]
1820. movaps xmm14,XMMWORD[128+rsp]
1821. movaps xmm15,XMMWORD[144+rsp]
; Put back the entry stack pointer saved in r11.
1822. lea rsp,[r11]
1823. DB 0F3h,0C3h ;repret
1824. $L$SEH_end_ecp_nistz256_avx2_select_w7:
;-----------------------------------------------------------------------
; __ecp_nistz256_add_toq  (internal helper)
; In:    r12,r13,r8,r9 = first operand (words 0..3)
;        rbx = pointer to the second operand (4 qwords)
;        r14,r15 = modulus words 1 and 3; this routine does not load
;                  them, the caller must have set them up
;        rdi = result pointer
; Out:   sum reduced by one conditional subtraction of the modulus
;        {-1, r14, 0, r15}; left in r12,r13,r8,r9 and stored at [rdi]
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
1825. ALIGN 32
1826. __ecp_nistz256_add_toq:
; 256-bit add; r11 collects the carry-out. rax,rbp,rcx,r10 receive copies
; of the unreduced sum.
1827. xor r11,r11
1828. add r12,QWORD[rbx]
1829. adc r13,QWORD[8+rbx]
1830. mov rax,r12
1831. adc r8,QWORD[16+rbx]
1832. adc r9,QWORD[24+rbx]
1833. mov rbp,r13
1834. adc r11,0
; Trial subtraction of the modulus from r11:r9:r8:r13:r12.
1835. sub r12,-1
1836. mov rcx,r8
1837. sbb r13,r14
1838. sbb r8,0
1839. mov r10,r9
1840. sbb r9,r15
1841. sbb r11,0
; Borrow => the sum was already below the modulus: restore the copies.
1842. cmovc r12,rax
1843. cmovc r13,rbp
1844. mov QWORD[rdi],r12
1845. cmovc r8,rcx
1846. mov QWORD[8+rdi],r13
1847. cmovc r9,r10
1848. mov QWORD[16+rdi],r8
1849. mov QWORD[24+rdi],r9
1850. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __ecp_nistz256_sub_fromq  (internal helper)
; In:    r12,r13,r8,r9 = minuend (words 0..3)
;        rbx = pointer to the subtrahend (4 qwords)
;        r14,r15 = modulus words 1 and 3; this routine does not load
;                  them, the caller must have set them up
;        rdi = result pointer
; Out:   difference, with the modulus {-1, r14, 0, r15} added back when
;        the subtraction borrowed; left in r12,r13,r8,r9 and stored at
;        [rdi]
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
1851. ALIGN 32
1852. __ecp_nistz256_sub_fromq:
; 256-bit subtract; rax,rbp,rcx,r10 receive copies of the raw difference.
1853. sub r12,QWORD[rbx]
1854. sbb r13,QWORD[8+rbx]
1855. mov rax,r12
1856. sbb r8,QWORD[16+rbx]
1857. sbb r9,QWORD[24+rbx]
1858. mov rbp,r13
; r11 = all ones if the subtraction borrowed, zero otherwise.
1859. sbb r11,r11
; Unconditionally compute difference + modulus ...
1860. add r12,-1
1861. mov rcx,r8
1862. adc r13,r14
1863. adc r8,0
1864. mov r10,r9
1865. adc r9,r15
; ... and keep it only when there was a borrow (r11 != 0); otherwise
; cmovz restores the raw difference.
1866. test r11,r11
1867. cmovz r12,rax
1868. cmovz r13,rbp
1869. mov QWORD[rdi],r12
1870. cmovz r8,rcx
1871. mov QWORD[8+rdi],r13
1872. cmovz r9,r10
1873. mov QWORD[16+rdi],r8
1874. mov QWORD[24+rdi],r9
1875. DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __ecp_nistz256_subq  (internal helper, register-only: nothing is stored)
; In:    rax,rbp,rcx,r10 = minuend (words 0..3)
;        r12,r13,r8,r9   = subtrahend (words 0..3)
;        r14,r15 = modulus words 1 and 3; this routine does not load
;                  them, the caller must have set them up
; Out:   r12,r13,r8,r9 = minuend - subtrahend, with the modulus
;        {-1, r14, 0, r15} added back when the subtraction borrowed
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
1876. ALIGN 32
1877. __ecp_nistz256_subq:
; 256-bit subtract into rax,rbp,rcx,r10; r12,r13,r8,r9 receive copies of
; the raw difference.
1878. sub rax,r12
1879. sbb rbp,r13
1880. mov r12,rax
1881. sbb rcx,r8
1882. sbb r10,r9
1883. mov r13,rbp
; r11 = all ones if the subtraction borrowed, zero otherwise.
1884. sbb r11,r11
; Compute difference + modulus in rax,rbp,rcx,r10.
1885. add rax,-1
1886. mov r8,rcx
1887. adc rbp,r14
1888. adc rcx,0
1889. mov r9,r10
1890. adc r10,r15
; Take the corrected value only when there was a borrow (r11 != 0).
1891. test r11,r11
1892. cmovnz r12,rax
1893. cmovnz r13,rbp
1894. cmovnz r8,rcx
1895. cmovnz r9,r10
1896. DB 0F3h,0C3h ;repret
  1897. ALIGN 32
  ; __ecp_nistz256_mul_by_2q -- branch-free modular doubling.
  ; In:   r12,r13,r8,r9 = operand a, least-significant qword first
  ;       r14,r15       = modulus qwords 1 and 3 (qwords 0 and 2 are the
  ;                       immediates -1 and 0 below)
  ;       rdi           -> destination (4 qwords)
  ; Out:  r12,r13,r8,r9 = 2a, or 2a-modulus when that subtraction does not
  ;       borrow; the same four qwords are stored at [rdi]
  ; Clobbers rax, rbp, rcx, r10, r11 and the flags.
  1898. __ecp_nistz256_mul_by_2q:
  1899. xor r11,r11 ; r11 collects the bit shifted out of the top qword
  1900. add r12,r12
  1901. adc r13,r13
  1902. mov rax,r12 ; rax,rbp,rcx,r10 keep the unreduced 2a
  1903. adc r8,r8
  1904. adc r9,r9
  1905. mov rbp,r13
  1906. adc r11,0
  ; Trial subtraction of the modulus {-1, r14, 0, r15}.
  1907. sub r12,-1
  1908. mov rcx,r8
  1909. sbb r13,r14
  1910. sbb r8,0
  1911. mov r10,r9
  1912. sbb r9,r15
  1913. sbb r11,0 ; CF=1 here means 2a was below the modulus
  ; On borrow, put the unreduced value back.
  1914. cmovc r12,rax
  1915. cmovc r13,rbp
  1916. mov QWORD[rdi],r12
  1917. cmovc r8,rcx
  1918. mov QWORD[8+rdi],r13
  1919. cmovc r9,r10
  1920. mov QWORD[16+rdi],r8
  1921. mov QWORD[24+rdi],r9
  1922. DB 0F3h,0C3h ;repret
  ; ecp_nistz256_point_double -- Win64 entry point.
  ; Win64 args: rcx -> result (three 32-byte values written at +0, +32, +64),
  ;             rdx -> input  (three 32-byte values read at +0, +32, +64).
  ; The args are moved to rdi/rsi; the caller's rdi/rsi are parked in the home
  ; space at [rsp+8]/[rsp+16] and reloaded in the epilogue.
  ; The 32-byte values are called x, y, z below (presumably Jacobian X, Y, Z
  ; coordinates -- confirm against the C prototype).
  ; Frame after the prologue: five 32-byte slots at rsp+0, +32, +64, +96, +128
  ; ("slotN" below means the 32 bytes at rsp+N).
  ; r14/r15 are loaded with qwords 1 and 3 of $L$poly for the add/sub/double
  ; helpers.
  ; Helper conventions as used at the call sites here:
  ;   __ecp_nistz256_mul_montq: rsi -> a, r9,r10,r11,r12 = a, rbx -> b,
  ;       rax = b[0], rdi -> result; callers then use r12,r13,r8,r9 as result.
  ;   __ecp_nistz256_sqr_montq: rsi -> a, rax,r14,r15,r8 = a, rdi -> result;
  ;       callers then use r12,r13,r14,r15 as result and rsi/rbp as modulus
  ;       qwords 1 and 3.
  ;   NOTE(review): those output conventions are inferred from how the values
  ;   are consumed here -- confirm in the *_montq routines themselves.
  ; $L$point_double_shortcutq is also entered by a jmp from
  ; ecp_nistz256_point_add after that routine shrinks its frame to this size.
  1923. global ecp_nistz256_point_double
  1924. ALIGN 32
  1925. ecp_nistz256_point_double:
  1926. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1927. mov QWORD[16+rsp],rsi
  1928. mov rax,rsp
  1929. $L$SEH_begin_ecp_nistz256_point_double:
  1930. mov rdi,rcx
  1931. mov rsi,rdx
  ; Dispatch: if both bits of mask 0x80100 are set in the qword loaded from
  ; OPENSSL_ia32cap_P+8, use the $L$point_doublex path (presumably the
  ; BMI2+ADX capability bits -- confirm against the capability vector layout).
  1932. lea rcx,[OPENSSL_ia32cap_P]
  1933. mov rcx,QWORD[8+rcx]
  1934. and ecx,0x80100
  1935. cmp ecx,0x80100
  1936. je NEAR $L$point_doublex
  1937. push rbp
  1938. push rbx
  1939. push r12
  1940. push r13
  1941. push r14
  1942. push r15
  1943. sub rsp,32*5+8 ; five 32-byte slots plus 8 bytes of padding
  1944. $L$point_doubleq_body:
  1945. $L$point_double_shortcutq:
  ; slot96 = in.x; r12,r13,r8,r9 = in.y; rbx = input pointer.
  1946. movdqu xmm0,XMMWORD[rsi]
  1947. mov rbx,rsi
  1948. movdqu xmm1,XMMWORD[16+rsi]
  1949. mov r12,QWORD[((32+0))+rsi]
  1950. mov r13,QWORD[((32+8))+rsi]
  1951. mov r8,QWORD[((32+16))+rsi]
  1952. mov r9,QWORD[((32+24))+rsi]
  1953. mov r14,QWORD[(($L$poly+8))]
  1954. mov r15,QWORD[(($L$poly+24))]
  1955. movdqa XMMWORD[96+rsp],xmm0
  1956. movdqa XMMWORD[(96+16)+rsp],xmm1
  ; Park the three result pointers in xmm registers for later retrieval.
  1957. lea r10,[32+rdi]
  1958. lea r11,[64+rdi]
  1959. DB 102,72,15,110,199 ; movq xmm0,rdi  (result+0)
  1960. DB 102,73,15,110,202 ; movq xmm1,r10  (result+32)
  1961. DB 102,73,15,110,211 ; movq xmm2,r11  (result+64)
  ; slot0 = 2 * in.y
  1962. lea rdi,[rsp]
  1963. call __ecp_nistz256_mul_by_2q
  ; slot64 = sqr(in.z)
  1964. mov rax,QWORD[((64+0))+rsi]
  1965. mov r14,QWORD[((64+8))+rsi]
  1966. mov r15,QWORD[((64+16))+rsi]
  1967. mov r8,QWORD[((64+24))+rsi]
  1968. lea rsi,[((64-0))+rsi]
  1969. lea rdi,[64+rsp]
  1970. call __ecp_nistz256_sqr_montq
  ; slot0 = sqr(slot0)
  1971. mov rax,QWORD[((0+0))+rsp]
  1972. mov r14,QWORD[((8+0))+rsp]
  1973. lea rsi,[((0+0))+rsp]
  1974. mov r15,QWORD[((16+0))+rsp]
  1975. mov r8,QWORD[((24+0))+rsp]
  1976. lea rdi,[rsp]
  1977. call __ecp_nistz256_sqr_montq
  ; result.z = 2 * mul(in.z, in.y): the product goes to result+64 and the
  ; doubling helper then stores over the same destination.
  1978. mov rax,QWORD[32+rbx]
  1979. mov r9,QWORD[((64+0))+rbx]
  1980. mov r10,QWORD[((64+8))+rbx]
  1981. mov r11,QWORD[((64+16))+rbx]
  1982. mov r12,QWORD[((64+24))+rbx]
  1983. lea rsi,[((64-0))+rbx]
  1984. lea rbx,[32+rbx]
  1985. DB 102,72,15,126,215 ; movq rdi,xmm2  (result+64)
  1986. call __ecp_nistz256_mul_montq
  1987. call __ecp_nistz256_mul_by_2q
  ; slot32 = slot96 + slot64   (in.x + sqr(in.z))
  1988. mov r12,QWORD[((96+0))+rsp]
  1989. mov r13,QWORD[((96+8))+rsp]
  1990. lea rbx,[64+rsp]
  1991. mov r8,QWORD[((96+16))+rsp]
  1992. mov r9,QWORD[((96+24))+rsp]
  1993. lea rdi,[32+rsp]
  1994. call __ecp_nistz256_add_toq
  ; slot64 = slot96 - slot64   (in.x - sqr(in.z))
  1995. mov r12,QWORD[((96+0))+rsp]
  1996. mov r13,QWORD[((96+8))+rsp]
  1997. lea rbx,[64+rsp]
  1998. mov r8,QWORD[((96+16))+rsp]
  1999. mov r9,QWORD[((96+24))+rsp]
  2000. lea rdi,[64+rsp]
  2001. call __ecp_nistz256_sub_fromq
  ; sqr(slot0) with result+32 as destination ...
  2002. mov rax,QWORD[((0+0))+rsp]
  2003. mov r14,QWORD[((8+0))+rsp]
  2004. lea rsi,[((0+0))+rsp]
  2005. mov r15,QWORD[((16+0))+rsp]
  2006. mov r8,QWORD[((24+0))+rsp]
  2007. DB 102,72,15,126,207 ; movq rdi,xmm1  (result+32)
  2008. call __ecp_nistz256_sqr_montq
  ; ... then halve it modulo the prime, in registers r12,r13,r14,r15:
  ; add the modulus {-1, rsi, 0, rbp} with carry into r9, keep the original
  ; (and r9=0) when bit 0 of the value was clear, then shift the 257-bit
  ; quantity right by one and store it at [rdi].
  2009. xor r9,r9
  2010. mov rax,r12
  2011. add r12,-1
  2012. mov r10,r13
  2013. adc r13,rsi
  2014. mov rcx,r14
  2015. adc r14,0
  2016. mov r8,r15
  2017. adc r15,rbp
  2018. adc r9,0
  2019. xor rsi,rsi ; rsi = 0, source for clearing r9 below
  2020. test rax,1 ; ZF=1 when the pre-addition value is even
  2021. cmovz r12,rax
  2022. cmovz r13,r10
  2023. cmovz r14,rcx
  2024. cmovz r15,r8
  2025. cmovz r9,rsi
  ; Right shift by one: each qword takes bit 0 of the next-higher qword as
  ; its new bit 63; the top qword takes the carry held in r9.
  2026. mov rax,r13
  2027. shr r12,1
  2028. shl rax,63
  2029. mov r10,r14
  2030. shr r13,1
  2031. or r12,rax
  2032. shl r10,63
  2033. mov rcx,r15
  2034. shr r14,1
  2035. or r13,r10
  2036. shl rcx,63
  2037. mov QWORD[rdi],r12
  2038. shr r15,1
  2039. mov QWORD[8+rdi],r13
  2040. shl r9,63
  2041. or r14,rcx
  2042. or r15,r9
  2043. mov QWORD[16+rdi],r14
  2044. mov QWORD[24+rdi],r15
  ; slot32 = mul(slot32, slot64)
  2045. mov rax,QWORD[64+rsp]
  2046. lea rbx,[64+rsp]
  2047. mov r9,QWORD[((0+32))+rsp]
  2048. mov r10,QWORD[((8+32))+rsp]
  2049. lea rsi,[((0+32))+rsp]
  2050. mov r11,QWORD[((16+32))+rsp]
  2051. mov r12,QWORD[((24+32))+rsp]
  2052. lea rdi,[32+rsp]
  2053. call __ecp_nistz256_mul_montq
  ; slot128 = 2 * slot32 (operand taken from the registers left by the call
  ; above), then slot32 = slot128 + slot32, i.e. three times the product.
  2054. lea rdi,[128+rsp]
  2055. call __ecp_nistz256_mul_by_2q
  2056. lea rbx,[32+rsp]
  2057. lea rdi,[32+rsp]
  2058. call __ecp_nistz256_add_toq
  ; slot0 = mul(slot0, slot96)   (slot96 = in.x)
  2059. mov rax,QWORD[96+rsp]
  2060. lea rbx,[96+rsp]
  2061. mov r9,QWORD[((0+0))+rsp]
  2062. mov r10,QWORD[((8+0))+rsp]
  2063. lea rsi,[((0+0))+rsp]
  2064. mov r11,QWORD[((16+0))+rsp]
  2065. mov r12,QWORD[((24+0))+rsp]
  2066. lea rdi,[rsp]
  2067. call __ecp_nistz256_mul_montq
  ; slot128 = 2 * slot0
  2068. lea rdi,[128+rsp]
  2069. call __ecp_nistz256_mul_by_2q
  ; result.x = sqr(slot32) ...
  2070. mov rax,QWORD[((0+32))+rsp]
  2071. mov r14,QWORD[((8+32))+rsp]
  2072. lea rsi,[((0+32))+rsp]
  2073. mov r15,QWORD[((16+32))+rsp]
  2074. mov r8,QWORD[((24+32))+rsp]
  2075. DB 102,72,15,126,199 ; movq rdi,xmm0  (result+0)
  2076. call __ecp_nistz256_sqr_montq
  ; ... minus slot128. The square is moved from r12,r13,r14,r15 into the
  ; r12,r13,r8,r9 layout the helper expects, and r14/r15 are reloaded with
  ; the modulus qwords from rsi/rbp.
  ; NOTE(review): assumes rdi still points at result+0 after the call above.
  2077. lea rbx,[128+rsp]
  2078. mov r8,r14
  2079. mov r9,r15
  2080. mov r14,rsi
  2081. mov r15,rbp
  2082. call __ecp_nistz256_sub_fromq
  ; r12,r13,r8,r9 = slot0 - result.x (register-only helper; rdi is set to
  ; rsp here but the helper does not store).
  2083. mov rax,QWORD[((0+0))+rsp]
  2084. mov rbp,QWORD[((0+8))+rsp]
  2085. mov rcx,QWORD[((0+16))+rsp]
  2086. mov r10,QWORD[((0+24))+rsp]
  2087. lea rdi,[rsp]
  2088. call __ecp_nistz256_subq
  ; Store that difference to slot0 while loading it into r9,r10,r11,r12 as
  ; the first multiplicand. "xor ecx,ecx" sets ZF and only mov/lea follow,
  ; so both cmovz instructions below always perform their move.
  2089. mov rax,QWORD[32+rsp]
  2090. lea rbx,[32+rsp]
  2091. mov r14,r12
  2092. xor ecx,ecx
  2093. mov QWORD[((0+0))+rsp],r12
  2094. mov r10,r13
  2095. mov QWORD[((0+8))+rsp],r13
  2096. cmovz r11,r8
  2097. mov QWORD[((0+16))+rsp],r8
  2098. lea rsi,[((0-0))+rsp]
  2099. cmovz r12,r9
  2100. mov QWORD[((0+24))+rsp],r9
  2101. mov r9,r14
  ; slot0 = mul(slot0, slot32)
  2102. lea rdi,[rsp]
  2103. call __ecp_nistz256_mul_montq
  ; result.y = (that product) - result.y, stored at result+32.
  2104. DB 102,72,15,126,203 ; movq rbx,xmm1  (result+32)
  2105. DB 102,72,15,126,207 ; movq rdi,xmm1  (result+32)
  2106. call __ecp_nistz256_sub_fromq
  ; Epilogue: rsi = rsp + 168 + 6*8, the stack pointer value at entry; the
  ; six pushed registers sit just below it.
  2107. lea rsi,[((160+56))+rsp]
  2108. mov r15,QWORD[((-48))+rsi]
  2109. mov r14,QWORD[((-40))+rsi]
  2110. mov r13,QWORD[((-32))+rsi]
  2111. mov r12,QWORD[((-24))+rsi]
  2112. mov rbx,QWORD[((-16))+rsi]
  2113. mov rbp,QWORD[((-8))+rsi]
  2114. lea rsp,[rsi]
  2115. $L$point_doubleq_epilogue:
  2116. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2117. mov rsi,QWORD[16+rsp]
  2118. DB 0F3h,0C3h ;repret
  2119. $L$SEH_end_ecp_nistz256_point_double:
  ; ecp_nistz256_point_add -- Win64 entry point.
  ; Win64 args: rcx -> result (96 bytes written), rdx -> in1 (96 bytes read),
  ;             r8  -> in2 (96 bytes read). They are moved to rdi, rsi, rdx;
  ; the caller's rdi/rsi are parked at [rsp+8]/[rsp+16] until the epilogue.
  ; Each operand is handled as three 32-byte values at offsets 0, 32, 64,
  ; called x, y, z below (presumably Jacobian X, Y, Z -- confirm against the
  ; C prototype).
  ; Frame after the prologue: eighteen 32-byte slots ("slotN" = rsp+N):
  ;   slot384/416/448 = copy of in1.x/in1.y/in1.z
  ;   slot480/512/544 = copy of in2.x/in2.y/in2.z
  ;   slot288/320/352 = computed x/y/z before the final selection
  ;   slot0..slot256  = temporaries, described where they are written
  ; Two masks steer the final selection and the special cases:
  ;   xmm5 = all-ones when in1.z is all zero, xmm4 = all-ones when in2.z is
  ;   all zero (built with por/pshufd/pcmpeqd below).
  ; Special cases, decided after the two differences slot0 and slot64:
  ;   - slot0 == 0, neither z is zero, slot64 == 0: restore the in1 and
  ;     result pointers, shrink the frame by 416 bytes and jump into
  ;     $L$point_double_shortcutq;
  ;   - slot0 == 0, neither z is zero, slot64 != 0: write 96 zero bytes to
  ;     the result;
  ;   - otherwise continue at $L$add_proceedq.
  ; r14/r15 are not loaded with the modulus in this routine before the first
  ; __ecp_nistz256_sub_fromq call.
  ; NOTE(review): this relies on the *_montq helpers leaving the modulus
  ; qwords in r14/r15 (mul) or rsi/rbp (sqr) -- confirm in those routines.
  2120. global ecp_nistz256_point_add
  2121. ALIGN 32
  2122. ecp_nistz256_point_add:
  2123. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2124. mov QWORD[16+rsp],rsi
  2125. mov rax,rsp
  2126. $L$SEH_begin_ecp_nistz256_point_add:
  2127. mov rdi,rcx
  2128. mov rsi,rdx
  2129. mov rdx,r8
  ; Dispatch to $L$point_addx when both bits of mask 0x80100 are set in the
  ; qword at OPENSSL_ia32cap_P+8 (presumably BMI2+ADX -- confirm).
  2130. lea rcx,[OPENSSL_ia32cap_P]
  2131. mov rcx,QWORD[8+rcx]
  2132. and ecx,0x80100
  2133. cmp ecx,0x80100
  2134. je NEAR $L$point_addx
  2135. push rbp
  2136. push rbx
  2137. push r12
  2138. push r13
  2139. push r14
  2140. push r15
  2141. sub rsp,32*18+8 ; eighteen 32-byte slots plus 8 bytes of padding
  2142. $L$point_addq_body:
  ; Copy in1 to slot384..slot448; rbx = in1 pointer, rsi = in2 pointer.
  2143. movdqu xmm0,XMMWORD[rsi]
  2144. movdqu xmm1,XMMWORD[16+rsi]
  2145. movdqu xmm2,XMMWORD[32+rsi]
  2146. movdqu xmm3,XMMWORD[48+rsi]
  2147. movdqu xmm4,XMMWORD[64+rsi]
  2148. movdqu xmm5,XMMWORD[80+rsi]
  2149. mov rbx,rsi
  2150. mov rsi,rdx
  2151. movdqa XMMWORD[384+rsp],xmm0
  2152. movdqa XMMWORD[(384+16)+rsp],xmm1
  2153. movdqa XMMWORD[416+rsp],xmm2
  2154. movdqa XMMWORD[(416+16)+rsp],xmm3
  2155. movdqa XMMWORD[448+rsp],xmm4
  2156. movdqa XMMWORD[(448+16)+rsp],xmm5
  ; Interleaved: (a) OR-fold the eight dwords of in1.z into dword 0 of xmm5,
  ; (b) copy in2 to slot480..slot544, (c) load in2.z into rax,r14,r15,r8.
  2157. por xmm5,xmm4
  2158. movdqu xmm0,XMMWORD[rsi]
  2159. pshufd xmm3,xmm5,0xb1
  2160. movdqu xmm1,XMMWORD[16+rsi]
  2161. movdqu xmm2,XMMWORD[32+rsi]
  2162. por xmm5,xmm3
  2163. movdqu xmm3,XMMWORD[48+rsi]
  2164. mov rax,QWORD[((64+0))+rsi]
  2165. mov r14,QWORD[((64+8))+rsi]
  2166. mov r15,QWORD[((64+16))+rsi]
  2167. mov r8,QWORD[((64+24))+rsi]
  2168. movdqa XMMWORD[480+rsp],xmm0
  2169. pshufd xmm4,xmm5,0x1e
  2170. movdqa XMMWORD[(480+16)+rsp],xmm1
  2171. movdqu xmm0,XMMWORD[64+rsi]
  2172. movdqu xmm1,XMMWORD[80+rsi]
  2173. movdqa XMMWORD[512+rsp],xmm2
  2174. movdqa XMMWORD[(512+16)+rsp],xmm3
  2175. por xmm5,xmm4
  2176. pxor xmm4,xmm4
  2177. por xmm1,xmm0 ; xmm1 = OR of the two halves of in2.z
  2178. DB 102,72,15,110,199 ; movq xmm0,rdi  (save result pointer)
  ; slot96 = sqr(in2.z); in2.z is also written to slot544.
  2179. lea rsi,[((64-0))+rsi]
  2180. mov QWORD[((544+0))+rsp],rax
  2181. mov QWORD[((544+8))+rsp],r14
  2182. mov QWORD[((544+16))+rsp],r15
  2183. mov QWORD[((544+24))+rsp],r8
  2184. lea rdi,[96+rsp]
  2185. call __ecp_nistz256_sqr_montq
  ; xmm5 = all-ones iff in1.z == 0; xmm4 = all-ones iff in2.z == 0.
  2186. pcmpeqd xmm5,xmm4
  2187. pshufd xmm4,xmm1,0xb1
  2188. por xmm4,xmm1
  2189. pshufd xmm5,xmm5,0
  2190. pshufd xmm3,xmm4,0x1e
  2191. por xmm4,xmm3
  2192. pxor xmm3,xmm3
  2193. pcmpeqd xmm4,xmm3
  2194. pshufd xmm4,xmm4,0
  ; slot32 = sqr(in1.z), read through the in1 pointer in rbx.
  2195. mov rax,QWORD[((64+0))+rbx]
  2196. mov r14,QWORD[((64+8))+rbx]
  2197. mov r15,QWORD[((64+16))+rbx]
  2198. mov r8,QWORD[((64+24))+rbx]
  2199. DB 102,72,15,110,203 ; movq xmm1,rbx  (save in1 pointer)
  2200. lea rsi,[((64-0))+rbx]
  2201. lea rdi,[32+rsp]
  2202. call __ecp_nistz256_sqr_montq
  ; slot224 = mul(slot96, slot544)   (sqr(in2.z) * in2.z)
  2203. mov rax,QWORD[544+rsp]
  2204. lea rbx,[544+rsp]
  2205. mov r9,QWORD[((0+96))+rsp]
  2206. mov r10,QWORD[((8+96))+rsp]
  2207. lea rsi,[((0+96))+rsp]
  2208. mov r11,QWORD[((16+96))+rsp]
  2209. mov r12,QWORD[((24+96))+rsp]
  2210. lea rdi,[224+rsp]
  2211. call __ecp_nistz256_mul_montq
  ; slot256 = mul(slot32, slot448)   (sqr(in1.z) * in1.z)
  2212. mov rax,QWORD[448+rsp]
  2213. lea rbx,[448+rsp]
  2214. mov r9,QWORD[((0+32))+rsp]
  2215. mov r10,QWORD[((8+32))+rsp]
  2216. lea rsi,[((0+32))+rsp]
  2217. mov r11,QWORD[((16+32))+rsp]
  2218. mov r12,QWORD[((24+32))+rsp]
  2219. lea rdi,[256+rsp]
  2220. call __ecp_nistz256_mul_montq
  ; slot224 = mul(slot224, slot416)  (... * in1.y)
  2221. mov rax,QWORD[416+rsp]
  2222. lea rbx,[416+rsp]
  2223. mov r9,QWORD[((0+224))+rsp]
  2224. mov r10,QWORD[((8+224))+rsp]
  2225. lea rsi,[((0+224))+rsp]
  2226. mov r11,QWORD[((16+224))+rsp]
  2227. mov r12,QWORD[((24+224))+rsp]
  2228. lea rdi,[224+rsp]
  2229. call __ecp_nistz256_mul_montq
  ; slot256 = mul(slot256, slot512)  (... * in2.y)
  2230. mov rax,QWORD[512+rsp]
  2231. lea rbx,[512+rsp]
  2232. mov r9,QWORD[((0+256))+rsp]
  2233. mov r10,QWORD[((8+256))+rsp]
  2234. lea rsi,[((0+256))+rsp]
  2235. mov r11,QWORD[((16+256))+rsp]
  2236. mov r12,QWORD[((24+256))+rsp]
  2237. lea rdi,[256+rsp]
  2238. call __ecp_nistz256_mul_montq
  ; slot64 = slot256 - slot224 (minuend taken from the registers left by the
  ; multiplication above).
  2239. lea rbx,[224+rsp]
  2240. lea rdi,[64+rsp]
  2241. call __ecp_nistz256_sub_fromq
  ; r12 = OR of the four difference qwords (zero iff slot64 == 0), saved in
  ; xmm3; xmm2 = xmm4 | xmm5 (either z is zero).
  2242. or r12,r13
  2243. movdqa xmm2,xmm4
  2244. or r12,r8
  2245. or r12,r9
  2246. por xmm2,xmm5
  2247. DB 102,73,15,110,220 ; movq xmm3,r12
  ; slot160 = mul(slot96, slot384)   (sqr(in2.z) * in1.x)
  2248. mov rax,QWORD[384+rsp]
  2249. lea rbx,[384+rsp]
  2250. mov r9,QWORD[((0+96))+rsp]
  2251. mov r10,QWORD[((8+96))+rsp]
  2252. lea rsi,[((0+96))+rsp]
  2253. mov r11,QWORD[((16+96))+rsp]
  2254. mov r12,QWORD[((24+96))+rsp]
  2255. lea rdi,[160+rsp]
  2256. call __ecp_nistz256_mul_montq
  ; slot192 = mul(slot32, slot480)   (sqr(in1.z) * in2.x)
  2257. mov rax,QWORD[480+rsp]
  2258. lea rbx,[480+rsp]
  2259. mov r9,QWORD[((0+32))+rsp]
  2260. mov r10,QWORD[((8+32))+rsp]
  2261. lea rsi,[((0+32))+rsp]
  2262. mov r11,QWORD[((16+32))+rsp]
  2263. mov r12,QWORD[((24+32))+rsp]
  2264. lea rdi,[192+rsp]
  2265. call __ecp_nistz256_mul_montq
  ; slot0 = slot192 - slot160
  2266. lea rbx,[160+rsp]
  2267. lea rdi,[rsp]
  2268. call __ecp_nistz256_sub_fromq
  ; r12 = nonzero iff slot0 != 0; r8 = low qword of the "either z is zero"
  ; mask; r9 = saved "slot64 != 0" indicator.
  2269. or r12,r13
  2270. or r12,r8
  2271. or r12,r9
  2272. DB 102,73,15,126,208 ; movq r8,xmm2
  2273. DB 102,73,15,126,217 ; movq r9,xmm3
  2274. or r12,r8
  2275. DB 0x3e ; prefix byte for the jnz below (presumably a taken-branch hint)
  2276. jnz NEAR $L$add_proceedq
  ; Here slot0 == 0 and neither z is zero.
  2277. test r9,r9
  2278. jz NEAR $L$add_doubleq ; slot64 == 0 as well: double in1 instead
  ; slot64 != 0: the result is written as 96 zero bytes.
  2279. DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
  2280. pxor xmm0,xmm0
  2281. movdqu XMMWORD[rdi],xmm0
  2282. movdqu XMMWORD[16+rdi],xmm0
  2283. movdqu XMMWORD[32+rdi],xmm0
  2284. movdqu XMMWORD[48+rdi],xmm0
  2285. movdqu XMMWORD[64+rdi],xmm0
  2286. movdqu XMMWORD[80+rdi],xmm0
  2287. jmp NEAR $L$add_doneq
  2288. ALIGN 32
  2289. $L$add_doubleq:
  ; Tail into the doubling code: 416 = (32*18+8) - (32*5+8), so rsp matches
  ; the frame that ecp_nistz256_point_double's body and epilogue expect.
  2290. DB 102,72,15,126,206 ; movq rsi,xmm1  (in1 pointer)
  2291. DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
  2292. add rsp,416
  2293. jmp NEAR $L$point_double_shortcutq
  2294. ALIGN 32
  2295. $L$add_proceedq:
  ; slot96 = sqr(slot64)
  2296. mov rax,QWORD[((0+64))+rsp]
  2297. mov r14,QWORD[((8+64))+rsp]
  2298. lea rsi,[((0+64))+rsp]
  2299. mov r15,QWORD[((16+64))+rsp]
  2300. mov r8,QWORD[((24+64))+rsp]
  2301. lea rdi,[96+rsp]
  2302. call __ecp_nistz256_sqr_montq
  ; slot352 = mul(slot0, slot448)    (slot0 * in1.z)
  2303. mov rax,QWORD[448+rsp]
  2304. lea rbx,[448+rsp]
  2305. mov r9,QWORD[((0+0))+rsp]
  2306. mov r10,QWORD[((8+0))+rsp]
  2307. lea rsi,[((0+0))+rsp]
  2308. mov r11,QWORD[((16+0))+rsp]
  2309. mov r12,QWORD[((24+0))+rsp]
  2310. lea rdi,[352+rsp]
  2311. call __ecp_nistz256_mul_montq
  ; slot32 = sqr(slot0)
  2312. mov rax,QWORD[((0+0))+rsp]
  2313. mov r14,QWORD[((8+0))+rsp]
  2314. lea rsi,[((0+0))+rsp]
  2315. mov r15,QWORD[((16+0))+rsp]
  2316. mov r8,QWORD[((24+0))+rsp]
  2317. lea rdi,[32+rsp]
  2318. call __ecp_nistz256_sqr_montq
  ; slot352 = mul(slot352, slot544)  (... * in2.z) -- the computed z
  2319. mov rax,QWORD[544+rsp]
  2320. lea rbx,[544+rsp]
  2321. mov r9,QWORD[((0+352))+rsp]
  2322. mov r10,QWORD[((8+352))+rsp]
  2323. lea rsi,[((0+352))+rsp]
  2324. mov r11,QWORD[((16+352))+rsp]
  2325. mov r12,QWORD[((24+352))+rsp]
  2326. lea rdi,[352+rsp]
  2327. call __ecp_nistz256_mul_montq
  ; slot128 = mul(slot32, slot0)     (sqr(slot0) * slot0)
  2328. mov rax,QWORD[rsp]
  2329. lea rbx,[rsp]
  2330. mov r9,QWORD[((0+32))+rsp]
  2331. mov r10,QWORD[((8+32))+rsp]
  2332. lea rsi,[((0+32))+rsp]
  2333. mov r11,QWORD[((16+32))+rsp]
  2334. mov r12,QWORD[((24+32))+rsp]
  2335. lea rdi,[128+rsp]
  2336. call __ecp_nistz256_mul_montq
  ; slot192 = mul(slot32, slot160)
  2337. mov rax,QWORD[160+rsp]
  2338. lea rbx,[160+rsp]
  2339. mov r9,QWORD[((0+32))+rsp]
  2340. mov r10,QWORD[((8+32))+rsp]
  2341. lea rsi,[((0+32))+rsp]
  2342. mov r11,QWORD[((16+32))+rsp]
  2343. mov r12,QWORD[((24+32))+rsp]
  2344. lea rdi,[192+rsp]
  2345. call __ecp_nistz256_mul_montq
  ; Inline modular doubling of the product just computed (same sequence as
  ; __ecp_nistz256_mul_by_2q but without the store), interleaved with loading
  ; slot96 into rax,rbp,rcx,r10 as the minuend for the subtraction helper.
  2346. xor r11,r11
  2347. add r12,r12
  2348. lea rsi,[96+rsp]
  2349. adc r13,r13
  2350. mov rax,r12
  2351. adc r8,r8
  2352. adc r9,r9
  2353. mov rbp,r13
  2354. adc r11,0
  2355. sub r12,-1
  2356. mov rcx,r8
  2357. sbb r13,r14
  2358. sbb r8,0
  2359. mov r10,r9
  2360. sbb r9,r15
  2361. sbb r11,0
  2362. cmovc r12,rax
  2363. mov rax,QWORD[rsi]
  2364. cmovc r13,rbp
  2365. mov rbp,QWORD[8+rsi]
  2366. cmovc r8,rcx
  2367. mov rcx,QWORD[16+rsi]
  2368. cmovc r9,r10
  2369. mov r10,QWORD[24+rsi]
  ; registers = slot96 - 2*slot192
  2370. call __ecp_nistz256_subq
  ; slot288 = registers - slot128 -- the computed x
  2371. lea rbx,[128+rsp]
  2372. lea rdi,[288+rsp]
  2373. call __ecp_nistz256_sub_fromq
  ; slot320 = slot192 - slot288 (register-only helper, stored explicitly)
  2374. mov rax,QWORD[((192+0))+rsp]
  2375. mov rbp,QWORD[((192+8))+rsp]
  2376. mov rcx,QWORD[((192+16))+rsp]
  2377. mov r10,QWORD[((192+24))+rsp]
  2378. lea rdi,[320+rsp]
  2379. call __ecp_nistz256_subq
  2380. mov QWORD[rdi],r12
  2381. mov QWORD[8+rdi],r13
  2382. mov QWORD[16+rdi],r8
  2383. mov QWORD[24+rdi],r9
  ; slot256 = mul(slot224, slot128)
  2384. mov rax,QWORD[128+rsp]
  2385. lea rbx,[128+rsp]
  2386. mov r9,QWORD[((0+224))+rsp]
  2387. mov r10,QWORD[((8+224))+rsp]
  2388. lea rsi,[((0+224))+rsp]
  2389. mov r11,QWORD[((16+224))+rsp]
  2390. mov r12,QWORD[((24+224))+rsp]
  2391. lea rdi,[256+rsp]
  2392. call __ecp_nistz256_mul_montq
  ; slot320 = mul(slot64, slot320)
  2393. mov rax,QWORD[320+rsp]
  2394. lea rbx,[320+rsp]
  2395. mov r9,QWORD[((0+64))+rsp]
  2396. mov r10,QWORD[((8+64))+rsp]
  2397. lea rsi,[((0+64))+rsp]
  2398. mov r11,QWORD[((16+64))+rsp]
  2399. mov r12,QWORD[((24+64))+rsp]
  2400. lea rdi,[320+rsp]
  2401. call __ecp_nistz256_mul_montq
  ; slot320 = slot320 - slot256 -- the computed y
  2402. lea rbx,[256+rsp]
  2403. lea rdi,[320+rsp]
  2404. call __ecp_nistz256_sub_fromq
  ; Branch-free output selection. For each coordinate:
  ;   t   = (xmm5 & in2 value) | (~xmm5 & computed value)
  ;   out = (xmm4 & in1 value) | (~xmm4 & t)
  ; i.e. in2 is used when in1.z is zero, and in1 is used when in2.z is zero.
  2405. DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
  ; z: computed slot352, in2.z slot544, in1.z slot448 -> result+64
  2406. movdqa xmm0,xmm5
  2407. movdqa xmm1,xmm5
  2408. pandn xmm0,XMMWORD[352+rsp]
  2409. movdqa xmm2,xmm5
  2410. pandn xmm1,XMMWORD[((352+16))+rsp]
  2411. movdqa xmm3,xmm5
  2412. pand xmm2,XMMWORD[544+rsp]
  2413. pand xmm3,XMMWORD[((544+16))+rsp]
  2414. por xmm2,xmm0
  2415. por xmm3,xmm1
  2416. movdqa xmm0,xmm4
  2417. movdqa xmm1,xmm4
  2418. pandn xmm0,xmm2
  2419. movdqa xmm2,xmm4
  2420. pandn xmm1,xmm3
  2421. movdqa xmm3,xmm4
  2422. pand xmm2,XMMWORD[448+rsp]
  2423. pand xmm3,XMMWORD[((448+16))+rsp]
  2424. por xmm2,xmm0
  2425. por xmm3,xmm1
  2426. movdqu XMMWORD[64+rdi],xmm2
  2427. movdqu XMMWORD[80+rdi],xmm3
  ; x: computed slot288, in2.x slot480, in1.x slot384 -> result+0
  2428. movdqa xmm0,xmm5
  2429. movdqa xmm1,xmm5
  2430. pandn xmm0,XMMWORD[288+rsp]
  2431. movdqa xmm2,xmm5
  2432. pandn xmm1,XMMWORD[((288+16))+rsp]
  2433. movdqa xmm3,xmm5
  2434. pand xmm2,XMMWORD[480+rsp]
  2435. pand xmm3,XMMWORD[((480+16))+rsp]
  2436. por xmm2,xmm0
  2437. por xmm3,xmm1
  2438. movdqa xmm0,xmm4
  2439. movdqa xmm1,xmm4
  2440. pandn xmm0,xmm2
  2441. movdqa xmm2,xmm4
  2442. pandn xmm1,xmm3
  2443. movdqa xmm3,xmm4
  2444. pand xmm2,XMMWORD[384+rsp]
  2445. pand xmm3,XMMWORD[((384+16))+rsp]
  2446. por xmm2,xmm0
  2447. por xmm3,xmm1
  2448. movdqu XMMWORD[rdi],xmm2
  2449. movdqu XMMWORD[16+rdi],xmm3
  ; y: computed slot320, in2.y slot512, in1.y slot416 -> result+32
  2450. movdqa xmm0,xmm5
  2451. movdqa xmm1,xmm5
  2452. pandn xmm0,XMMWORD[320+rsp]
  2453. movdqa xmm2,xmm5
  2454. pandn xmm1,XMMWORD[((320+16))+rsp]
  2455. movdqa xmm3,xmm5
  2456. pand xmm2,XMMWORD[512+rsp]
  2457. pand xmm3,XMMWORD[((512+16))+rsp]
  2458. por xmm2,xmm0
  2459. por xmm3,xmm1
  2460. movdqa xmm0,xmm4
  2461. movdqa xmm1,xmm4
  2462. pandn xmm0,xmm2
  2463. movdqa xmm2,xmm4
  2464. pandn xmm1,xmm3
  2465. movdqa xmm3,xmm4
  2466. pand xmm2,XMMWORD[416+rsp]
  2467. pand xmm3,XMMWORD[((416+16))+rsp]
  2468. por xmm2,xmm0
  2469. por xmm3,xmm1
  2470. movdqu XMMWORD[32+rdi],xmm2
  2471. movdqu XMMWORD[48+rdi],xmm3
  2472. $L$add_doneq:
  ; Epilogue: rsi = rsp + 584 + 6*8, the stack pointer value at entry.
  2473. lea rsi,[((576+56))+rsp]
  2474. mov r15,QWORD[((-48))+rsi]
  2475. mov r14,QWORD[((-40))+rsi]
  2476. mov r13,QWORD[((-32))+rsi]
  2477. mov r12,QWORD[((-24))+rsi]
  2478. mov rbx,QWORD[((-16))+rsi]
  2479. mov rbp,QWORD[((-8))+rsi]
  2480. lea rsp,[rsi]
  2481. $L$point_addq_epilogue:
  2482. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2483. mov rsi,QWORD[16+rsp]
  2484. DB 0F3h,0C3h ;repret
  2485. $L$SEH_end_ecp_nistz256_point_add:
  ; ecp_nistz256_point_add_affine -- Win64 entry point.
  ; Win64 args: rcx -> result (96 bytes written), rdx -> in1 (96 bytes read),
  ;             r8  -> in2 (64 bytes read). They are moved to rdi, rsi, rdx;
  ; the caller's rdi/rsi are parked at [rsp+8]/[rsp+16] until the epilogue.
  ; in1 is handled as three 32-byte values x, y, z at offsets 0, 32, 64 and
  ; in2 as two 32-byte values x, y at offsets 0, 32 (presumably a Jacobian
  ; and an affine point -- confirm against the C prototype).
  ; Frame after the prologue: fifteen 32-byte slots ("slotN" = rsp+N):
  ;   slot320/352/384 = copy of in1.x/in1.y/in1.z
  ;   slot416/448     = copy of in2.x/in2.y
  ;   slot224/256/288 = computed x/y/z before the final selection
  ;   slot0..slot192  = temporaries, described where they are written
  ; Masks for the final selection: xmm5 = all-ones when in1.z is all zero;
  ; xmm4 = all-ones when all 64 bytes of in2 are zero.
  ; Unlike ecp_nistz256_point_add, no equal-input special case is tested in
  ; this routine.
  ; r14/r15 are not loaded with the modulus here before the first
  ; __ecp_nistz256_sub_fromq call.
  ; NOTE(review): this relies on the *_montq helpers leaving the modulus
  ; qwords in r14/r15 (mul) or rsi/rbp (sqr) -- confirm in those routines.
  2486. global ecp_nistz256_point_add_affine
  2487. ALIGN 32
  2488. ecp_nistz256_point_add_affine:
  2489. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2490. mov QWORD[16+rsp],rsi
  2491. mov rax,rsp
  2492. $L$SEH_begin_ecp_nistz256_point_add_affine:
  2493. mov rdi,rcx
  2494. mov rsi,rdx
  2495. mov rdx,r8
  ; Dispatch to $L$point_add_affinex when both bits of mask 0x80100 are set
  ; in the qword at OPENSSL_ia32cap_P+8 (presumably BMI2+ADX -- confirm).
  2496. lea rcx,[OPENSSL_ia32cap_P]
  2497. mov rcx,QWORD[8+rcx]
  2498. and ecx,0x80100
  2499. cmp ecx,0x80100
  2500. je NEAR $L$point_add_affinex
  2501. push rbp
  2502. push rbx
  2503. push r12
  2504. push r13
  2505. push r14
  2506. push r15
  2507. sub rsp,32*15+8 ; fifteen 32-byte slots plus 8 bytes of padding
  2508. $L$add_affineq_body:
  ; Copy in1 to slot320..slot384; rbx = in2 pointer; rax,r14,r15,r8 = in1.z.
  2509. movdqu xmm0,XMMWORD[rsi]
  2510. mov rbx,rdx
  2511. movdqu xmm1,XMMWORD[16+rsi]
  2512. movdqu xmm2,XMMWORD[32+rsi]
  2513. movdqu xmm3,XMMWORD[48+rsi]
  2514. movdqu xmm4,XMMWORD[64+rsi]
  2515. movdqu xmm5,XMMWORD[80+rsi]
  2516. mov rax,QWORD[((64+0))+rsi]
  2517. mov r14,QWORD[((64+8))+rsi]
  2518. mov r15,QWORD[((64+16))+rsi]
  2519. mov r8,QWORD[((64+24))+rsi]
  2520. movdqa XMMWORD[320+rsp],xmm0
  2521. movdqa XMMWORD[(320+16)+rsp],xmm1
  2522. movdqa XMMWORD[352+rsp],xmm2
  2523. movdqa XMMWORD[(352+16)+rsp],xmm3
  2524. movdqa XMMWORD[384+rsp],xmm4
  2525. movdqa XMMWORD[(384+16)+rsp],xmm5
  ; Interleaved: OR-fold in1.z into dword 0 of xmm5, copy in2 to
  ; slot416/slot448, and OR all four 16-byte pieces of in2 into xmm3.
  2526. por xmm5,xmm4
  2527. movdqu xmm0,XMMWORD[rbx]
  2528. pshufd xmm3,xmm5,0xb1
  2529. movdqu xmm1,XMMWORD[16+rbx]
  2530. movdqu xmm2,XMMWORD[32+rbx]
  2531. por xmm5,xmm3
  2532. movdqu xmm3,XMMWORD[48+rbx]
  2533. movdqa XMMWORD[416+rsp],xmm0
  2534. pshufd xmm4,xmm5,0x1e
  2535. movdqa XMMWORD[(416+16)+rsp],xmm1
  2536. por xmm1,xmm0
  2537. DB 102,72,15,110,199 ; movq xmm0,rdi  (save result pointer)
  2538. movdqa XMMWORD[448+rsp],xmm2
  2539. movdqa XMMWORD[(448+16)+rsp],xmm3
  2540. por xmm3,xmm2
  2541. por xmm5,xmm4
  2542. pxor xmm4,xmm4
  2543. por xmm3,xmm1
  ; slot32 = sqr(in1.z)
  2544. lea rsi,[((64-0))+rsi]
  2545. lea rdi,[32+rsp]
  2546. call __ecp_nistz256_sqr_montq
  ; Interleaved: finish the masks (xmm5 = all-ones iff in1.z == 0, xmm4 =
  ; all-ones iff in2 is all zero) and move the square from r12,r13,r14,r15
  ; into r9,r10,r11,r12 as the first multiplicand; rax = in2.x[0].
  2547. pcmpeqd xmm5,xmm4
  2548. pshufd xmm4,xmm3,0xb1
  2549. mov rax,QWORD[rbx]
  2550. mov r9,r12
  2551. por xmm4,xmm3
  2552. pshufd xmm5,xmm5,0
  2553. pshufd xmm3,xmm4,0x1e
  2554. mov r10,r13
  2555. por xmm4,xmm3
  2556. pxor xmm3,xmm3
  2557. mov r11,r14
  2558. pcmpeqd xmm4,xmm3
  2559. pshufd xmm4,xmm4,0
  ; slot0 = mul(slot32, in2.x)   (rbx still points at in2)
  2560. lea rsi,[((32-0))+rsp]
  2561. mov r12,r15
  2562. lea rdi,[rsp]
  2563. call __ecp_nistz256_mul_montq
  ; slot64 = slot0 - slot320     (... - in1.x)
  2564. lea rbx,[320+rsp]
  2565. lea rdi,[64+rsp]
  2566. call __ecp_nistz256_sub_fromq
  ; slot32 = mul(slot32, slot384)    (sqr(in1.z) * in1.z)
  2567. mov rax,QWORD[384+rsp]
  2568. lea rbx,[384+rsp]
  2569. mov r9,QWORD[((0+32))+rsp]
  2570. mov r10,QWORD[((8+32))+rsp]
  2571. lea rsi,[((0+32))+rsp]
  2572. mov r11,QWORD[((16+32))+rsp]
  2573. mov r12,QWORD[((24+32))+rsp]
  2574. lea rdi,[32+rsp]
  2575. call __ecp_nistz256_mul_montq
  ; slot288 = mul(slot64, slot384)   (slot64 * in1.z) -- the computed z
  2576. mov rax,QWORD[384+rsp]
  2577. lea rbx,[384+rsp]
  2578. mov r9,QWORD[((0+64))+rsp]
  2579. mov r10,QWORD[((8+64))+rsp]
  2580. lea rsi,[((0+64))+rsp]
  2581. mov r11,QWORD[((16+64))+rsp]
  2582. mov r12,QWORD[((24+64))+rsp]
  2583. lea rdi,[288+rsp]
  2584. call __ecp_nistz256_mul_montq
  ; slot32 = mul(slot32, slot448)    (... * in2.y)
  2585. mov rax,QWORD[448+rsp]
  2586. lea rbx,[448+rsp]
  2587. mov r9,QWORD[((0+32))+rsp]
  2588. mov r10,QWORD[((8+32))+rsp]
  2589. lea rsi,[((0+32))+rsp]
  2590. mov r11,QWORD[((16+32))+rsp]
  2591. mov r12,QWORD[((24+32))+rsp]
  2592. lea rdi,[32+rsp]
  2593. call __ecp_nistz256_mul_montq
  ; slot96 = slot32 - slot352        (... - in1.y)
  2594. lea rbx,[352+rsp]
  2595. lea rdi,[96+rsp]
  2596. call __ecp_nistz256_sub_fromq
  ; slot128 = sqr(slot64)
  2597. mov rax,QWORD[((0+64))+rsp]
  2598. mov r14,QWORD[((8+64))+rsp]
  2599. lea rsi,[((0+64))+rsp]
  2600. mov r15,QWORD[((16+64))+rsp]
  2601. mov r8,QWORD[((24+64))+rsp]
  2602. lea rdi,[128+rsp]
  2603. call __ecp_nistz256_sqr_montq
  ; slot192 = sqr(slot96)
  2604. mov rax,QWORD[((0+96))+rsp]
  2605. mov r14,QWORD[((8+96))+rsp]
  2606. lea rsi,[((0+96))+rsp]
  2607. mov r15,QWORD[((16+96))+rsp]
  2608. mov r8,QWORD[((24+96))+rsp]
  2609. lea rdi,[192+rsp]
  2610. call __ecp_nistz256_sqr_montq
  ; slot160 = mul(slot64, slot128)
  2611. mov rax,QWORD[128+rsp]
  2612. lea rbx,[128+rsp]
  2613. mov r9,QWORD[((0+64))+rsp]
  2614. mov r10,QWORD[((8+64))+rsp]
  2615. lea rsi,[((0+64))+rsp]
  2616. mov r11,QWORD[((16+64))+rsp]
  2617. mov r12,QWORD[((24+64))+rsp]
  2618. lea rdi,[160+rsp]
  2619. call __ecp_nistz256_mul_montq
  ; slot0 = mul(slot128, slot320)    (... * in1.x)
  2620. mov rax,QWORD[320+rsp]
  2621. lea rbx,[320+rsp]
  2622. mov r9,QWORD[((0+128))+rsp]
  2623. mov r10,QWORD[((8+128))+rsp]
  2624. lea rsi,[((0+128))+rsp]
  2625. mov r11,QWORD[((16+128))+rsp]
  2626. mov r12,QWORD[((24+128))+rsp]
  2627. lea rdi,[rsp]
  2628. call __ecp_nistz256_mul_montq
  ; Inline modular doubling of the product just computed (same sequence as
  ; __ecp_nistz256_mul_by_2q but without the store), interleaved with loading
  ; slot192 into rax,rbp,rcx,r10 as the minuend for the subtraction helper.
  2629. xor r11,r11
  2630. add r12,r12
  2631. lea rsi,[192+rsp]
  2632. adc r13,r13
  2633. mov rax,r12
  2634. adc r8,r8
  2635. adc r9,r9
  2636. mov rbp,r13
  2637. adc r11,0
  2638. sub r12,-1
  2639. mov rcx,r8
  2640. sbb r13,r14
  2641. sbb r8,0
  2642. mov r10,r9
  2643. sbb r9,r15
  2644. sbb r11,0
  2645. cmovc r12,rax
  2646. mov rax,QWORD[rsi]
  2647. cmovc r13,rbp
  2648. mov rbp,QWORD[8+rsi]
  2649. cmovc r8,rcx
  2650. mov rcx,QWORD[16+rsi]
  2651. cmovc r9,r10
  2652. mov r10,QWORD[24+rsi]
  ; registers = slot192 - 2*slot0
  2653. call __ecp_nistz256_subq
  ; slot224 = registers - slot160 -- the computed x
  2654. lea rbx,[160+rsp]
  2655. lea rdi,[224+rsp]
  2656. call __ecp_nistz256_sub_fromq
  ; slot64 = slot0 - slot224 (register-only helper, stored explicitly)
  2657. mov rax,QWORD[((0+0))+rsp]
  2658. mov rbp,QWORD[((0+8))+rsp]
  2659. mov rcx,QWORD[((0+16))+rsp]
  2660. mov r10,QWORD[((0+24))+rsp]
  2661. lea rdi,[64+rsp]
  2662. call __ecp_nistz256_subq
  2663. mov QWORD[rdi],r12
  2664. mov QWORD[8+rdi],r13
  2665. mov QWORD[16+rdi],r8
  2666. mov QWORD[24+rdi],r9
  ; slot32 = mul(slot160, slot352)   (... * in1.y)
  2667. mov rax,QWORD[352+rsp]
  2668. lea rbx,[352+rsp]
  2669. mov r9,QWORD[((0+160))+rsp]
  2670. mov r10,QWORD[((8+160))+rsp]
  2671. lea rsi,[((0+160))+rsp]
  2672. mov r11,QWORD[((16+160))+rsp]
  2673. mov r12,QWORD[((24+160))+rsp]
  2674. lea rdi,[32+rsp]
  2675. call __ecp_nistz256_mul_montq
  ; slot64 = mul(slot64, slot96)
  2676. mov rax,QWORD[96+rsp]
  2677. lea rbx,[96+rsp]
  2678. mov r9,QWORD[((0+64))+rsp]
  2679. mov r10,QWORD[((8+64))+rsp]
  2680. lea rsi,[((0+64))+rsp]
  2681. mov r11,QWORD[((16+64))+rsp]
  2682. mov r12,QWORD[((24+64))+rsp]
  2683. lea rdi,[64+rsp]
  2684. call __ecp_nistz256_mul_montq
  ; slot256 = slot64 - slot32 -- the computed y
  2685. lea rbx,[32+rsp]
  2686. lea rdi,[256+rsp]
  2687. call __ecp_nistz256_sub_fromq
  ; Branch-free output selection. For each coordinate:
  ;   t   = (xmm5 & fallback) | (~xmm5 & computed value)
  ;   out = (xmm4 & in1 value) | (~xmm4 & t)
  ; where fallback is in2.x / in2.y, or the constant $L$ONE_mont for z.
  2688. DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
  ; z: computed slot288, fallback $L$ONE_mont, in1.z slot384 -> result+64
  2689. movdqa xmm0,xmm5
  2690. movdqa xmm1,xmm5
  2691. pandn xmm0,XMMWORD[288+rsp]
  2692. movdqa xmm2,xmm5
  2693. pandn xmm1,XMMWORD[((288+16))+rsp]
  2694. movdqa xmm3,xmm5
  2695. pand xmm2,XMMWORD[$L$ONE_mont]
  2696. pand xmm3,XMMWORD[(($L$ONE_mont+16))]
  2697. por xmm2,xmm0
  2698. por xmm3,xmm1
  2699. movdqa xmm0,xmm4
  2700. movdqa xmm1,xmm4
  2701. pandn xmm0,xmm2
  2702. movdqa xmm2,xmm4
  2703. pandn xmm1,xmm3
  2704. movdqa xmm3,xmm4
  2705. pand xmm2,XMMWORD[384+rsp]
  2706. pand xmm3,XMMWORD[((384+16))+rsp]
  2707. por xmm2,xmm0
  2708. por xmm3,xmm1
  2709. movdqu XMMWORD[64+rdi],xmm2
  2710. movdqu XMMWORD[80+rdi],xmm3
  ; x: computed slot224, fallback in2.x slot416, in1.x slot320 -> result+0
  2711. movdqa xmm0,xmm5
  2712. movdqa xmm1,xmm5
  2713. pandn xmm0,XMMWORD[224+rsp]
  2714. movdqa xmm2,xmm5
  2715. pandn xmm1,XMMWORD[((224+16))+rsp]
  2716. movdqa xmm3,xmm5
  2717. pand xmm2,XMMWORD[416+rsp]
  2718. pand xmm3,XMMWORD[((416+16))+rsp]
  2719. por xmm2,xmm0
  2720. por xmm3,xmm1
  2721. movdqa xmm0,xmm4
  2722. movdqa xmm1,xmm4
  2723. pandn xmm0,xmm2
  2724. movdqa xmm2,xmm4
  2725. pandn xmm1,xmm3
  2726. movdqa xmm3,xmm4
  2727. pand xmm2,XMMWORD[320+rsp]
  2728. pand xmm3,XMMWORD[((320+16))+rsp]
  2729. por xmm2,xmm0
  2730. por xmm3,xmm1
  2731. movdqu XMMWORD[rdi],xmm2
  2732. movdqu XMMWORD[16+rdi],xmm3
  ; y: computed slot256, fallback in2.y slot448, in1.y slot352 -> result+32
  2733. movdqa xmm0,xmm5
  2734. movdqa xmm1,xmm5
  2735. pandn xmm0,XMMWORD[256+rsp]
  2736. movdqa xmm2,xmm5
  2737. pandn xmm1,XMMWORD[((256+16))+rsp]
  2738. movdqa xmm3,xmm5
  2739. pand xmm2,XMMWORD[448+rsp]
  2740. pand xmm3,XMMWORD[((448+16))+rsp]
  2741. por xmm2,xmm0
  2742. por xmm3,xmm1
  2743. movdqa xmm0,xmm4
  2744. movdqa xmm1,xmm4
  2745. pandn xmm0,xmm2
  2746. movdqa xmm2,xmm4
  2747. pandn xmm1,xmm3
  2748. movdqa xmm3,xmm4
  2749. pand xmm2,XMMWORD[352+rsp]
  2750. pand xmm3,XMMWORD[((352+16))+rsp]
  2751. por xmm2,xmm0
  2752. por xmm3,xmm1
  2753. movdqu XMMWORD[32+rdi],xmm2
  2754. movdqu XMMWORD[48+rdi],xmm3
  ; Epilogue: rsi = rsp + 488 + 6*8, the stack pointer value at entry.
  2755. lea rsi,[((480+56))+rsp]
  2756. mov r15,QWORD[((-48))+rsi]
  2757. mov r14,QWORD[((-40))+rsi]
  2758. mov r13,QWORD[((-32))+rsi]
  2759. mov r12,QWORD[((-24))+rsi]
  2760. mov rbx,QWORD[((-16))+rsi]
  2761. mov rbp,QWORD[((-8))+rsi]
  2762. lea rsp,[rsi]
  2763. $L$add_affineq_epilogue:
  2764. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2765. mov rsi,QWORD[16+rsp]
  2766. DB 0F3h,0C3h ;repret
  2767. $L$SEH_end_ecp_nistz256_point_add_affine:
  2768. ALIGN 32
  ; __ecp_nistz256_add_tox -- branch-free 256-bit modular addition; the
  ; helper called from the "x" code path (e.g. ecp_nistz256_point_doublex).
  ; Same register interface as __ecp_nistz256_add_toq:
  ; In:   r12,r13,r8,r9 = operand a, least-significant qword first
  ;       rbx           -> operand b (4 qwords)
  ;       r14,r15       = modulus qwords 1 and 3 (qwords 0 and 2 are the
  ;                       immediates -1 and 0 below)
  ;       rdi           -> destination (4 qwords)
  ; Out:  r12,r13,r8,r9 = a+b, or a+b-modulus when that subtraction does not
  ;       borrow; the same four qwords are stored at [rdi]
  ; Clobbers rax, rbp, rcx, r10, r11 and the flags.
  ; Each chain starts with an xor, which clears CF, so the leading adc/sbb
  ; acts as a plain add/sub.
  2769. __ecp_nistz256_add_tox:
  2770. xor r11,r11 ; r11 = 0 and CF = 0
  2771. adc r12,QWORD[rbx]
  2772. adc r13,QWORD[8+rbx]
  2773. mov rax,r12 ; rax,rbp,rcx,r10 keep the unreduced sum
  2774. adc r8,QWORD[16+rbx]
  2775. adc r9,QWORD[24+rbx]
  2776. mov rbp,r13
  2777. adc r11,0 ; r11 = carry out of the 256-bit add
  2778. xor r10,r10 ; clears CF for the sbb chain; r10 is overwritten below
  2779. sbb r12,-1
  2780. mov rcx,r8
  2781. sbb r13,r14
  2782. sbb r8,0
  2783. mov r10,r9
  2784. sbb r9,r15
  2785. sbb r11,0 ; CF=1 here means the sum was below the modulus
  ; On borrow, put the unreduced sum back.
  2786. cmovc r12,rax
  2787. cmovc r13,rbp
  2788. mov QWORD[rdi],r12
  2789. cmovc r8,rcx
  2790. mov QWORD[8+rdi],r13
  2791. cmovc r9,r10
  2792. mov QWORD[16+rdi],r8
  2793. mov QWORD[24+rdi],r9
  2794. DB 0F3h,0C3h ;repret
  2795. ALIGN 32
  ; __ecp_nistz256_sub_fromx -- branch-free 256-bit modular subtraction; the
  ; helper called from the "x" code path.
  ; Same register interface as __ecp_nistz256_sub_fromq:
  ; In:   r12,r13,r8,r9 = minuend a, least-significant qword first
  ;       rbx           -> subtrahend b (4 qwords)
  ;       r14,r15       = modulus qwords 1 and 3 (qwords 0 and 2 are the
  ;                       immediates -1 and 0 below)
  ;       rdi           -> destination (4 qwords)
  ; Out:  r12,r13,r8,r9 = a-b, plus the modulus when a-b borrowed; the same
  ;       four qwords are stored at [rdi]
  ; Clobbers rax, rbp, rcx, r10, r11 and the flags.
  2796. __ecp_nistz256_sub_fromx:
  2797. xor r11,r11 ; r11 = 0 and CF = 0, so the first sbb is a plain sub
  2798. sbb r12,QWORD[rbx]
  2799. sbb r13,QWORD[8+rbx]
  2800. mov rax,r12 ; rax,rbp,rcx,r10 keep the raw difference
  2801. sbb r8,QWORD[16+rbx]
  2802. sbb r9,QWORD[24+rbx]
  2803. mov rbp,r13
  2804. sbb r11,0 ; r11 = all-ones if the subtraction borrowed, else 0
  2805. xor r10,r10 ; clears CF for the adc chain; r10 is overwritten below
  ; Unconditionally add the modulus {-1, r14, 0, r15}.
  2806. adc r12,-1
  2807. mov rcx,r8
  2808. adc r13,r14
  2809. adc r8,0
  2810. mov r10,r9
  2811. adc r9,r15
  2812. bt r11,0 ; CF = borrow indicator (bit 0 of r11)
  ; No borrow: restore the raw difference.
  2813. cmovnc r12,rax
  2814. cmovnc r13,rbp
  2815. mov QWORD[rdi],r12
  2816. cmovnc r8,rcx
  2817. mov QWORD[8+rdi],r13
  2818. cmovnc r9,r10
  2819. mov QWORD[16+rdi],r8
  2820. mov QWORD[24+rdi],r9
  2821. DB 0F3h,0C3h ;repret
  2822. ALIGN 32
  ; __ecp_nistz256_subx -- register-only modular subtraction (no store); the
  ; helper called from the "x" code path.
  ; Same register interface as __ecp_nistz256_subq:
  ; In:   rax,rbp,rcx,r10 = minuend a, least-significant qword first
  ;       r12,r13,r8,r9   = subtrahend b
  ;       r14,r15         = modulus qwords 1 and 3 (qwords 0 and 2 are the
  ;                         immediates -1 and 0 below)
  ; Out:  r12,r13,r8,r9   = a-b, plus the modulus when a-b borrowed
  ; Nothing is written to memory and rdi is not referenced.
  ; Clobbers rax, rbp, rcx, r10, r11 and the flags.
  2823. __ecp_nistz256_subx:
  2824. xor r11,r11 ; r11 = 0 and CF = 0, so the first sbb is a plain sub
  2825. sbb rax,r12
  2826. sbb rbp,r13
  2827. mov r12,rax ; r12,r13,r8,r9 receive the raw difference
  2828. sbb rcx,r8
  2829. sbb r10,r9
  2830. mov r13,rbp
  2831. sbb r11,0 ; r11 = all-ones if the subtraction borrowed, else 0
  2832. xor r9,r9 ; clears CF for the adc chain; r9 is overwritten below
  ; rax,rbp,rcx,r10 become difference + modulus {-1, r14, 0, r15}.
  2833. adc rax,-1
  2834. mov r8,rcx
  2835. adc rbp,r14
  2836. adc rcx,0
  2837. mov r9,r10
  2838. adc r10,r15
  2839. bt r11,0 ; CF = borrow indicator (bit 0 of r11)
  ; Borrow: take the modulus-corrected value instead of the raw difference.
  2840. cmovc r12,rax
  2841. cmovc r13,rbp
  2842. cmovc r8,rcx
  2843. cmovc r9,r10
  2844. DB 0F3h,0C3h ;repret
  2845. ALIGN 32
  ; __ecp_nistz256_mul_by_2x -- branch-free modular doubling; the helper
  ; called from the "x" code path.
  ; Same register interface as __ecp_nistz256_mul_by_2q:
  ; In:   r12,r13,r8,r9 = operand a, least-significant qword first
  ;       r14,r15       = modulus qwords 1 and 3 (qwords 0 and 2 are the
  ;                       immediates -1 and 0 below)
  ;       rdi           -> destination (4 qwords)
  ; Out:  r12,r13,r8,r9 = 2a, or 2a-modulus when that subtraction does not
  ;       borrow; the same four qwords are stored at [rdi]
  ; Clobbers rax, rbp, rcx, r10, r11 and the flags.
  2846. __ecp_nistz256_mul_by_2x:
  2847. xor r11,r11 ; r11 = 0 and CF = 0, so the first adc is a plain add
  2848. adc r12,r12
  2849. adc r13,r13
  2850. mov rax,r12 ; rax,rbp,rcx,r10 keep the unreduced 2a
  2851. adc r8,r8
  2852. adc r9,r9
  2853. mov rbp,r13
  2854. adc r11,0 ; r11 = bit shifted out of the top qword
  2855. xor r10,r10 ; clears CF for the sbb chain; r10 is overwritten below
  ; Trial subtraction of the modulus {-1, r14, 0, r15}.
  2856. sbb r12,-1
  2857. mov rcx,r8
  2858. sbb r13,r14
  2859. sbb r8,0
  2860. mov r10,r9
  2861. sbb r9,r15
  2862. sbb r11,0 ; CF=1 here means 2a was below the modulus
  ; On borrow, put the unreduced value back.
  2863. cmovc r12,rax
  2864. cmovc r13,rbp
  2865. mov QWORD[rdi],r12
  2866. cmovc r8,rcx
  2867. mov QWORD[8+rdi],r13
  2868. cmovc r9,r10
  2869. mov QWORD[16+rdi],r8
  2870. mov QWORD[24+rdi],r9
  2871. DB 0F3h,0C3h ;repret
  2872. ALIGN 32
  ; ecp_nistz256_point_doublex -- "x" variant of the point-doubling routine.
  ; There is no "global" directive for this label in the visible code; the
  ; body is reached through $L$point_doublex, the target of the capability
  ; dispatch in ecp_nistz256_point_double (which has already moved the Win64
  ; arguments into rdi/rsi and parked the caller's rdi/rsi at [rsp+8]/[rsp+16]).
  ; rdi -> result (three 32-byte values written at +0, +32, +64)
  ; rsi -> input  (three 32-byte values x, y, z read at +0, +32, +64)
  ; Frame after the prologue: five 32-byte slots at rsp+0, +32, +64, +96, +128
  ; ("slotN" below means the 32 bytes at rsp+N).
  ; The instruction sequence mirrors the q variant with three differences
  ; visible at the call sites: the *_montx / *x helpers are called, the first
  ; qword of the second multiplicand is passed in rdx instead of rax, and the
  ; rsi operand pointer is passed biased by -128.
  ; NOTE(review): the -128 bias is presumably compensated inside the *_montx
  ; routines, and their result registers are presumably the same as for the
  ; q variants (r12,r13,r8,r9 after mul; r12,r13,r14,r15 with rsi/rbp holding
  ; the modulus qwords after sqr) -- confirm in those routines.
  2873. ecp_nistz256_point_doublex:
  2874. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2875. mov QWORD[16+rsp],rsi
  2876. mov rax,rsp
  2877. $L$SEH_begin_ecp_nistz256_point_doublex:
  2878. mov rdi,rcx
  2879. mov rsi,rdx
  2880. $L$point_doublex:
  2881. push rbp
  2882. push rbx
  2883. push r12
  2884. push r13
  2885. push r14
  2886. push r15
  2887. sub rsp,32*5+8 ; five 32-byte slots plus 8 bytes of padding
  2888. $L$point_doublex_body:
  2889. $L$point_double_shortcutx:
  ; slot96 = in.x; r12,r13,r8,r9 = in.y; rbx = input pointer;
  ; r14/r15 = qwords 1 and 3 of $L$poly.
  2890. movdqu xmm0,XMMWORD[rsi]
  2891. mov rbx,rsi
  2892. movdqu xmm1,XMMWORD[16+rsi]
  2893. mov r12,QWORD[((32+0))+rsi]
  2894. mov r13,QWORD[((32+8))+rsi]
  2895. mov r8,QWORD[((32+16))+rsi]
  2896. mov r9,QWORD[((32+24))+rsi]
  2897. mov r14,QWORD[(($L$poly+8))]
  2898. mov r15,QWORD[(($L$poly+24))]
  2899. movdqa XMMWORD[96+rsp],xmm0
  2900. movdqa XMMWORD[(96+16)+rsp],xmm1
  ; Park the three result pointers in xmm registers for later retrieval.
  2901. lea r10,[32+rdi]
  2902. lea r11,[64+rdi]
  2903. DB 102,72,15,110,199 ; movq xmm0,rdi  (result+0)
  2904. DB 102,73,15,110,202 ; movq xmm1,r10  (result+32)
  2905. DB 102,73,15,110,211 ; movq xmm2,r11  (result+64)
  ; slot0 = 2 * in.y
  2906. lea rdi,[rsp]
  2907. call __ecp_nistz256_mul_by_2x
  ; slot64 = sqr(in.z)
  2908. mov rdx,QWORD[((64+0))+rsi]
  2909. mov r14,QWORD[((64+8))+rsi]
  2910. mov r15,QWORD[((64+16))+rsi]
  2911. mov r8,QWORD[((64+24))+rsi]
  2912. lea rsi,[((64-128))+rsi]
  2913. lea rdi,[64+rsp]
  2914. call __ecp_nistz256_sqr_montx
  ; slot0 = sqr(slot0)
  2915. mov rdx,QWORD[((0+0))+rsp]
  2916. mov r14,QWORD[((8+0))+rsp]
  2917. lea rsi,[((-128+0))+rsp]
  2918. mov r15,QWORD[((16+0))+rsp]
  2919. mov r8,QWORD[((24+0))+rsp]
  2920. lea rdi,[rsp]
  2921. call __ecp_nistz256_sqr_montx
  ; result.z = 2 * mul(in.z, in.y): the product goes to result+64 and the
  ; doubling helper then stores over the same destination.
  2922. mov rdx,QWORD[32+rbx]
  2923. mov r9,QWORD[((64+0))+rbx]
  2924. mov r10,QWORD[((64+8))+rbx]
  2925. mov r11,QWORD[((64+16))+rbx]
  2926. mov r12,QWORD[((64+24))+rbx]
  2927. lea rsi,[((64-128))+rbx]
  2928. lea rbx,[32+rbx]
  2929. DB 102,72,15,126,215 ; movq rdi,xmm2  (result+64)
  2930. call __ecp_nistz256_mul_montx
  2931. call __ecp_nistz256_mul_by_2x
  ; slot32 = slot96 + slot64   (in.x + sqr(in.z))
  2932. mov r12,QWORD[((96+0))+rsp]
  2933. mov r13,QWORD[((96+8))+rsp]
  2934. lea rbx,[64+rsp]
  2935. mov r8,QWORD[((96+16))+rsp]
  2936. mov r9,QWORD[((96+24))+rsp]
  2937. lea rdi,[32+rsp]
  2938. call __ecp_nistz256_add_tox
  ; slot64 = slot96 - slot64   (in.x - sqr(in.z))
  2939. mov r12,QWORD[((96+0))+rsp]
  2940. mov r13,QWORD[((96+8))+rsp]
  2941. lea rbx,[64+rsp]
  2942. mov r8,QWORD[((96+16))+rsp]
  2943. mov r9,QWORD[((96+24))+rsp]
  2944. lea rdi,[64+rsp]
  2945. call __ecp_nistz256_sub_fromx
  ; sqr(slot0) with result+32 as destination ...
  2946. mov rdx,QWORD[((0+0))+rsp]
  2947. mov r14,QWORD[((8+0))+rsp]
  2948. lea rsi,[((-128+0))+rsp]
  2949. mov r15,QWORD[((16+0))+rsp]
  2950. mov r8,QWORD[((24+0))+rsp]
  2951. DB 102,72,15,126,207 ; movq rdi,xmm1  (result+32)
  2952. call __ecp_nistz256_sqr_montx
  ; ... then halve it modulo the prime, in registers r12,r13,r14,r15:
  ; add the modulus {-1, rsi, 0, rbp} with carry into r9, keep the original
  ; (and r9=0) when bit 0 of the value was clear, then shift the 257-bit
  ; quantity right by one and store it at [rdi].
  2953. xor r9,r9
  2954. mov rax,r12
  2955. add r12,-1
  2956. mov r10,r13
  2957. adc r13,rsi
  2958. mov rcx,r14
  2959. adc r14,0
  2960. mov r8,r15
  2961. adc r15,rbp
  2962. adc r9,0
  2963. xor rsi,rsi ; rsi = 0, source for clearing r9 below
  2964. test rax,1 ; ZF=1 when the pre-addition value is even
  2965. cmovz r12,rax
  2966. cmovz r13,r10
  2967. cmovz r14,rcx
  2968. cmovz r15,r8
  2969. cmovz r9,rsi
  ; Right shift by one: each qword takes bit 0 of the next-higher qword as
  ; its new bit 63; the top qword takes the carry held in r9.
  2970. mov rax,r13
  2971. shr r12,1
  2972. shl rax,63
  2973. mov r10,r14
  2974. shr r13,1
  2975. or r12,rax
  2976. shl r10,63
  2977. mov rcx,r15
  2978. shr r14,1
  2979. or r13,r10
  2980. shl rcx,63
  2981. mov QWORD[rdi],r12
  2982. shr r15,1
  2983. mov QWORD[8+rdi],r13
  2984. shl r9,63
  2985. or r14,rcx
  2986. or r15,r9
  2987. mov QWORD[16+rdi],r14
  2988. mov QWORD[24+rdi],r15
  ; slot32 = mul(slot32, slot64)
  2989. mov rdx,QWORD[64+rsp]
  2990. lea rbx,[64+rsp]
  2991. mov r9,QWORD[((0+32))+rsp]
  2992. mov r10,QWORD[((8+32))+rsp]
  2993. lea rsi,[((-128+32))+rsp]
  2994. mov r11,QWORD[((16+32))+rsp]
  2995. mov r12,QWORD[((24+32))+rsp]
  2996. lea rdi,[32+rsp]
  2997. call __ecp_nistz256_mul_montx
  ; slot128 = 2 * slot32 (operand taken from the registers left by the call
  ; above), then slot32 = slot128 + slot32, i.e. three times the product.
  2998. lea rdi,[128+rsp]
  2999. call __ecp_nistz256_mul_by_2x
  3000. lea rbx,[32+rsp]
  3001. lea rdi,[32+rsp]
  3002. call __ecp_nistz256_add_tox
  ; slot0 = mul(slot0, slot96)   (slot96 = in.x)
  3003. mov rdx,QWORD[96+rsp]
  3004. lea rbx,[96+rsp]
  3005. mov r9,QWORD[((0+0))+rsp]
  3006. mov r10,QWORD[((8+0))+rsp]
  3007. lea rsi,[((-128+0))+rsp]
  3008. mov r11,QWORD[((16+0))+rsp]
  3009. mov r12,QWORD[((24+0))+rsp]
  3010. lea rdi,[rsp]
  3011. call __ecp_nistz256_mul_montx
  ; slot128 = 2 * slot0
  3012. lea rdi,[128+rsp]
  3013. call __ecp_nistz256_mul_by_2x
  ; result.x = sqr(slot32) ...
  3014. mov rdx,QWORD[((0+32))+rsp]
  3015. mov r14,QWORD[((8+32))+rsp]
  3016. lea rsi,[((-128+32))+rsp]
  3017. mov r15,QWORD[((16+32))+rsp]
  3018. mov r8,QWORD[((24+32))+rsp]
  3019. DB 102,72,15,126,199 ; movq rdi,xmm0  (result+0)
  3020. call __ecp_nistz256_sqr_montx
  ; ... minus slot128. The square is moved from r12,r13,r14,r15 into the
  ; r12,r13,r8,r9 layout the helper expects, and r14/r15 are reloaded with
  ; the modulus qwords from rsi/rbp.
  ; NOTE(review): assumes rdi still points at result+0 after the call above.
  3021. lea rbx,[128+rsp]
  3022. mov r8,r14
  3023. mov r9,r15
  3024. mov r14,rsi
  3025. mov r15,rbp
  3026. call __ecp_nistz256_sub_fromx
  ; r12,r13,r8,r9 = slot0 - result.x (register-only helper; rdi is set to
  ; rsp here but the helper does not store).
  3027. mov rax,QWORD[((0+0))+rsp]
  3028. mov rbp,QWORD[((0+8))+rsp]
  3029. mov rcx,QWORD[((0+16))+rsp]
  3030. mov r10,QWORD[((0+24))+rsp]
  3031. lea rdi,[rsp]
  3032. call __ecp_nistz256_subx
  ; Store that difference to slot0 while loading it into r9,r10,r11,r12 as
  ; the first multiplicand. "xor ecx,ecx" sets ZF and only mov/lea follow,
  ; so both cmovz instructions below always perform their move.
  3033. mov rdx,QWORD[32+rsp]
  3034. lea rbx,[32+rsp]
  3035. mov r14,r12
  3036. xor ecx,ecx
  3037. mov QWORD[((0+0))+rsp],r12
  3038. mov r10,r13
  3039. mov QWORD[((0+8))+rsp],r13
  3040. cmovz r11,r8
  3041. mov QWORD[((0+16))+rsp],r8
  3042. lea rsi,[((0-128))+rsp]
  3043. cmovz r12,r9
  3044. mov QWORD[((0+24))+rsp],r9
  3045. mov r9,r14
  ; slot0 = mul(slot0, slot32)
  3046. lea rdi,[rsp]
  3047. call __ecp_nistz256_mul_montx
  ; result.y = (that product) - result.y, stored at result+32.
  3048. DB 102,72,15,126,203 ; movq rbx,xmm1  (result+32)
  3049. DB 102,72,15,126,207 ; movq rdi,xmm1  (result+32)
  3050. call __ecp_nistz256_sub_fromx
  ; Epilogue: rsi = rsp + 168 + 6*8, the stack pointer value at entry; the
  ; six pushed registers sit just below it.
  3051. lea rsi,[((160+56))+rsp]
  3052. mov r15,QWORD[((-48))+rsi]
  3053. mov r14,QWORD[((-40))+rsi]
  3054. mov r13,QWORD[((-32))+rsi]
  3055. mov r12,QWORD[((-24))+rsi]
  3056. mov rbx,QWORD[((-16))+rsi]
  3057. mov rbp,QWORD[((-8))+rsi]
  3058. lea rsp,[rsi]
  3059. $L$point_doublex_epilogue:
  3060. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3061. mov rsi,QWORD[16+rsp]
  3062. DB 0F3h,0C3h ;repret
  3063. $L$SEH_end_ecp_nistz256_point_doublex:
  3064. ALIGN 32
  ;-----------------------------------------------------------------------
  ; ecp_nistz256_point_addx -- Win64 entry point; arguments arrive in
  ; rcx (result), rdx (first input) and r8 (second input).
  ;
  ; Reads 96 bytes from each input (three 32-byte values at +0, +32, +64)
  ; and writes 96 bytes to the result.
  ;
  ; Frame: six pushed GPRs plus 32*18+8 bytes. Offsets from rsp:
  ;   384 / 416 / 448   copy of the first input  (+0 / +32 / +64)
  ;   480 / 512 / 544   copy of the second input (+0 / +32 / +64)
  ;   288 / 320 / 352   computed result          (+0 / +32 / +64)
  ;   0 .. 287          temporaries
  ;
  ; Values parked in xmm registers across the helper calls:
  ;   xmm0 = result pointer, xmm1 = first-input pointer
  ;   xmm5 = all-ones iff bytes 64..95 of the first input are all zero
  ;   xmm4 = all-ones iff bytes 64..95 of the second input are all zero
  ;
  ; Helper staging as used below:
  ;   __ecp_nistz256_sqr_montx : rdx,r14,r15,r8 = the four qwords of the
  ;       operand, rsi = operand address - 128, rdi = destination.
  ;   __ecp_nistz256_mul_montx : rdx = first qword of operand B, rbx = &B,
  ;       r9,r10,r11,r12 = the four qwords of operand A,
  ;       rsi = &A - 128, rdi = destination.
  ;   __ecp_nistz256_sub_fromx : rbx = address of the memory operand,
  ;       rdi = destination.
  ;   __ecp_nistz256_subx      : rax,rbp,rcx,r10 = four qwords of one operand.
  ;
  ; NOTE(review): the helpers are defined outside this chunk. Judging by their
  ; names they are P-256 Montgomery multiply / square / subtract routines and
  ; the xmm4/xmm5 masks are "input is the point at infinity" flags -- confirm
  ; against the generator script before relying on this.
  ;-----------------------------------------------------------------------
  3065. ecp_nistz256_point_addx:
  ; rdi and rsi are stored at [8+rsp] / [16+rsp] here and reloaded from the
  ; same offsets in the epilogue.
  3066. mov QWORD[8+rsp],rdi ;WIN64 prologue
  3067. mov QWORD[16+rsp],rsi
  3068. mov rax,rsp
  3069. $L$SEH_begin_ecp_nistz256_point_addx:
  ; Remap the Win64 argument registers: rdi = result, rsi = first input,
  ; rdx = second input.
  3070. mov rdi,rcx
  3071. mov rsi,rdx
  3072. mov rdx,r8
  3073. $L$point_addx:
  ; Save six callee-saved GPRs, then reserve the local frame.
  3074. push rbp
  3075. push rbx
  3076. push r12
  3077. push r13
  3078. push r14
  3079. push r15
  3080. sub rsp,32*18+8
  3081. $L$point_addx_body:
  ; Load all 96 bytes of the first input.
  3082. movdqu xmm0,XMMWORD[rsi]
  3083. movdqu xmm1,XMMWORD[16+rsi]
  3084. movdqu xmm2,XMMWORD[32+rsi]
  3085. movdqu xmm3,XMMWORD[48+rsi]
  3086. movdqu xmm4,XMMWORD[64+rsi]
  3087. movdqu xmm5,XMMWORD[80+rsi]
  ; rbx keeps the first-input pointer; rsi now points at the second input.
  3088. mov rbx,rsi
  3089. mov rsi,rdx
  ; Stack copy of the first input at 384..479.
  3090. movdqa XMMWORD[384+rsp],xmm0
  3091. movdqa XMMWORD[(384+16)+rsp],xmm1
  3092. movdqa XMMWORD[416+rsp],xmm2
  3093. movdqa XMMWORD[(416+16)+rsp],xmm3
  3094. movdqa XMMWORD[448+rsp],xmm4
  3095. movdqa XMMWORD[(448+16)+rsp],xmm5
  ; xmm5 starts an OR-reduction over bytes 64..95 of the first input; the
  ; por/pshufd steps are interleaved with loading the second input.
  3096. por xmm5,xmm4
  3097. movdqu xmm0,XMMWORD[rsi]
  3098. pshufd xmm3,xmm5,0xb1
  3099. movdqu xmm1,XMMWORD[16+rsi]
  3100. movdqu xmm2,XMMWORD[32+rsi]
  3101. por xmm5,xmm3
  3102. movdqu xmm3,XMMWORD[48+rsi]
  ; Four qwords at second input +64, staged as the sqr_montx operand.
  3103. mov rdx,QWORD[((64+0))+rsi]
  3104. mov r14,QWORD[((64+8))+rsi]
  3105. mov r15,QWORD[((64+16))+rsi]
  3106. mov r8,QWORD[((64+24))+rsi]
  ; Stack copy of the second input at 480..543 (+64 part is stored below).
  3107. movdqa XMMWORD[480+rsp],xmm0
  3108. pshufd xmm4,xmm5,0x1e
  3109. movdqa XMMWORD[(480+16)+rsp],xmm1
  3110. movdqu xmm0,XMMWORD[64+rsi]
  3111. movdqu xmm1,XMMWORD[80+rsi]
  3112. movdqa XMMWORD[512+rsp],xmm2
  3113. movdqa XMMWORD[(512+16)+rsp],xmm3
  ; After this por, dword 0 of xmm5 is the OR of all eight dwords of
  ; first-input bytes 64..95. xmm4 is zeroed for the later compare.
  3114. por xmm5,xmm4
  3115. pxor xmm4,xmm4
  ; xmm1 = OR of the two halves of second-input bytes 64..95.
  3116. por xmm1,xmm0
  ; Hand-encoded "movq xmm0,rdi": park the result pointer in xmm0.
  3117. DB 102,72,15,110,199
  3118. lea rsi,[((64-128))+rsi]
  3119. mov QWORD[((544+0))+rsp],rdx
  3120. mov QWORD[((544+8))+rsp],r14
  3121. mov QWORD[((544+16))+rsp],r15
  3122. mov QWORD[((544+24))+rsp],r8
  ; sqr_montx: operand = second input +64, destination = [96+rsp].
  3123. lea rdi,[96+rsp]
  3124. call __ecp_nistz256_sqr_montx
  ; Finish the masks: compare the reduced dword with zero and broadcast it.
  ; xmm5 <- first-input mask, xmm4 <- second-input mask (see header).
  3125. pcmpeqd xmm5,xmm4
  3126. pshufd xmm4,xmm1,0xb1
  3127. por xmm4,xmm1
  3128. pshufd xmm5,xmm5,0
  3129. pshufd xmm3,xmm4,0x1e
  3130. por xmm4,xmm3
  3131. pxor xmm3,xmm3
  3132. pcmpeqd xmm4,xmm3
  3133. pshufd xmm4,xmm4,0
  ; sqr_montx: operand = first input +64 (via rbx), destination = [32+rsp].
  3134. mov rdx,QWORD[((64+0))+rbx]
  3135. mov r14,QWORD[((64+8))+rbx]
  3136. mov r15,QWORD[((64+16))+rbx]
  3137. mov r8,QWORD[((64+24))+rbx]
  ; Hand-encoded "movq xmm1,rbx": park the first-input pointer in xmm1.
  3138. DB 102,72,15,110,203
  3139. lea rsi,[((64-128))+rbx]
  3140. lea rdi,[32+rsp]
  3141. call __ecp_nistz256_sqr_montx
  ; mul_montx: A = [96+rsp], B = [544+rsp] -> [224+rsp].
  3142. mov rdx,QWORD[544+rsp]
  3143. lea rbx,[544+rsp]
  3144. mov r9,QWORD[((0+96))+rsp]
  3145. mov r10,QWORD[((8+96))+rsp]
  3146. lea rsi,[((-128+96))+rsp]
  3147. mov r11,QWORD[((16+96))+rsp]
  3148. mov r12,QWORD[((24+96))+rsp]
  3149. lea rdi,[224+rsp]
  3150. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [32+rsp], B = [448+rsp] -> [256+rsp].
  3151. mov rdx,QWORD[448+rsp]
  3152. lea rbx,[448+rsp]
  3153. mov r9,QWORD[((0+32))+rsp]
  3154. mov r10,QWORD[((8+32))+rsp]
  3155. lea rsi,[((-128+32))+rsp]
  3156. mov r11,QWORD[((16+32))+rsp]
  3157. mov r12,QWORD[((24+32))+rsp]
  3158. lea rdi,[256+rsp]
  3159. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [224+rsp], B = [416+rsp] -> [224+rsp] (in place).
  3160. mov rdx,QWORD[416+rsp]
  3161. lea rbx,[416+rsp]
  3162. mov r9,QWORD[((0+224))+rsp]
  3163. mov r10,QWORD[((8+224))+rsp]
  3164. lea rsi,[((-128+224))+rsp]
  3165. mov r11,QWORD[((16+224))+rsp]
  3166. mov r12,QWORD[((24+224))+rsp]
  3167. lea rdi,[224+rsp]
  3168. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [256+rsp], B = [512+rsp] -> [256+rsp] (in place).
  3169. mov rdx,QWORD[512+rsp]
  3170. lea rbx,[512+rsp]
  3171. mov r9,QWORD[((0+256))+rsp]
  3172. mov r10,QWORD[((8+256))+rsp]
  3173. lea rsi,[((-128+256))+rsp]
  3174. mov r11,QWORD[((16+256))+rsp]
  3175. mov r12,QWORD[((24+256))+rsp]
  3176. lea rdi,[256+rsp]
  3177. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [224+rsp], destination = [64+rsp].
  ; NOTE(review): presumably [64] = (value just computed) - [224]; confirm.
  3178. lea rbx,[224+rsp]
  3179. lea rdi,[64+rsp]
  3180. call __ecp_nistz256_sub_fromx
  ; r12 = r12|r13|r8|r9. This assumes the helper leaves its four result
  ; qwords in r12,r13,r8,r9 (the same registers are stored as a result
  ; further down), so r12 == 0 iff that difference is zero.
  ; xmm2 = xmm4 | xmm5 (either input mask set).
  3181. or r12,r13
  3182. movdqa xmm2,xmm4
  3183. or r12,r8
  3184. or r12,r9
  3185. por xmm2,xmm5
  ; Hand-encoded "movq xmm3,r12": park the zero-test of the first difference.
  3186. DB 102,73,15,110,220
  ; mul_montx: A = [96+rsp], B = [384+rsp] -> [160+rsp].
  3187. mov rdx,QWORD[384+rsp]
  3188. lea rbx,[384+rsp]
  3189. mov r9,QWORD[((0+96))+rsp]
  3190. mov r10,QWORD[((8+96))+rsp]
  3191. lea rsi,[((-128+96))+rsp]
  3192. mov r11,QWORD[((16+96))+rsp]
  3193. mov r12,QWORD[((24+96))+rsp]
  3194. lea rdi,[160+rsp]
  3195. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [32+rsp], B = [480+rsp] -> [192+rsp].
  3196. mov rdx,QWORD[480+rsp]
  3197. lea rbx,[480+rsp]
  3198. mov r9,QWORD[((0+32))+rsp]
  3199. mov r10,QWORD[((8+32))+rsp]
  3200. lea rsi,[((-128+32))+rsp]
  3201. mov r11,QWORD[((16+32))+rsp]
  3202. mov r12,QWORD[((24+32))+rsp]
  3203. lea rdi,[192+rsp]
  3204. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [160+rsp], destination = [0+rsp].
  3205. lea rbx,[160+rsp]
  3206. lea rdi,[rsp]
  3207. call __ecp_nistz256_sub_fromx
  ; r12 = OR of the four result qwords of this second difference.
  3208. or r12,r13
  3209. or r12,r8
  3210. or r12,r9
  ; Hand-encoded "movq r8,xmm2" (either-mask) and "movq r9,xmm3" (saved
  ; zero-test of the first difference).
  3211. DB 102,73,15,126,208
  3212. DB 102,73,15,126,217
  ; Take the general path when the second difference is non-zero or either
  ; input mask is set.
  3213. or r12,r8
  ; 0x3e is a prefix byte emitted in front of the jnz (DS-override encoding;
  ; historically used as a branch-taken hint).
  3214. DB 0x3e
  3215. jnz NEAR $L$add_proceedx
  ; Both masks clear and second difference zero: if the first difference is
  ; zero as well, hand over to the doubling code.
  3216. test r9,r9
  3217. jz NEAR $L$add_doublex
  ; Otherwise write 96 zero bytes to the result.
  ; Hand-encoded "movq rdi,xmm0": recover the result pointer.
  3218. DB 102,72,15,126,199
  3219. pxor xmm0,xmm0
  3220. movdqu XMMWORD[rdi],xmm0
  3221. movdqu XMMWORD[16+rdi],xmm0
  3222. movdqu XMMWORD[32+rdi],xmm0
  3223. movdqu XMMWORD[48+rdi],xmm0
  3224. movdqu XMMWORD[64+rdi],xmm0
  3225. movdqu XMMWORD[80+rdi],xmm0
  3226. jmp NEAR $L$add_donex
  3227. ALIGN 32
  3228. $L$add_doublex:
  ; Hand-encoded "movq rsi,xmm1" / "movq rdi,xmm0": restore the first-input
  ; and result pointers, release 416 = 32*13 bytes so that 32*5+8 bytes of
  ; frame remain, and jump to $L$point_double_shortcutx (defined outside
  ; this function; presumably inside ecp_nistz256_point_doublex -- confirm).
  3229. DB 102,72,15,126,206
  3230. DB 102,72,15,126,199
  3231. add rsp,416
  3232. jmp NEAR $L$point_double_shortcutx
  3233. ALIGN 32
  3234. $L$add_proceedx:
  ; sqr_montx: operand = [64+rsp], destination = [96+rsp].
  3235. mov rdx,QWORD[((0+64))+rsp]
  3236. mov r14,QWORD[((8+64))+rsp]
  3237. lea rsi,[((-128+64))+rsp]
  3238. mov r15,QWORD[((16+64))+rsp]
  3239. mov r8,QWORD[((24+64))+rsp]
  3240. lea rdi,[96+rsp]
  3241. call __ecp_nistz256_sqr_montx
  ; mul_montx: A = [0+rsp], B = [448+rsp] -> [352+rsp].
  3242. mov rdx,QWORD[448+rsp]
  3243. lea rbx,[448+rsp]
  3244. mov r9,QWORD[((0+0))+rsp]
  3245. mov r10,QWORD[((8+0))+rsp]
  3246. lea rsi,[((-128+0))+rsp]
  3247. mov r11,QWORD[((16+0))+rsp]
  3248. mov r12,QWORD[((24+0))+rsp]
  3249. lea rdi,[352+rsp]
  3250. call __ecp_nistz256_mul_montx
  ; sqr_montx: operand = [0+rsp], destination = [32+rsp].
  3251. mov rdx,QWORD[((0+0))+rsp]
  3252. mov r14,QWORD[((8+0))+rsp]
  3253. lea rsi,[((-128+0))+rsp]
  3254. mov r15,QWORD[((16+0))+rsp]
  3255. mov r8,QWORD[((24+0))+rsp]
  3256. lea rdi,[32+rsp]
  3257. call __ecp_nistz256_sqr_montx
  ; mul_montx: A = [352+rsp], B = [544+rsp] -> [352+rsp] (in place).
  3258. mov rdx,QWORD[544+rsp]
  3259. lea rbx,[544+rsp]
  3260. mov r9,QWORD[((0+352))+rsp]
  3261. mov r10,QWORD[((8+352))+rsp]
  3262. lea rsi,[((-128+352))+rsp]
  3263. mov r11,QWORD[((16+352))+rsp]
  3264. mov r12,QWORD[((24+352))+rsp]
  3265. lea rdi,[352+rsp]
  3266. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [32+rsp], B = [0+rsp] -> [128+rsp].
  3267. mov rdx,QWORD[rsp]
  3268. lea rbx,[rsp]
  3269. mov r9,QWORD[((0+32))+rsp]
  3270. mov r10,QWORD[((8+32))+rsp]
  3271. lea rsi,[((-128+32))+rsp]
  3272. mov r11,QWORD[((16+32))+rsp]
  3273. mov r12,QWORD[((24+32))+rsp]
  3274. lea rdi,[128+rsp]
  3275. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [32+rsp], B = [160+rsp] -> [192+rsp].
  3276. mov rdx,QWORD[160+rsp]
  3277. lea rbx,[160+rsp]
  3278. mov r9,QWORD[((0+32))+rsp]
  3279. mov r10,QWORD[((8+32))+rsp]
  3280. lea rsi,[((-128+32))+rsp]
  3281. mov r11,QWORD[((16+32))+rsp]
  3282. mov r12,QWORD[((24+32))+rsp]
  3283. lea rdi,[192+rsp]
  3284. call __ecp_nistz256_mul_montx
  ; Inline doubling of the four-qword value in r12,r13,r8,r9 (presumably the
  ; product just computed), with the carry collected in r11. The constant
  ; with qwords (-1, r14, 0, r15) is then subtracted and, if that borrows,
  ; the un-subtracted copy (rax,rbp,rcx,r10) is restored by the cmovc chain.
  ; r14/r15 are not loaded here -- presumably modulus limbs left in place by
  ; the helper; confirm. The loads from [rsi] = [96+rsp] interleaved with the
  ; cmovc chain stage rax,rbp,rcx,r10 for __ecp_nistz256_subx; mov does not
  ; alter the carry flag the cmovc instructions test.
  3285. xor r11,r11
  3286. add r12,r12
  3287. lea rsi,[96+rsp]
  3288. adc r13,r13
  3289. mov rax,r12
  3290. adc r8,r8
  3291. adc r9,r9
  3292. mov rbp,r13
  3293. adc r11,0
  3294. sub r12,-1
  3295. mov rcx,r8
  3296. sbb r13,r14
  3297. sbb r8,0
  3298. mov r10,r9
  3299. sbb r9,r15
  3300. sbb r11,0
  3301. cmovc r12,rax
  3302. mov rax,QWORD[rsi]
  3303. cmovc r13,rbp
  3304. mov rbp,QWORD[8+rsi]
  3305. cmovc r8,rcx
  3306. mov rcx,QWORD[16+rsi]
  3307. cmovc r9,r10
  3308. mov r10,QWORD[24+rsi]
  3309. call __ecp_nistz256_subx
  ; sub_fromx: memory operand = [128+rsp], destination = [288+rsp].
  3310. lea rbx,[128+rsp]
  3311. lea rdi,[288+rsp]
  3312. call __ecp_nistz256_sub_fromx
  ; subx with rax,rbp,rcx,r10 = [192+rsp]; rdi = [320+rsp].
  3313. mov rax,QWORD[((192+0))+rsp]
  3314. mov rbp,QWORD[((192+8))+rsp]
  3315. mov rcx,QWORD[((192+16))+rsp]
  3316. mov r10,QWORD[((192+24))+rsp]
  3317. lea rdi,[320+rsp]
  3318. call __ecp_nistz256_subx
  ; The four result qwords (r12,r13,r8,r9) are stored explicitly here, which
  ; suggests subx itself leaves its result only in registers.
  3319. mov QWORD[rdi],r12
  3320. mov QWORD[8+rdi],r13
  3321. mov QWORD[16+rdi],r8
  3322. mov QWORD[24+rdi],r9
  ; mul_montx: A = [224+rsp], B = [128+rsp] -> [256+rsp].
  3323. mov rdx,QWORD[128+rsp]
  3324. lea rbx,[128+rsp]
  3325. mov r9,QWORD[((0+224))+rsp]
  3326. mov r10,QWORD[((8+224))+rsp]
  3327. lea rsi,[((-128+224))+rsp]
  3328. mov r11,QWORD[((16+224))+rsp]
  3329. mov r12,QWORD[((24+224))+rsp]
  3330. lea rdi,[256+rsp]
  3331. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [64+rsp], B = [320+rsp] -> [320+rsp].
  3332. mov rdx,QWORD[320+rsp]
  3333. lea rbx,[320+rsp]
  3334. mov r9,QWORD[((0+64))+rsp]
  3335. mov r10,QWORD[((8+64))+rsp]
  3336. lea rsi,[((-128+64))+rsp]
  3337. mov r11,QWORD[((16+64))+rsp]
  3338. mov r12,QWORD[((24+64))+rsp]
  3339. lea rdi,[320+rsp]
  3340. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [256+rsp], destination = [320+rsp].
  3341. lea rbx,[256+rsp]
  3342. lea rdi,[320+rsp]
  3343. call __ecp_nistz256_sub_fromx
  ; Hand-encoded "movq rdi,xmm0": recover the result pointer.
  3344. DB 102,72,15,126,199
  ; Branch-free selection of result bytes 64..95:
  ;   t   = (xmm5 & [544+rsp]) | (~xmm5 & [352+rsp])
  ;   out = (xmm4 & [448+rsp]) | (~xmm4 & t)
  3345. movdqa xmm0,xmm5
  3346. movdqa xmm1,xmm5
  3347. pandn xmm0,XMMWORD[352+rsp]
  3348. movdqa xmm2,xmm5
  3349. pandn xmm1,XMMWORD[((352+16))+rsp]
  3350. movdqa xmm3,xmm5
  3351. pand xmm2,XMMWORD[544+rsp]
  3352. pand xmm3,XMMWORD[((544+16))+rsp]
  3353. por xmm2,xmm0
  3354. por xmm3,xmm1
  3355. movdqa xmm0,xmm4
  3356. movdqa xmm1,xmm4
  3357. pandn xmm0,xmm2
  3358. movdqa xmm2,xmm4
  3359. pandn xmm1,xmm3
  3360. movdqa xmm3,xmm4
  3361. pand xmm2,XMMWORD[448+rsp]
  3362. pand xmm3,XMMWORD[((448+16))+rsp]
  3363. por xmm2,xmm0
  3364. por xmm3,xmm1
  3365. movdqu XMMWORD[64+rdi],xmm2
  3366. movdqu XMMWORD[80+rdi],xmm3
  ; Same selection for result bytes 0..31:
  ;   computed = [288+rsp], second input = [480+rsp], first input = [384+rsp].
  3367. movdqa xmm0,xmm5
  3368. movdqa xmm1,xmm5
  3369. pandn xmm0,XMMWORD[288+rsp]
  3370. movdqa xmm2,xmm5
  3371. pandn xmm1,XMMWORD[((288+16))+rsp]
  3372. movdqa xmm3,xmm5
  3373. pand xmm2,XMMWORD[480+rsp]
  3374. pand xmm3,XMMWORD[((480+16))+rsp]
  3375. por xmm2,xmm0
  3376. por xmm3,xmm1
  3377. movdqa xmm0,xmm4
  3378. movdqa xmm1,xmm4
  3379. pandn xmm0,xmm2
  3380. movdqa xmm2,xmm4
  3381. pandn xmm1,xmm3
  3382. movdqa xmm3,xmm4
  3383. pand xmm2,XMMWORD[384+rsp]
  3384. pand xmm3,XMMWORD[((384+16))+rsp]
  3385. por xmm2,xmm0
  3386. por xmm3,xmm1
  3387. movdqu XMMWORD[rdi],xmm2
  3388. movdqu XMMWORD[16+rdi],xmm3
  ; Same selection for result bytes 32..63:
  ;   computed = [320+rsp], second input = [512+rsp], first input = [416+rsp].
  3389. movdqa xmm0,xmm5
  3390. movdqa xmm1,xmm5
  3391. pandn xmm0,XMMWORD[320+rsp]
  3392. movdqa xmm2,xmm5
  3393. pandn xmm1,XMMWORD[((320+16))+rsp]
  3394. movdqa xmm3,xmm5
  3395. pand xmm2,XMMWORD[512+rsp]
  3396. pand xmm3,XMMWORD[((512+16))+rsp]
  3397. por xmm2,xmm0
  3398. por xmm3,xmm1
  3399. movdqa xmm0,xmm4
  3400. movdqa xmm1,xmm4
  3401. pandn xmm0,xmm2
  3402. movdqa xmm2,xmm4
  3403. pandn xmm1,xmm3
  3404. movdqa xmm3,xmm4
  3405. pand xmm2,XMMWORD[416+rsp]
  3406. pand xmm3,XMMWORD[((416+16))+rsp]
  3407. por xmm2,xmm0
  3408. por xmm3,xmm1
  3409. movdqu XMMWORD[32+rdi],xmm2
  3410. movdqu XMMWORD[48+rdi],xmm3
  3411. $L$add_donex:
  ; rsi = rsp + 32*18 + 56, i.e. the stack pointer as it was before the six
  ; pushes; reload the saved GPRs from just below it and drop the frame.
  3412. lea rsi,[((576+56))+rsp]
  3413. mov r15,QWORD[((-48))+rsi]
  3414. mov r14,QWORD[((-40))+rsi]
  3415. mov r13,QWORD[((-32))+rsi]
  3416. mov r12,QWORD[((-24))+rsi]
  3417. mov rbx,QWORD[((-16))+rsi]
  3418. mov rbp,QWORD[((-8))+rsi]
  3419. lea rsp,[rsi]
  3420. $L$point_addx_epilogue:
  ; Reload rdi/rsi from the slots written in the prologue, then "rep ret".
  3421. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3422. mov rsi,QWORD[16+rsp]
  3423. DB 0F3h,0C3h ;repret
  3424. $L$SEH_end_ecp_nistz256_point_addx:
  3425. ALIGN 32
  ;-----------------------------------------------------------------------
  ; ecp_nistz256_point_add_affinex -- Win64 entry point; arguments arrive in
  ; rcx (result), rdx (first input) and r8 (second input).
  ;
  ; Reads 96 bytes from the first input (three 32-byte values at +0, +32,
  ; +64) and 64 bytes from the second input (two 32-byte values at +0, +32);
  ; writes 96 bytes to the result.
  ;
  ; Frame: six pushed GPRs plus 32*15+8 bytes. Offsets from rsp:
  ;   320 / 352 / 384   copy of the first input  (+0 / +32 / +64)
  ;   416 / 448         copy of the second input (+0 / +32)
  ;   224 / 256 / 288   computed result          (+0 / +32 / +64)
  ;   0 .. 223          temporaries
  ;
  ; Values parked in xmm registers across the helper calls:
  ;   xmm0 = result pointer
  ;   xmm5 = all-ones iff bytes 64..95 of the first input are all zero
  ;   xmm4 = all-ones iff all 64 bytes of the second input are zero
  ;
  ; Helper staging as used below:
  ;   __ecp_nistz256_sqr_montx : rdx,r14,r15,r8 = the four qwords of the
  ;       operand, rsi = operand address - 128, rdi = destination.
  ;   __ecp_nistz256_mul_montx : rdx = first qword of operand B, rbx = &B,
  ;       r9,r10,r11,r12 = the four qwords of operand A,
  ;       rsi = &A - 128, rdi = destination.
  ;   __ecp_nistz256_sub_fromx : rbx = address of the memory operand,
  ;       rdi = destination.
  ;   __ecp_nistz256_subx      : rax,rbp,rcx,r10 = four qwords of one operand.
  ;
  ; NOTE(review): the helpers and $L$ONE_mont are defined outside this chunk.
  ; Judging by the names this is a mixed Jacobian + affine P-256 addition and
  ; the masks are "input is the point at infinity" flags -- confirm against
  ; the generator script.
  ;-----------------------------------------------------------------------
  3426. ecp_nistz256_point_add_affinex:
  ; rdi and rsi are stored at [8+rsp] / [16+rsp] here and reloaded from the
  ; same offsets in the epilogue.
  3427. mov QWORD[8+rsp],rdi ;WIN64 prologue
  3428. mov QWORD[16+rsp],rsi
  3429. mov rax,rsp
  3430. $L$SEH_begin_ecp_nistz256_point_add_affinex:
  ; Remap the Win64 argument registers: rdi = result, rsi = first input,
  ; rdx = second input.
  3431. mov rdi,rcx
  3432. mov rsi,rdx
  3433. mov rdx,r8
  3434. $L$point_add_affinex:
  ; Save six callee-saved GPRs, then reserve the local frame.
  3435. push rbp
  3436. push rbx
  3437. push r12
  3438. push r13
  3439. push r14
  3440. push r15
  3441. sub rsp,32*15+8
  3442. $L$add_affinex_body:
  ; Load all 96 bytes of the first input; rbx takes over the second-input
  ; pointer because rdx is reused as a helper operand register.
  3443. movdqu xmm0,XMMWORD[rsi]
  3444. mov rbx,rdx
  3445. movdqu xmm1,XMMWORD[16+rsi]
  3446. movdqu xmm2,XMMWORD[32+rsi]
  3447. movdqu xmm3,XMMWORD[48+rsi]
  3448. movdqu xmm4,XMMWORD[64+rsi]
  3449. movdqu xmm5,XMMWORD[80+rsi]
  ; Four qwords at first input +64, staged as the sqr_montx operand.
  3450. mov rdx,QWORD[((64+0))+rsi]
  3451. mov r14,QWORD[((64+8))+rsi]
  3452. mov r15,QWORD[((64+16))+rsi]
  3453. mov r8,QWORD[((64+24))+rsi]
  ; Stack copy of the first input at 320..415.
  3454. movdqa XMMWORD[320+rsp],xmm0
  3455. movdqa XMMWORD[(320+16)+rsp],xmm1
  3456. movdqa XMMWORD[352+rsp],xmm2
  3457. movdqa XMMWORD[(352+16)+rsp],xmm3
  3458. movdqa XMMWORD[384+rsp],xmm4
  3459. movdqa XMMWORD[(384+16)+rsp],xmm5
  ; xmm5 starts an OR-reduction over bytes 64..95 of the first input; the
  ; steps are interleaved with loading and copying the second input
  ; (stack copy at 416..479).
  3460. por xmm5,xmm4
  3461. movdqu xmm0,XMMWORD[rbx]
  3462. pshufd xmm3,xmm5,0xb1
  3463. movdqu xmm1,XMMWORD[16+rbx]
  3464. movdqu xmm2,XMMWORD[32+rbx]
  3465. por xmm5,xmm3
  3466. movdqu xmm3,XMMWORD[48+rbx]
  3467. movdqa XMMWORD[416+rsp],xmm0
  3468. pshufd xmm4,xmm5,0x1e
  3469. movdqa XMMWORD[(416+16)+rsp],xmm1
  3470. por xmm1,xmm0
  ; Hand-encoded "movq xmm0,rdi": park the result pointer in xmm0.
  3471. DB 102,72,15,110,199
  3472. movdqa XMMWORD[448+rsp],xmm2
  3473. movdqa XMMWORD[(448+16)+rsp],xmm3
  ; xmm3 ends up as the OR of all four 16-byte pieces of the second input.
  3474. por xmm3,xmm2
  3475. por xmm5,xmm4
  3476. pxor xmm4,xmm4
  3477. por xmm3,xmm1
  ; sqr_montx: operand = first input +64, destination = [32+rsp].
  3478. lea rsi,[((64-128))+rsi]
  3479. lea rdi,[32+rsp]
  3480. call __ecp_nistz256_sqr_montx
  ; Finish both masks (compare the reduced dword with zero, broadcast it)
  ; while staging the next multiply. r12,r13,r14,r15 are copied into
  ; r9,r10,r11,r12 -- this assumes sqr_montx leaves its four result qwords
  ; in r12..r15, so operand A is the square just written to [32+rsp].
  3481. pcmpeqd xmm5,xmm4
  3482. pshufd xmm4,xmm3,0xb1
  3483. mov rdx,QWORD[rbx]
  3484. mov r9,r12
  3485. por xmm4,xmm3
  3486. pshufd xmm5,xmm5,0
  3487. pshufd xmm3,xmm4,0x1e
  3488. mov r10,r13
  3489. por xmm4,xmm3
  3490. pxor xmm3,xmm3
  3491. mov r11,r14
  3492. pcmpeqd xmm4,xmm3
  3493. pshufd xmm4,xmm4,0
  ; mul_montx: A = [32+rsp], B = second input +0 (rbx) -> [0+rsp].
  3494. lea rsi,[((32-128))+rsp]
  3495. mov r12,r15
  3496. lea rdi,[rsp]
  3497. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [320+rsp], destination = [64+rsp].
  3498. lea rbx,[320+rsp]
  3499. lea rdi,[64+rsp]
  3500. call __ecp_nistz256_sub_fromx
  ; mul_montx: A = [32+rsp], B = [384+rsp] -> [32+rsp] (in place).
  3501. mov rdx,QWORD[384+rsp]
  3502. lea rbx,[384+rsp]
  3503. mov r9,QWORD[((0+32))+rsp]
  3504. mov r10,QWORD[((8+32))+rsp]
  3505. lea rsi,[((-128+32))+rsp]
  3506. mov r11,QWORD[((16+32))+rsp]
  3507. mov r12,QWORD[((24+32))+rsp]
  3508. lea rdi,[32+rsp]
  3509. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [64+rsp], B = [384+rsp] -> [288+rsp].
  3510. mov rdx,QWORD[384+rsp]
  3511. lea rbx,[384+rsp]
  3512. mov r9,QWORD[((0+64))+rsp]
  3513. mov r10,QWORD[((8+64))+rsp]
  3514. lea rsi,[((-128+64))+rsp]
  3515. mov r11,QWORD[((16+64))+rsp]
  3516. mov r12,QWORD[((24+64))+rsp]
  3517. lea rdi,[288+rsp]
  3518. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [32+rsp], B = [448+rsp] -> [32+rsp] (in place).
  3519. mov rdx,QWORD[448+rsp]
  3520. lea rbx,[448+rsp]
  3521. mov r9,QWORD[((0+32))+rsp]
  3522. mov r10,QWORD[((8+32))+rsp]
  3523. lea rsi,[((-128+32))+rsp]
  3524. mov r11,QWORD[((16+32))+rsp]
  3525. mov r12,QWORD[((24+32))+rsp]
  3526. lea rdi,[32+rsp]
  3527. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [352+rsp], destination = [96+rsp].
  3528. lea rbx,[352+rsp]
  3529. lea rdi,[96+rsp]
  3530. call __ecp_nistz256_sub_fromx
  ; sqr_montx: operand = [64+rsp], destination = [128+rsp].
  3531. mov rdx,QWORD[((0+64))+rsp]
  3532. mov r14,QWORD[((8+64))+rsp]
  3533. lea rsi,[((-128+64))+rsp]
  3534. mov r15,QWORD[((16+64))+rsp]
  3535. mov r8,QWORD[((24+64))+rsp]
  3536. lea rdi,[128+rsp]
  3537. call __ecp_nistz256_sqr_montx
  ; sqr_montx: operand = [96+rsp], destination = [192+rsp].
  3538. mov rdx,QWORD[((0+96))+rsp]
  3539. mov r14,QWORD[((8+96))+rsp]
  3540. lea rsi,[((-128+96))+rsp]
  3541. mov r15,QWORD[((16+96))+rsp]
  3542. mov r8,QWORD[((24+96))+rsp]
  3543. lea rdi,[192+rsp]
  3544. call __ecp_nistz256_sqr_montx
  ; mul_montx: A = [64+rsp], B = [128+rsp] -> [160+rsp].
  3545. mov rdx,QWORD[128+rsp]
  3546. lea rbx,[128+rsp]
  3547. mov r9,QWORD[((0+64))+rsp]
  3548. mov r10,QWORD[((8+64))+rsp]
  3549. lea rsi,[((-128+64))+rsp]
  3550. mov r11,QWORD[((16+64))+rsp]
  3551. mov r12,QWORD[((24+64))+rsp]
  3552. lea rdi,[160+rsp]
  3553. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [128+rsp], B = [320+rsp] -> [0+rsp].
  3554. mov rdx,QWORD[320+rsp]
  3555. lea rbx,[320+rsp]
  3556. mov r9,QWORD[((0+128))+rsp]
  3557. mov r10,QWORD[((8+128))+rsp]
  3558. lea rsi,[((-128+128))+rsp]
  3559. mov r11,QWORD[((16+128))+rsp]
  3560. mov r12,QWORD[((24+128))+rsp]
  3561. lea rdi,[rsp]
  3562. call __ecp_nistz256_mul_montx
  ; Inline doubling of the four-qword value in r12,r13,r8,r9 (presumably the
  ; product just computed), with the carry collected in r11. The constant
  ; with qwords (-1, r14, 0, r15) is then subtracted and, if that borrows,
  ; the un-subtracted copy (rax,rbp,rcx,r10) is restored by the cmovc chain.
  ; r14/r15 are not loaded here -- presumably modulus limbs left in place by
  ; the helper; confirm. The loads from [rsi] = [192+rsp] interleaved with
  ; the cmovc chain stage rax,rbp,rcx,r10 for __ecp_nistz256_subx; mov does
  ; not alter the carry flag the cmovc instructions test.
  3563. xor r11,r11
  3564. add r12,r12
  3565. lea rsi,[192+rsp]
  3566. adc r13,r13
  3567. mov rax,r12
  3568. adc r8,r8
  3569. adc r9,r9
  3570. mov rbp,r13
  3571. adc r11,0
  3572. sub r12,-1
  3573. mov rcx,r8
  3574. sbb r13,r14
  3575. sbb r8,0
  3576. mov r10,r9
  3577. sbb r9,r15
  3578. sbb r11,0
  3579. cmovc r12,rax
  3580. mov rax,QWORD[rsi]
  3581. cmovc r13,rbp
  3582. mov rbp,QWORD[8+rsi]
  3583. cmovc r8,rcx
  3584. mov rcx,QWORD[16+rsi]
  3585. cmovc r9,r10
  3586. mov r10,QWORD[24+rsi]
  3587. call __ecp_nistz256_subx
  ; sub_fromx: memory operand = [160+rsp], destination = [224+rsp].
  3588. lea rbx,[160+rsp]
  3589. lea rdi,[224+rsp]
  3590. call __ecp_nistz256_sub_fromx
  ; subx with rax,rbp,rcx,r10 = [0+rsp]; rdi = [64+rsp].
  3591. mov rax,QWORD[((0+0))+rsp]
  3592. mov rbp,QWORD[((0+8))+rsp]
  3593. mov rcx,QWORD[((0+16))+rsp]
  3594. mov r10,QWORD[((0+24))+rsp]
  3595. lea rdi,[64+rsp]
  3596. call __ecp_nistz256_subx
  ; The four result qwords (r12,r13,r8,r9) are stored explicitly here, which
  ; suggests subx itself leaves its result only in registers.
  3597. mov QWORD[rdi],r12
  3598. mov QWORD[8+rdi],r13
  3599. mov QWORD[16+rdi],r8
  3600. mov QWORD[24+rdi],r9
  ; mul_montx: A = [160+rsp], B = [352+rsp] -> [32+rsp].
  3601. mov rdx,QWORD[352+rsp]
  3602. lea rbx,[352+rsp]
  3603. mov r9,QWORD[((0+160))+rsp]
  3604. mov r10,QWORD[((8+160))+rsp]
  3605. lea rsi,[((-128+160))+rsp]
  3606. mov r11,QWORD[((16+160))+rsp]
  3607. mov r12,QWORD[((24+160))+rsp]
  3608. lea rdi,[32+rsp]
  3609. call __ecp_nistz256_mul_montx
  ; mul_montx: A = [64+rsp], B = [96+rsp] -> [64+rsp] (in place).
  3610. mov rdx,QWORD[96+rsp]
  3611. lea rbx,[96+rsp]
  3612. mov r9,QWORD[((0+64))+rsp]
  3613. mov r10,QWORD[((8+64))+rsp]
  3614. lea rsi,[((-128+64))+rsp]
  3615. mov r11,QWORD[((16+64))+rsp]
  3616. mov r12,QWORD[((24+64))+rsp]
  3617. lea rdi,[64+rsp]
  3618. call __ecp_nistz256_mul_montx
  ; sub_fromx: memory operand = [32+rsp], destination = [256+rsp].
  3619. lea rbx,[32+rsp]
  3620. lea rdi,[256+rsp]
  3621. call __ecp_nistz256_sub_fromx
  ; Hand-encoded "movq rdi,xmm0": recover the result pointer.
  3622. DB 102,72,15,126,199
  ; Branch-free selection of result bytes 64..95:
  ;   t   = (xmm5 & $L$ONE_mont) | (~xmm5 & [288+rsp])
  ;   out = (xmm4 & [384+rsp])   | (~xmm4 & t)
  3623. movdqa xmm0,xmm5
  3624. movdqa xmm1,xmm5
  3625. pandn xmm0,XMMWORD[288+rsp]
  3626. movdqa xmm2,xmm5
  3627. pandn xmm1,XMMWORD[((288+16))+rsp]
  3628. movdqa xmm3,xmm5
  3629. pand xmm2,XMMWORD[$L$ONE_mont]
  3630. pand xmm3,XMMWORD[(($L$ONE_mont+16))]
  3631. por xmm2,xmm0
  3632. por xmm3,xmm1
  3633. movdqa xmm0,xmm4
  3634. movdqa xmm1,xmm4
  3635. pandn xmm0,xmm2
  3636. movdqa xmm2,xmm4
  3637. pandn xmm1,xmm3
  3638. movdqa xmm3,xmm4
  3639. pand xmm2,XMMWORD[384+rsp]
  3640. pand xmm3,XMMWORD[((384+16))+rsp]
  3641. por xmm2,xmm0
  3642. por xmm3,xmm1
  3643. movdqu XMMWORD[64+rdi],xmm2
  3644. movdqu XMMWORD[80+rdi],xmm3
  ; Same selection for result bytes 0..31:
  ;   computed = [224+rsp], second input = [416+rsp], first input = [320+rsp].
  3645. movdqa xmm0,xmm5
  3646. movdqa xmm1,xmm5
  3647. pandn xmm0,XMMWORD[224+rsp]
  3648. movdqa xmm2,xmm5
  3649. pandn xmm1,XMMWORD[((224+16))+rsp]
  3650. movdqa xmm3,xmm5
  3651. pand xmm2,XMMWORD[416+rsp]
  3652. pand xmm3,XMMWORD[((416+16))+rsp]
  3653. por xmm2,xmm0
  3654. por xmm3,xmm1
  3655. movdqa xmm0,xmm4
  3656. movdqa xmm1,xmm4
  3657. pandn xmm0,xmm2
  3658. movdqa xmm2,xmm4
  3659. pandn xmm1,xmm3
  3660. movdqa xmm3,xmm4
  3661. pand xmm2,XMMWORD[320+rsp]
  3662. pand xmm3,XMMWORD[((320+16))+rsp]
  3663. por xmm2,xmm0
  3664. por xmm3,xmm1
  3665. movdqu XMMWORD[rdi],xmm2
  3666. movdqu XMMWORD[16+rdi],xmm3
  ; Same selection for result bytes 32..63:
  ;   computed = [256+rsp], second input = [448+rsp], first input = [352+rsp].
  3667. movdqa xmm0,xmm5
  3668. movdqa xmm1,xmm5
  3669. pandn xmm0,XMMWORD[256+rsp]
  3670. movdqa xmm2,xmm5
  3671. pandn xmm1,XMMWORD[((256+16))+rsp]
  3672. movdqa xmm3,xmm5
  3673. pand xmm2,XMMWORD[448+rsp]
  3674. pand xmm3,XMMWORD[((448+16))+rsp]
  3675. por xmm2,xmm0
  3676. por xmm3,xmm1
  3677. movdqa xmm0,xmm4
  3678. movdqa xmm1,xmm4
  3679. pandn xmm0,xmm2
  3680. movdqa xmm2,xmm4
  3681. pandn xmm1,xmm3
  3682. movdqa xmm3,xmm4
  3683. pand xmm2,XMMWORD[352+rsp]
  3684. pand xmm3,XMMWORD[((352+16))+rsp]
  3685. por xmm2,xmm0
  3686. por xmm3,xmm1
  3687. movdqu XMMWORD[32+rdi],xmm2
  3688. movdqu XMMWORD[48+rdi],xmm3
  ; rsi = rsp + 32*15 + 56, i.e. the stack pointer as it was before the six
  ; pushes; reload the saved GPRs from just below it and drop the frame.
  3689. lea rsi,[((480+56))+rsp]
  3690. mov r15,QWORD[((-48))+rsi]
  3691. mov r14,QWORD[((-40))+rsi]
  3692. mov r13,QWORD[((-32))+rsi]
  3693. mov r12,QWORD[((-24))+rsi]
  3694. mov rbx,QWORD[((-16))+rsi]
  3695. mov rbp,QWORD[((-8))+rsi]
  3696. lea rsp,[rsi]
  3697. $L$add_affinex_epilogue:
  ; Reload rdi/rsi from the slots written in the prologue, then "rep ret".
  3698. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3699. mov rsi,QWORD[16+rsp]
  3700. DB 0F3h,0C3h ;repret
  3701. $L$SEH_end_ecp_nistz256_point_add_affinex:
  3702. EXTERN __imp_RtlVirtualUnwind
  3703. ALIGN 16
  ;-----------------------------------------------------------------------
  ; short_handler -- Win64 language-specific exception handler for functions
  ; whose prologue pushes only r12 and r13 (referenced from the .xdata
  ; record of ecp_nistz256_neg).
  ; In:  r8 = context record, r9 = dispatcher context (3rd and 4th handler
  ;      arguments). Field names in the comments below follow the winnt.h
  ;      CONTEXT / DISPATCHER_CONTEXT layouts -- confirm against the SDK.
  ; The handler data consists of two image-relative DWORDs: the address of
  ; the function's "body" label and of its "epilogue" label.
  ; Control always ends up in $L$common_seh_tail (shared with full_handler).
  ;-----------------------------------------------------------------------
  3704. short_handler:
  ; Preserve every non-volatile GPR plus flags, and reserve 64 bytes that
  ; the shared tail uses for the outgoing call's stack arguments.
  3705. push rsi
  3706. push rdi
  3707. push rbx
  3708. push rbp
  3709. push r12
  3710. push r13
  3711. push r14
  3712. push r15
  3713. pushfq
  3714. sub rsp,64
  ; rax = context+120 (Rax), rbx = context+248 (Rip).
  3715. mov rax,QWORD[120+r8]
  3716. mov rbx,QWORD[248+r8]
  ; rsi = dispatcher+8 (ImageBase), r11 = dispatcher+56 (HandlerData).
  3717. mov rsi,QWORD[8+r9]
  3718. mov r11,QWORD[56+r9]
  ; Fault before the body label: the tail uses rax as loaded from context+120.
  3719. mov r10d,DWORD[r11]
  3720. lea r10,[r10*1+rsi]
  3721. cmp rbx,r10
  3722. jb NEAR $L$common_seh_tail
  ; rax = context+152 (Rsp). Fault at or after the epilogue label: the
  ; registers are already restored, so go to the tail with rax = Rsp.
  3723. mov rax,QWORD[152+r8]
  3724. mov r10d,DWORD[4+r11]
  3725. lea r10,[r10*1+rsi]
  3726. cmp rbx,r10
  3727. jae NEAR $L$common_seh_tail
  ; Fault inside the body: step rax over the two pushed qwords and copy them
  ; into context+216 (R12) and context+224 (R13).
  3728. lea rax,[16+rax]
  3729. mov r12,QWORD[((-8))+rax]
  3730. mov r13,QWORD[((-16))+rax]
  3731. mov QWORD[216+r8],r12
  3732. mov QWORD[224+r8],r13
  3733. jmp NEAR $L$common_seh_tail
  3734. ALIGN 16
  ;-----------------------------------------------------------------------
  ; full_handler -- Win64 language-specific exception handler for functions
  ; that push rbp, rbx, r12, r13, r14, r15 and then allocate a local frame.
  ; In:  r8 = context record, r9 = dispatcher context (3rd and 4th handler
  ;      arguments). Field names in the comments below follow the winnt.h
  ;      CONTEXT / DISPATCHER_CONTEXT layouts -- confirm against the SDK.
  ; The handler data consists of three image-relative/plain DWORDs: address
  ; of the "body" label, address of the "epilogue" label, and the byte
  ; distance from rsp inside the body up to the stack pointer value that was
  ; current before the six pushes.
  ; $L$common_seh_tail below is also entered from short_handler.
  ; Returns eax = 1.
  ;-----------------------------------------------------------------------
  3735. full_handler:
  ; Preserve every non-volatile GPR plus flags, and reserve 64 bytes for the
  ; outgoing call's stack arguments.
  3736. push rsi
  3737. push rdi
  3738. push rbx
  3739. push rbp
  3740. push r12
  3741. push r13
  3742. push r14
  3743. push r15
  3744. pushfq
  3745. sub rsp,64
  ; rax = context+120 (Rax), rbx = context+248 (Rip).
  3746. mov rax,QWORD[120+r8]
  3747. mov rbx,QWORD[248+r8]
  ; rsi = dispatcher+8 (ImageBase), r11 = dispatcher+56 (HandlerData).
  3748. mov rsi,QWORD[8+r9]
  3749. mov r11,QWORD[56+r9]
  ; Fault before the body label: the tail uses rax as loaded from context+120.
  3750. mov r10d,DWORD[r11]
  3751. lea r10,[r10*1+rsi]
  3752. cmp rbx,r10
  3753. jb NEAR $L$common_seh_tail
  ; rax = context+152 (Rsp). Fault at or after the epilogue label: go to the
  ; tail with rax = Rsp.
  3754. mov rax,QWORD[152+r8]
  3755. mov r10d,DWORD[4+r11]
  3756. lea r10,[r10*1+rsi]
  3757. cmp rbx,r10
  3758. jae NEAR $L$common_seh_tail
  ; Fault inside the body: add the third handler-data DWORD to reach the
  ; pre-push stack pointer, then read the six saved registers below it.
  3759. mov r10d,DWORD[8+r11]
  3760. lea rax,[r10*1+rax]
  3761. mov rbp,QWORD[((-8))+rax]
  3762. mov rbx,QWORD[((-16))+rax]
  3763. mov r12,QWORD[((-24))+rax]
  3764. mov r13,QWORD[((-32))+rax]
  3765. mov r14,QWORD[((-40))+rax]
  3766. mov r15,QWORD[((-48))+rax]
  ; Write them back: context+144 (Rbx), +160 (Rbp), +216..+240 (R12..R15).
  3767. mov QWORD[144+r8],rbx
  3768. mov QWORD[160+r8],rbp
  3769. mov QWORD[216+r8],r12
  3770. mov QWORD[224+r8],r13
  3771. mov QWORD[232+r8],r14
  3772. mov QWORD[240+r8],r15
  3773. $L$common_seh_tail:
  ; rax is the frame's entry-time stack pointer; the functions covered here
  ; park rdi/rsi at [8+rsp]/[16+rsp] on entry, so fetch them from there and
  ; record rax/rsi/rdi in context+152 (Rsp), +168 (Rsi), +176 (Rdi).
  3774. mov rdi,QWORD[8+rax]
  3775. mov rsi,QWORD[16+rax]
  3776. mov QWORD[152+r8],rax
  3777. mov QWORD[168+r8],rsi
  3778. mov QWORD[176+r8],rdi
  ; Copy 154 qwords from the context record (r8) to the buffer at
  ; dispatcher+40 (ContextRecord). The DD encodes the bytes FC F3 48 A5,
  ; i.e. "cld" followed by "rep movsq". 154*8 = 1232 bytes -- presumably
  ; sizeof(CONTEXT); confirm.
  3779. mov rdi,QWORD[40+r9]
  3780. mov rsi,r8
  3781. mov ecx,154
  3782. DD 0xa548f3fc
  ; Set up the eight arguments of RtlVirtualUnwind: rcx = 0, rdx = dispatcher+8,
  ; r8 = dispatcher+0, r9 = dispatcher+16, and on the stack dispatcher+40,
  ; the address of dispatcher+56, the address of dispatcher+24, and 0.
  3783. mov rsi,r9
  3784. xor rcx,rcx
  3785. mov rdx,QWORD[8+rsi]
  3786. mov r8,QWORD[rsi]
  3787. mov r9,QWORD[16+rsi]
  3788. mov r10,QWORD[40+rsi]
  3789. lea r11,[56+rsi]
  3790. lea r12,[24+rsi]
  3791. mov QWORD[32+rsp],r10
  3792. mov QWORD[40+rsp],r11
  3793. mov QWORD[48+rsp],r12
  3794. mov QWORD[56+rsp],rcx
  3795. call QWORD[__imp_RtlVirtualUnwind]
  ; Return 1 (ExceptionContinueSearch in the EXCEPTION_DISPOSITION enum) and
  ; undo the prologue in reverse order.
  3796. mov eax,1
  3797. add rsp,64
  3798. popfq
  3799. pop r15
  3800. pop r14
  3801. pop r13
  3802. pop r12
  3803. pop rbp
  3804. pop rbx
  3805. pop rdi
  3806. pop rsi
  3807. DB 0F3h,0C3h ;repret
  ; .pdata: Win64 function table. Each function contributes one record of
  ; three image-relative DWORDs: begin address, end address, and the address
  ; of its $L$SEH_info_* unwind record in .xdata. The select_w5/select_w7
  ; pairs (and their avx2 variants) share a single unwind record each.
  3808. section .pdata rdata align=4
  3809. ALIGN 4
  3810. DD $L$SEH_begin_ecp_nistz256_neg wrt ..imagebase
  3811. DD $L$SEH_end_ecp_nistz256_neg wrt ..imagebase
  3812. DD $L$SEH_info_ecp_nistz256_neg wrt ..imagebase
  3813. DD $L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase
  3814. DD $L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase
  3815. DD $L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase
  3816. DD $L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase
  3817. DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase
  3818. DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase
  3819. DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3820. DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3821. DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3822. DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3823. DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3824. DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3825. DD $L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase
  3826. DD $L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase
  3827. DD $L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase
  3828. DD $L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase
  3829. DD $L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase
  3830. DD $L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase
  3831. DD $L$SEH_begin_ecp_nistz256_select_w5 wrt ..imagebase
  3832. DD $L$SEH_end_ecp_nistz256_select_w5 wrt ..imagebase
  3833. DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
  3834. DD $L$SEH_begin_ecp_nistz256_select_w7 wrt ..imagebase
  3835. DD $L$SEH_end_ecp_nistz256_select_w7 wrt ..imagebase
  3836. DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
  3837. DD $L$SEH_begin_ecp_nistz256_avx2_select_w5 wrt ..imagebase
  3838. DD $L$SEH_end_ecp_nistz256_avx2_select_w5 wrt ..imagebase
  3839. DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
  3840. DD $L$SEH_begin_ecp_nistz256_avx2_select_w7 wrt ..imagebase
  3841. DD $L$SEH_end_ecp_nistz256_avx2_select_w7 wrt ..imagebase
  3842. DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
  3843. DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase
  3844. DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase
  3845. DD $L$SEH_info_ecp_nistz256_point_double wrt ..imagebase
  3846. DD $L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase
  3847. DD $L$SEH_end_ecp_nistz256_point_add wrt ..imagebase
  3848. DD $L$SEH_info_ecp_nistz256_point_add wrt ..imagebase
  3849. DD $L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase
  3850. DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase
  3851. DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase
  3852. DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase
  3853. DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase
  3854. DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase
  3855. DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase
  3856. DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase
  3857. DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase
  3858. DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase
  3859. DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase
  3860. DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase
  ; .xdata: unwind records referenced from the .pdata table.
  ;
  ; Two kinds of record appear here:
  ;  * Handler-based records start with DB 9,0,0,0. In the x64 UNWIND_INFO
  ;    format the first byte holds version (low 3 bits) and flags (high 5
  ;    bits), so 9 = version 1 with the exception-handler flag; the next
  ;    three bytes (prolog size, unwind-code count, frame register) are zero.
  ;    They are followed by the handler address and the handler data:
  ;    body label, epilogue label and -- for full_handler only -- the byte
  ;    distance from the body's rsp to the pre-push stack pointer, plus a
  ;    zero DWORD.
  ;  * The two select_wX records carry explicit unwind codes instead of a
  ;    handler.
  3861. section .xdata rdata align=8
  3862. ALIGN 8
  ; ecp_nistz256_neg: short_handler, two-DWORD handler data.
  3863. $L$SEH_info_ecp_nistz256_neg:
  3864. DB 9,0,0,0
  3865. DD short_handler wrt ..imagebase
  3866. DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase
  ; The following records use full_handler with a frame distance of 48,
  ; i.e. exactly the six pushed registers and no additional local frame.
  3867. $L$SEH_info_ecp_nistz256_ord_mul_mont:
  3868. DB 9,0,0,0
  3869. DD full_handler wrt ..imagebase
  3870. DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
  3871. DD 48,0
  3872. $L$SEH_info_ecp_nistz256_ord_sqr_mont:
  3873. DB 9,0,0,0
  3874. DD full_handler wrt ..imagebase
  3875. DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
  3876. DD 48,0
  3877. $L$SEH_info_ecp_nistz256_ord_mul_montx:
  3878. DB 9,0,0,0
  3879. DD full_handler wrt ..imagebase
  3880. DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
  3881. DD 48,0
  3882. $L$SEH_info_ecp_nistz256_ord_sqr_montx:
  3883. DB 9,0,0,0
  3884. DD full_handler wrt ..imagebase
  3885. DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
  3886. DD 48,0
  3887. $L$SEH_info_ecp_nistz256_mul_mont:
  3888. DB 9,0,0,0
  3889. DD full_handler wrt ..imagebase
  3890. DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
  3891. DD 48,0
  3892. $L$SEH_info_ecp_nistz256_sqr_mont:
  3893. DB 9,0,0,0
  3894. DD full_handler wrt ..imagebase
  3895. DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
  3896. DD 48,0
  ; Shared by select_w5 and select_w7. Header: version 1, no handler,
  ; prolog size 0x33, 0x16 (22) unwind-code slots, no frame register.
  ; Decoded per the x64 UNWIND_CODE format (byte 0 = prolog offset,
  ; byte 1 = info<<4 | opcode): ten two-slot "save xmm128" codes (opcode 8)
  ; for xmm15 down to xmm6 at rsp+0x90 down to rsp+0x00 (scaled offsets
  ; 9..0), then a two-slot "alloc large" code (opcode 1) of 0x15*8 = 0xa8
  ; bytes at prolog offset 4.
  3897. $L$SEH_info_ecp_nistz256_select_wX:
  3898. DB 0x01,0x33,0x16,0x00
  3899. DB 0x33,0xf8,0x09,0x00
  3900. DB 0x2e,0xe8,0x08,0x00
  3901. DB 0x29,0xd8,0x07,0x00
  3902. DB 0x24,0xc8,0x06,0x00
  3903. DB 0x1f,0xb8,0x05,0x00
  3904. DB 0x1a,0xa8,0x04,0x00
  3905. DB 0x15,0x98,0x03,0x00
  3906. DB 0x10,0x88,0x02,0x00
  3907. DB 0x0c,0x78,0x01,0x00
  3908. DB 0x08,0x68,0x00,0x00
  3909. DB 0x04,0x01,0x15,0x00
  3910. ALIGN 8
  ; Shared by avx2_select_w5 and avx2_select_w7. Header: version 1, no
  ; handler, prolog size 0x36, 0x17 (23) unwind-code slots, frame register
  ; 0x0b (r11) with offset 0. Same ten xmm saves and 0xa8-byte allocation as
  ; above, followed by a one-slot "set frame register" code (opcode 3, r11)
  ; at prolog offset 0 and two zero bytes that pad the odd slot count.
  3911. $L$SEH_info_ecp_nistz256_avx2_select_wX:
  3912. DB 0x01,0x36,0x17,0x0b
  3913. DB 0x36,0xf8,0x09,0x00
  3914. DB 0x31,0xe8,0x08,0x00
  3915. DB 0x2c,0xd8,0x07,0x00
  3916. DB 0x27,0xc8,0x06,0x00
  3917. DB 0x22,0xb8,0x05,0x00
  3918. DB 0x1d,0xa8,0x04,0x00
  3919. DB 0x18,0x98,0x03,0x00
  3920. DB 0x13,0x88,0x02,0x00
  3921. DB 0x0e,0x78,0x01,0x00
  3922. DB 0x09,0x68,0x00,0x00
  3923. DB 0x04,0x01,0x15,0x00
  3924. DB 0x00,0xb3,0x00,0x00
  3925. ALIGN 8
  ; Point routines: full_handler with frame distance = local frame + 56
  ; (the "+8" of the sub rsp plus the 48 bytes of pushed registers):
  ; 32*5+56 for doubling, 32*18+56 for addition, 32*15+56 for affine addition.
  3926. $L$SEH_info_ecp_nistz256_point_double:
  3927. DB 9,0,0,0
  3928. DD full_handler wrt ..imagebase
  3929. DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
  3930. DD 32*5+56,0
  3931. $L$SEH_info_ecp_nistz256_point_add:
  3932. DB 9,0,0,0
  3933. DD full_handler wrt ..imagebase
  3934. DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
  3935. DD 32*18+56,0
  3936. $L$SEH_info_ecp_nistz256_point_add_affine:
  3937. DB 9,0,0,0
  3938. DD full_handler wrt ..imagebase
  3939. DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
  3940. DD 32*15+56,0
  3941. ALIGN 8
  ; The "x" variants of the point routines use the same frame sizes.
  3942. $L$SEH_info_ecp_nistz256_point_doublex:
  3943. DB 9,0,0,0
  3944. DD full_handler wrt ..imagebase
  3945. DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
  3946. DD 32*5+56,0
  3947. $L$SEH_info_ecp_nistz256_point_addx:
  3948. DB 9,0,0,0
  3949. DD full_handler wrt ..imagebase
  3950. DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
  3951. DD 32*18+56,0
  3952. $L$SEH_info_ecp_nistz256_point_add_affinex:
  3953. DB 9,0,0,0
  3954. DD full_handler wrt ..imagebase
  3955. DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
  3956. DD 32*15+56,0