You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

nn_ops.py 287 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
5 years ago
6 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
7577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Operators for nn."""
  16. import math
  17. import operator
  18. from functools import reduce
  19. import numpy as np
  20. from ... import context
  21. from .. import signature as sig
  22. from ..._checkparam import Validator as validator
  23. from ..._checkparam import Rel
  24. from ...common import dtype as mstype
  25. from ..primitive import Primitive, PrimitiveWithInfer, PrimitiveWithCheck, prim_attr_register
  26. def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=False, ret_four=False):
  27. """
  28. Checks whether an argument is a positive int or tuple with 2 or 4(when allow_four is True) positive int elements.
  29. """
  30. def _raise_message():
  31. raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two "
  32. f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")
  33. def _get_return_value():
  34. if isinstance(arg_value, int):
  35. ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)
  36. elif len(arg_value) == 2:
  37. ret = (1, 1, arg_value[0], arg_value[1]) if ret_four else arg_value
  38. elif len(arg_value) == 4:
  39. if not allow_four:
  40. _raise_message()
  41. ret = arg_value if ret_four else (arg_value[2], arg_value[3])
  42. else:
  43. _raise_message()
  44. return ret
  45. validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
  46. ret_value = _get_return_value()
  47. for item in ret_value:
  48. if isinstance(item, int) and item > 0:
  49. continue
  50. _raise_message()
  51. return ret_value
  52. def _check_shape(arg_name, arg_value, prim_name):
  53. """
  54. Checks whether an shape dims is a positive int elements.
  55. """
  56. def _raise_message():
  57. raise ValueError(f"For '{prim_name}' attr '{arg_name}' dims elements should be positive int numbers, "
  58. f"but got {arg_value}")
  59. validator.check_value_type(arg_name, arg_value, (list, tuple), prim_name)
  60. for item in arg_value:
  61. if isinstance(item, int) and item > 0:
  62. continue
  63. _raise_message()
  64. return arg_value
  65. class Flatten(PrimitiveWithInfer):
  66. r"""
  67. Flattens a tensor without changing its batch size on the 0-th axis.
  68. Inputs:
  69. - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)` to be flattened.
  70. Outputs:
  71. Tensor, the shape of the output tensor is :math:`(N, X)`, where :math:`X` is
  72. the product of the remaining dimension.
  73. Examples:
  74. >>> input_tensor = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32)
  75. >>> flatten = P.Flatten()
  76. >>> output = flatten(input_tensor)
  77. >>> assert output.shape == (1, 24)
  78. """
  79. @prim_attr_register
  80. def __init__(self):
  81. pass
  82. def infer_shape(self, input_x):
  83. validator.check_int(len(input_x), 1, Rel.GE, 'input_x rank', self.name)
  84. prod = 1 if len(input_x) == 1 else reduce(operator.mul, input_x[1:])
  85. return input_x[0], prod
  86. def infer_dtype(self, input_x):
  87. validator.check_subclass("input_x", input_x, mstype.tensor, self.name)
  88. return input_x
class Softmax(PrimitiveWithInfer):
    r"""
    Softmax operation.

    Applies the Softmax operation to the input tensor on the specified axis.
    Suppose a slice in the given axis :math:`x`, then for each element :math:`x_i`,
    the Softmax function is shown as follows:

    .. math::
        \text{output}(x_i) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},

    where :math:`N` is the length of the tensor.

    Args:
        axis (Union[int, tuple]): The axis to perform the Softmax operation. Default: -1.

    Inputs:
        - **logits** (Tensor) - The input of Softmax, with float16 or float32 data type.

    Outputs:
        Tensor, with the same type and shape as the logits.

    Examples:
        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
        >>> softmax = P.Softmax()
        >>> softmax(input_x)
        [0.01165623, 0.03168492, 0.08612854, 0.23412167, 0.6364086]
    """

    @prim_attr_register
    def __init__(self, axis=-1):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])
        validator.check_value_type("axis", axis, [int, tuple], self.name)
        if isinstance(axis, int):
            # Normalize a scalar axis into a 1-tuple attribute so the loop
            # below (and infer_shape) can treat self.axis uniformly.
            self.add_prim_attr('axis', (axis,))
        # NOTE(review): for the tuple case there is no visible assignment of
        # self.axis here — presumably @prim_attr_register binds the ctor
        # argument as an attribute; confirm against the Primitive base class.
        for item in self.axis:
            validator.check_value_type("item of axis", item, [int], self.name)

    def infer_shape(self, logits):
        # Softmax is shape-preserving; only the axis values need validation.
        validator.check_int(len(self.axis), 1, Rel.GE, "length of axis", self.name)
        rank = len(logits)
        for axis_v in self.axis:
            # Each axis must lie in [-rank, rank) to index a real dimension.
            validator.check_int_range(axis_v, -rank, rank, Rel.INC_LEFT, "axis", self.name)
        return logits

    def infer_dtype(self, logits):
        # Input must be a Tensor with a floating-point element type.
        validator.check_subclass("logits", logits, mstype.tensor, self.name)
        validator.check_tensor_type_same({"logits": logits}, mstype.float_type, self.name)
        return logits
  128. class LogSoftmax(PrimitiveWithInfer):
  129. r"""
  130. Log Softmax activation function.
  131. Applies the Log Softmax function to the input tensor on the specified axis.
  132. Suppose a slice in the given aixs, :math:`x` for each element :math:`x_i`,
  133. the Log Softmax function is shown as follows:
  134. .. math::
  135. \text{output}(x_i) = \log \left(\frac{exp(x_i)} {\sum_{j = 0}^{N-1}\exp(x_j)}\right),
  136. where :math:`N` is the length of the Tensor.
  137. Args:
  138. axis (int): The axis to perform the Log softmax operation. Default: -1.
  139. Inputs:
  140. - **logits** (Tensor) - The input of Log Softmax, with float16 or float32 data type.
  141. Outputs:
  142. Tensor, with the same type and shape as the logits.
  143. Examples:
  144. >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
  145. >>> log_softmax = P.LogSoftmax()
  146. >>> log_softmax(input_x)
  147. [-4.4519143, -3.4519143, -2.4519143, -1.4519144, -0.4519144]
  148. """
  149. @prim_attr_register
  150. def __init__(self, axis=-1):
  151. validator.check_value_type("axis", axis, [int], self.name)
  152. def infer_shape(self, logits):
  153. rank = len(logits)
  154. validator.check_int_range(self.axis, -rank, rank, Rel.INC_LEFT, 'axis', self.name)
  155. return logits
  156. def infer_dtype(self, logits):
  157. validator.check_subclass("logits", logits, mstype.tensor, self.name)
  158. validator.check_tensor_type_same({"logits": logits}, mstype.float_type, self.name)
  159. return logits
  160. class Softplus(PrimitiveWithInfer):
  161. r"""
  162. Softplus activation function.
  163. Softplus is a smooth approximation to the ReLU function.
  164. The function is shown as follows:
  165. .. math::
  166. \text{output} = \log(1 + \exp(\text{input_x})),
  167. Inputs:
  168. - **input_x** (Tensor) - The input tensor whose data type must be float.
  169. Outputs:
  170. Tensor, with the same type and shape as the `input_x`.
  171. Examples:
  172. >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
  173. >>> softplus = P.Softplus()
  174. >>> softplus(input_x)
  175. [1.3132615, 2.126928, 3.0485873, 4.01815, 5.0067153]
  176. """
  177. @prim_attr_register
  178. def __init__(self):
  179. """Initialize Softplus"""
  180. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  181. def infer_shape(self, input_x):
  182. return input_x
  183. def infer_dtype(self, input_x):
  184. validator.check_tensor_type_same({'input_x': input_x}, mstype.float_type, self.name)
  185. return input_x
  186. class Softsign(PrimitiveWithInfer):
  187. r"""
  188. Softsign activation function.
  189. The function is shown as follows:
  190. .. math::
  191. \text{output} = \frac{\text{input_x}}{1 + \left| \text{input_x} \right|},
  192. Inputs:
  193. - **input_x** (Tensor) - The input tensor whose data type must be float16 or float32.
  194. Outputs:
  195. Tensor, with the same type and shape as the `input_x`.
  196. Examples:
  197. >>> input_x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
  198. >>> softsign = P.Softsign()
  199. >>> softsign(input_x)
  200. [0. -0.5 0.6666667 0.9677419 -0.9677419]
  201. """
  202. @prim_attr_register
  203. def __init__(self):
  204. """Initialize Softsign"""
  205. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  206. def infer_shape(self, input_x):
  207. return input_x
  208. def infer_dtype(self, input_x):
  209. validator.check_tensor_type_same({'input_x': input_x}, [mstype.float16, mstype.float32], self.name)
  210. return input_x
  211. class ReLU(PrimitiveWithInfer):
  212. r"""
  213. Computes ReLU(Rectified Linear Unit) of input tensor element-wise.
  214. It returns :math:`\max(x,\ 0)` element-wise.
  215. Inputs:
  216. - **input_x** (Tensor) - The input tensor.
  217. Outputs:
  218. Tensor, with the same type and shape as the `input_x`.
  219. Examples:
  220. >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
  221. >>> relu = P.ReLU()
  222. >>> result = relu(input_x)
  223. [[0, 4.0, 0.0], [2.0, 0.0, 9.0]]
  224. """
  225. @prim_attr_register
  226. def __init__(self):
  227. """Initialize ReLU"""
  228. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  229. def infer_shape(self, input_x):
  230. return input_x
  231. def infer_dtype(self, input_x):
  232. validator.check_tensor_type_same({'input_x': input_x}, mstype.number_type, self.name)
  233. return input_x
  234. class ReLU6(PrimitiveWithInfer):
  235. r"""
  236. Computes ReLU(Rectified Linear Unit) upper bounded by 6 of input tensor element-wise.
  237. It returns :math:`\min(\max(0,x), 6)` element-wise.
  238. Inputs:
  239. - **input_x** (Tensor) - The input tensor, with float16 or float32 data type.
  240. Outputs:
  241. Tensor, with the same type and shape as the `input_x`.
  242. Examples:
  243. >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
  244. >>> relu6 = P.ReLU6()
  245. >>> result = relu6(input_x)
  246. """
  247. @prim_attr_register
  248. def __init__(self):
  249. """Initialize ReLU6"""
  250. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  251. def infer_shape(self, input_x):
  252. return input_x
  253. def infer_dtype(self, input_x):
  254. validator.check_tensor_type_same({'input_x': input_x}, (mstype.float16, mstype.float32), self.name)
  255. return input_x
  256. class ReLUV2(PrimitiveWithInfer):
  257. r"""
  258. Computes ReLU(Rectified Linear Unit) of input tensor element-wise.
  259. It returns :math:`\max(x,\ 0)` element-wise.
  260. Inputs:
  261. - **input_x** (Tensor) - The input tensor must be a 4-D tensor.
  262. Outputs:
  263. - **output** (Tensor) - Has the same type and shape as the `input_x`.
  264. - **mask** (Tensor) - A tensor whose data type must be uint8.
  265. Examples:
  266. >>> input_x = Tensor(np.array([[[[1, -2], [-3, 4]], [[-5, 6], [7, -8]]]]), mindspore.float32)
  267. >>> relu_v2 = P.ReLUV2()
  268. >>> output = relu_v2(input_x)
  269. ([[[[1., 0.], [0., 4.]], [[0., 6.], [7., 0.]]]],
  270. [[[[1, 0], [2, 0]], [[2, 0], [1, 0]]]])
  271. """
  272. @prim_attr_register
  273. def __init__(self):
  274. """Initialize ReLUV2"""
  275. self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask'])
  276. def __infer__(self, input_x):
  277. input_shape = list(input_x['shape'])
  278. input_dtype = input_x['dtype']
  279. mask_shape = []
  280. if len(input_shape) != 4:
  281. raise ValueError("The `input_x` should be a 4-D tensor, "
  282. f"but got a {len(input_shape)}-D tensor whose shape is {input_shape}")
  283. for i in enumerate(input_shape):
  284. if i[0] == 1:
  285. if input_dtype == mstype.uint8 and input_dtype == mstype.int8:
  286. mask_shape.append((input_shape[1] + 31) // 32)
  287. else:
  288. mask_shape.append((input_shape[1] + 15) // 16)
  289. else:
  290. mask_shape.append(i[1])
  291. if input_dtype == mstype.uint8 and input_dtype == mstype.int8:
  292. mask_shape.append(4)
  293. else:
  294. mask_shape.append(2)
  295. output_shape = (input_x['shape'], mask_shape)
  296. validator.check_subclass("input_x", input_dtype, mstype.tensor, self.name)
  297. validator.check_tensor_type_same({'input_x': input_dtype}, mstype.number_type, self.name)
  298. mask_dtype = mstype.uint8
  299. output_dtype = (input_dtype, mask_dtype)
  300. return {'shape': output_shape,
  301. 'dtype': output_dtype,
  302. 'value': None}
  303. class Elu(PrimitiveWithInfer):
  304. r"""
  305. Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise.
  306. The data type of input tensor must be float.
  307. Args:
  308. alpha (float): The coefficient of negative factor whose type is float,
  309. only support '1.0' currently. Default: 1.0.
  310. Inputs:
  311. - **input_x** (Tensor) - The input tensor whose data type must be float.
  312. Outputs:
  313. Tensor, has the same shape and data type as `input_x`.
  314. Examples:
  315. >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
  316. >>> elu = P.Elu()
  317. >>> result = elu(input_x)
  318. Tensor([[-0.632 4.0 -0.999]
  319. [2.0 -0.993 9.0 ]], shape=(2, 3), dtype=mindspore.float32)
  320. """
  321. @prim_attr_register
  322. def __init__(self, alpha=1.0):
  323. """Initialize Elu"""
  324. validator.check_value_type("alpha", alpha, [float], self.name)
  325. validator.check_number("alpha", alpha, 1.0, Rel.EQ, self.name)
  326. def infer_shape(self, input_x):
  327. return input_x
  328. def infer_dtype(self, input_x):
  329. validator.check_tensor_type_same({'input_x': input_x}, mstype.float_type, self.name)
  330. return input_x
  331. class HSwish(PrimitiveWithInfer):
  332. r"""
  333. Hard swish activation function.
  334. Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
  335. Hard swish is defined as:
  336. .. math::
  337. \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},
  338. where :math:`x_{i}` is the :math:`i`-th slice in the given dimension of the input Tensor.
  339. Inputs:
  340. - **input_data** (Tensor) - The input of HSwish, data type must be float16 or float32.
  341. Outputs:
  342. Tensor, with the same type and shape as the `input_data`.
  343. Examples:
  344. >>> hswish = P.HSwish()
  345. >>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
  346. >>> result = hswish(input_x)
  347. """
  348. @prim_attr_register
  349. def __init__(self):
  350. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  351. def infer_shape(self, xshape):
  352. return xshape
  353. def infer_dtype(self, x_dtype):
  354. validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
  355. return x_dtype
  356. class Sigmoid(PrimitiveWithInfer):
  357. r"""
  358. Sigmoid activation function.
  359. Computes Sigmoid of input element-wise. The Sigmoid function is defined as:
  360. .. math::
  361. \text{sigmoid}(x_i) = \frac{1}{1 + exp(-x_i)},
  362. where :math:`x_i` is the element of the input.
  363. Inputs:
  364. - **input_x** (Tensor) - The input of Sigmoid, data type must be float16 or float32.
  365. Outputs:
  366. Tensor, with the same type and shape as the input_x.
  367. Examples:
  368. >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
  369. >>> sigmoid = P.Sigmoid()
  370. >>> sigmoid(input_x)
  371. [0.73105866, 0.880797, 0.9525742, 0.98201376, 0.9933071]
  372. """
  373. @prim_attr_register
  374. def __init__(self):
  375. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  376. def infer_shape(self, input_x):
  377. return input_x
  378. def infer_dtype(self, input_x):
  379. validator.check_tensor_type_same({"input_x": input_x}, (mstype.float16, mstype.float32), self.name)
  380. return input_x
  381. class HSigmoid(PrimitiveWithInfer):
  382. r"""
  383. Hard sigmoid activation function.
  384. Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
  385. Hard sigmoid is defined as:
  386. .. math::
  387. \text{hsigmoid}(x_{i}) = max(0, min(1, \frac{x_{i} + 3}{6})),
  388. where :math:`x_{i}` is the :math:`i`-th slice in the given dimension of the input Tensor.
  389. Inputs:
  390. - **input_data** (Tensor) - The input of HSigmoid, data type must be float16 or float32.
  391. Outputs:
  392. Tensor, with the same type and shape as the `input_data`.
  393. Examples:
  394. >>> hsigmoid = P.HSigmoid()
  395. >>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
  396. >>> result = hsigmoid(input_x)
  397. """
  398. @prim_attr_register
  399. def __init__(self):
  400. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  401. def infer_shape(self, x_shape):
  402. return x_shape
  403. def infer_dtype(self, x_dtype):
  404. validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
  405. return x_dtype
  406. class Tanh(PrimitiveWithInfer):
  407. r"""
  408. Tanh activation function.
  409. Computes hyperbolic tangent of input element-wise. The Tanh function is defined as:
  410. .. math::
  411. tanh(x_i) = \frac{\exp(x_i) - \exp(-x_i)}{\exp(x_i) + \exp(-x_i)} = \frac{\exp(2x_i) - 1}{\exp(2x_i) + 1},
  412. where :math:`x_i` is an element of the input Tensor.
  413. Inputs:
  414. - **input_x** (Tensor) - The input of Tanh.
  415. Outputs:
  416. Tensor, with the same type and shape as the input_x.
  417. Examples:
  418. >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
  419. >>> tanh = P.Tanh()
  420. >>> tanh(input_x)
  421. [0.7615941, 0.9640276, 0.9950548, 0.9993293, 0.99990916]
  422. """
  423. @prim_attr_register
  424. def __init__(self):
  425. pass
  426. def infer_shape(self, input_x):
  427. return input_x
  428. def infer_dtype(self, input_x):
  429. validator.check_subclass("input_x", input_x, mstype.tensor, self.name)
  430. return input_x
class FusedBatchNorm(Primitive):
    r"""
    FusedBatchNorm is a BatchNorm that moving mean and moving variance will be computed instead of being loaded.

    Batch Normalization is widely used in convolutional networks. This operation applies
    Batch Normalization over input to avoid internal covariate shift as described in the
    paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal
    Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
    feature using a mini-batch of data and the learned parameters which can be described
    in the following formula.

    .. math::
        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.

    Args:
        mode (int): Mode of batch normalization, value is 0 or 1. Default: 0.
        epsilon (float): A small value added for numerical stability. Default: 1e-5.
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be [0, 1]. Default: 0.1.

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, C)`.
        - **scale** (Tensor) - Tensor of shape :math:`(C,)`.
        - **bias** (Tensor) - Tensor of shape :math:`(C,)`.
        - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **variance** (Tensor) - Tensor of shape :math:`(C,)`.

    Outputs:
        Tuple of 5 Tensor, the normalized input and the updated parameters.

        - **output_x** (Tensor) - The same type and shape as the `input_x`.
        - **updated_scale** (Tensor) - Tensor of shape :math:`(C,)`.
        - **updated_bias** (Tensor) - Tensor of shape :math:`(C,)`.
        - **updated_moving_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **updated_moving_variance** (Tensor) - Tensor of shape :math:`(C,)`.

    Examples:
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> scale = Tensor(np.ones([64]), mindspore.float32)
        >>> bias = Tensor(np.ones([64]), mindspore.float32)
        >>> mean = Tensor(np.ones([64]), mindspore.float32)
        >>> variance = Tensor(np.ones([64]), mindspore.float32)
        >>> op = P.FusedBatchNorm()
        >>> output = op(input_x, scale, bias, mean, variance)
    """
    # scale/bias/mean/variance are declared writable: the op updates these
    # Parameters in place during training.
    __mindspore_signature__ = (
        sig.make_sig('input_x', dtype=sig.sig_dtype.T2),
        sig.make_sig('scale', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('bias', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('mean', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('variance', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1):
        """Initialize FusedBatchNorm; validates mode/epsilon/momentum ranges."""
        self.init_prim_io_names(inputs=['x', 'scale', 'b', 'mean', 'variance'],
                                outputs=['y', 'running_mean', 'running_variance', 'save_mean', 'save_inv_variance'])
        self.mode = validator.check_int(mode, [0, 1], Rel.IN, 'mode', self.name)
        # epsilon in (0, 1]; momentum in [0, 1].
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
        # Marks the op as one that mutates its Parameter inputs.
        self._update_parameter = True
class FusedBatchNormEx(PrimitiveWithInfer):
    r"""
    FusedBatchNormEx is an extension of FusedBatchNorm, FusedBatchNormEx has one more output(output reserve)
    than FusedBatchNorm, reserve will be used in backpropagation phase. FusedBatchNorm is a BatchNorm that
    moving mean and moving variance will be computed instead of being loaded.

    Batch Normalization is widely used in convolutional networks. This operation applies
    Batch Normalization over input to avoid internal covariate shift as described in the
    paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal
    Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
    feature using a mini-batch of data and the learned parameters which can be described
    in the following formula.

    .. math::
        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.

    Args:
        mode (int): Mode of batch normalization, value is 0 or 1. Default: 0.
        epsilon (float): A small value added for numerical stability. Default: 1e-5.
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be [0, 1]. Default: 0.1.

    Inputs:
        - **input_x** (Tensor) - The input of FusedBatchNormEx, Tensor of shape :math:`(N, C)`,
          data type: float16 or float32.
        - **scale** (Tensor) - Parameter scale, same with gamma above-mentioned, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **bias** (Tensor) - Parameter bias, same with beta above-mentioned, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **mean** (Tensor) - mean value, Tensor of shape :math:`(C,)`, data type: float32.
        - **variance** (Tensor) - variance value, Tensor of shape :math:`(C,)`, data type: float32.

    Outputs:
        Tuple of 6 Tensors, the normalized input, the updated parameters and reserve.

        - **output_x** (Tensor) - The input of FusedBatchNormEx, same type and shape as the `input_x`.
        - **updated_scale** (Tensor) - Updated parameter scale, Tensor of shape :math:`(C,)`, data type: float32.
        - **updated_bias** (Tensor) - Updated parameter bias, Tensor of shape :math:`(C,)`, data type: float32.
        - **updated_moving_mean** (Tensor) - Updated mean value, Tensor of shape :math:`(C,)`, data type: float32.
        - **updated_moving_variance** (Tensor) - Updated variance value, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **reserve** (Tensor) - reserve space, Tensor of shape :math:`(C,)`, data type: float32.

    Examples:
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> scale = Tensor(np.ones([64]), mindspore.float32)
        >>> bias = Tensor(np.ones([64]), mindspore.float32)
        >>> mean = Tensor(np.ones([64]), mindspore.float32)
        >>> variance = Tensor(np.ones([64]), mindspore.float32)
        >>> op = P.FusedBatchNormEx()
        >>> output = op(input_x, scale, bias, mean, variance)
    """
    # scale/bias/mean/variance are declared writable: the op updates these
    # Parameters in place during training.
    __mindspore_signature__ = (
        sig.make_sig('input_x', dtype=sig.sig_dtype.T2),
        sig.make_sig('scale', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('bias', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('mean', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('variance', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1):
        """Initialize FusedBatchNormEx; validates mode/epsilon/momentum ranges."""
        self.init_prim_io_names(inputs=['x', 'scale', 'b', 'mean', 'variance'],
                                outputs=['y', 'save_scale', 'save_bias', 'save_mean', 'save_inv_variance', 'reserve'])
        self.mode = validator.check_int(mode, [0, 1], Rel.IN, 'mode', self.name)
        # epsilon in (0, 1]; momentum in [0, 1].
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
        # Marks the op as one that mutates its Parameter inputs.
        self._update_parameter = True
        self.add_prim_attr('data_format', "NCHW")

    def infer_shape(self, input_x, scale, bias, mean, variance):
        # scale/bias/mean/variance must all be 1-D of length C == input_x.shape[1].
        validator.check_equal_int(len(scale), 1, "scale rank", self.name)
        validator.check("scale shape", scale, "bias shape", bias, Rel.EQ, self.name)
        validator.check("scale shape[0]", scale[0], "input_x shape[1]", input_x[1], Rel.EQ, self.name)
        validator.check_equal_int(len(mean), 1, "mean rank", self.name)
        validator.check("mean shape", mean, "variance shape", variance, Rel.EQ, self.name)
        validator.check("mean shape", mean, "scale shape", scale, Rel.EQ, self.name)
        # Five (C,)-shaped side outputs plus the normalized input.
        return (input_x, scale, scale, scale, scale, scale)

    def infer_dtype(self, input_x, scale, bias, mean, variance):
        # input may be float16/float32; the parameter tensors must be float32.
        validator.check_tensor_type_same({"input_x": input_x}, [mstype.float16, mstype.float32], self.name)
        args = {"scale": scale, "bias": bias}
        validator.check_tensor_type_same(args, [mstype.float32], self.name)
        args_moving = {"mean": mean, "variance": variance}
        valid_types = [mstype.tensor_type(mstype.float32)]
        validator.check_type_same(args_moving, valid_types, self.name)
        return (input_x, scale, scale, scale, scale, scale)
class BNTrainingReduce(PrimitiveWithInfer):
    """
    For BatchNorm operator, this operator update the moving averages for training and is used in conjunction with
    BNTrainingUpdate.

    Inputs:
        - **x** (Tensor) - A 4-D Tensor with float16 or float32 data type. Tensor of shape :math:`(N, C, A, B)`.

    Outputs:
        - **sum** (Tensor) - A 1-D Tensor with float32 data type. Tensor of shape :math:`(C,)`.
        - **square_sum** (Tensor) - A 1-D Tensor with float32 data type. Tensor of shape :math:`(C,)`.

    Examples:
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> bn_training_reduce = P.BNTrainingReduce()
        >>> output = bn_training_reduce(input_x)
    """

    @prim_attr_register
    def __init__(self):
        """Initialize BNTrainingReduce."""
        self.init_prim_io_names(inputs=['x'], outputs=['sum', 'square_sum'])

    def infer_shape(self, x_shape):
        # Both outputs are per-channel reductions: shape (C,) with C = x_shape[1].
        validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
        return ([x_shape[1]], [x_shape[1]])

    def infer_dtype(self, x_type):
        # NOTE(review): the docstring says the outputs are float32, but the
        # inferred dtype follows `x` (float16 or float32) — confirm intent.
        validator.check_tensor_type_same({"x_type": x_type}, [mstype.float16, mstype.float32], self.name)
        return (x_type, x_type)
class BNTrainingUpdate(PrimitiveWithInfer):
    """
    For BatchNorm operator, this operator update the moving averages for training and is used in conjunction with
    BNTrainingReduce.

    Args:
        isRef (bool): If a ref. Default: True.
        epsilon (float): A small value added to variance avoid dividing by zero. Default: 1e-5.
        factor (float): A weight for updating the mean and variance. Default: 0.1.

    Inputs:
        - **x** (Tensor) - A 4-D Tensor with float16 or float32 data type. Tensor of shape :math:`(N, C, A, B)`.
        - **sum** (Tensor) - A 1-D Tensor with float16 or float32 data type for the output of operator BNTrainingReduce.
          Tensor of shape :math:`(C,)`.
        - **square_sum** (Tensor) - A 1-D Tensor with float16 or float32 data type for the output of operator
          BNTrainingReduce. Tensor of shape :math:`(C,)`.
        - **scale** (Tensor) - A 1-D Tensor with float16 or float32, for the scaling factor.
          Tensor of shape :math:`(C,)`.
        - **offset** (Tensor) - A 1-D Tensor with float16 or float32, for the scaling offset.
          Tensor of shape :math:`(C,)`.
        - **mean** (Tensor) - A 1-D Tensor with float16 or float32, for the scaling mean. Tensor of shape :math:`(C,)`.
        - **variance** (Tensor) - A 1-D Tensor with float16 or float32, for the update variance.
          Tensor of shape :math:`(C,)`.

    Outputs:
        - **y** (Tensor) - Tensor, has the same shape data type as `x`.
        - **mean** (Tensor) - Tensor for the updated mean, with float32 data type.
          Has the same shape as `variance`.
        - **variance** (Tensor) - Tensor for the updated variance, with float32 data type.
          Has the same shape as `variance`.
        - **batch_mean** (Tensor) - Tensor for the mean of `x`, with float32 data type.
          Has the same shape as `variance`.
        - **batch_variance** (Tensor) - Tensor for the mean of `variance`, with float32 data type.
          Has the same shape as `variance`.

    Examples:
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> sum = Tensor(np.ones([64]), mindspore.float32)
        >>> square_sum = Tensor(np.ones([64]), mindspore.float32)
        >>> scale = Tensor(np.ones([64]), mindspore.float32)
        >>> offset = Tensor(np.ones([64]), mindspore.float32)
        >>> mean = Tensor(np.ones([64]), mindspore.float32)
        >>> variance = Tensor(np.ones([64]), mindspore.float32)
        >>> bn_training_update = P.BNTrainingUpdate()
        >>> output = bn_training_update(input_x, sum, square_sum, scale, offset, mean, variance)
    """

    @prim_attr_register
    def __init__(self, isRef=True, epsilon=1e-5, factor=0.1):
        """Initialize BNTrainingUpdate; validates argument types and ranges."""
        self.init_prim_io_names(inputs=['x', 'sum', 'square_sum', 'scale', 'b', 'mean', 'variance'],
                                outputs=['y', 'running_mean', 'running_variance', 'save_mean', 'save_inv_variance'])
        validator.check_value_type("isRef", isRef, [bool], self.name)
        validator.check_value_type("epsilon", epsilon, [float], self.name)
        validator.check_value_type("factor", factor, [float], self.name)
        # epsilon in (0, 1]; factor in [0, 1].
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
        self.factor = validator.check_float_range(factor, 0, 1, Rel.INC_BOTH, 'factor', 'BNTrainingUpdate')

    def infer_shape(self, x, sum, square_sum, scale, b, mean, variance):
        # `x` is 4-D (N, C, A, B); every other input is 1-D of length C.
        validator.check_equal_int(len(x), 4, "x rank", self.name)
        validator.check_equal_int(len(sum), 1, "sum rank", self.name)
        validator.check_equal_int(len(square_sum), 1, "square_sum rank", self.name)
        validator.check_equal_int(len(scale), 1, "scale rank", self.name)
        validator.check_equal_int(len(b), 1, "b rank", self.name)
        validator.check_equal_int(len(mean), 1, "mean rank", self.name)
        validator.check_equal_int(len(variance), 1, "variance rank", self.name)
        validator.check("sum shape", sum, "x_shape[1]", x[1], Rel.EQ, self.name)
        validator.check("square_sum shape", square_sum, "sum", sum, Rel.EQ, self.name)
        validator.check("scale shape", scale, "x_shape[1]", x[1], Rel.EQ, self.name)
        validator.check("offset shape", b, "x_shape[1]", x[1], Rel.EQ, self.name)
        validator.check("mean shape", mean, "x_shape[1]", x[1], Rel.EQ, self.name)
        validator.check("variance shape", variance, "x_shape[1]", x[1], Rel.EQ, self.name)
        # y keeps x's shape; the four side outputs keep variance's shape (C,).
        return (x, variance, variance, variance, variance)

    def infer_dtype(self, x, sum, square_sum, scale, b, mean, variance):
        # Each input must independently be a float16/float32 tensor.
        validator.check_tensor_type_same({"x_type": x}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"sum_type": sum}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"square_sum_type": square_sum}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"scale_type": scale}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"b_type": b}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"mean_type": mean}, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"variance_type": variance}, [mstype.float16, mstype.float32], self.name)
        return (x, variance, variance, variance, variance)
class BatchNorm(PrimitiveWithInfer):
    r"""
    Batch Normalization for input data and updated parameters.

    Batch Normalization is widely used in convolutional neural networks. This operation
    applies Batch Normalization over input to avoid internal covariate shift as described
    in the paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal
    Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
    features using a mini-batch of data and the learned parameters which can be described
    in the following formula,

    .. math::
        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.

    Args:
        is_training (bool): If `is_training` is True, `mean` and `variance` are computed during training.
            If `is_training` is False, they're loaded from checkpoint during inference. Default: False.
        epsilon (float): A small value added for numerical stability. Default: 1e-5.

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, C)`, with float16 or float32 data type.
        - **scale** (Tensor) - Tensor of shape :math:`(C,)`, with float16 or float32 data type.
        - **bias** (Tensor) - Tensor of shape :math:`(C,)`, has the same data type with `scale`.
        - **mean** (Tensor) - Tensor of shape :math:`(C,)`, with float16 or float32 data type.
        - **variance** (Tensor) - Tensor of shape :math:`(C,)`, has the same data type with `mean`.

    Outputs:
        Tuple of 5 Tensor, the normalized inputs and the updated parameters.

        - **output_x** (Tensor) - The same type and shape as the input_x. The shape is :math:`(N, C)`.
        - **updated_scale** (Tensor) - Tensor of shape :math:`(C,)`.
        - **updated_bias** (Tensor) - Tensor of shape :math:`(C,)`.
        - **reserve_space_1** (Tensor) - Tensor of shape :math:`(C,)`.
        - **reserve_space_2** (Tensor) - Tensor of shape :math:`(C,)`.

    Examples:
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> scale = Tensor(np.ones([64]), mindspore.float32)
        >>> bias = Tensor(np.ones([64]), mindspore.float32)
        >>> mean = Tensor(np.ones([64]), mindspore.float32)
        >>> variance = Tensor(np.ones([64]), mindspore.float32)
        >>> batch_norm = P.BatchNorm()
        >>> output = batch_norm(input_x, scale, bias, mean, variance)
    """

    @prim_attr_register
    def __init__(self, is_training=False, epsilon=1e-5):
        """Initialize BatchNorm; `is_training` selects training vs. inference validation."""
        # NOTE: `self.is_training` used in infer_shape/infer_dtype is presumably
        # populated from this argument by @prim_attr_register — confirm.
        validator.check_value_type('is_training', is_training, (bool,), self.name)
        validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.add_prim_attr('data_format', "NCHW")
        self.init_prim_io_names(inputs=['x', 'scale', 'offset', 'mean', 'variance'],
                                outputs=['y', 'batch_mean', 'batch_variance', 'reserve_space_1', 'reserve_space_2'])

    def infer_shape(self, input_x, scale, bias, mean, variance):
        # scale/bias must be 1-D of length C == input_x.shape[1].
        validator.check_equal_int(len(scale), 1, "scale rank", self.name)
        validator.check("scale shape", scale, "bias shape", bias, Rel.EQ, self.name)
        validator.check("scale shape[0]", scale[0], "input_x shape[1]", input_x[1], Rel.EQ, self.name)
        # In training mode mean/variance are computed, so their shapes are
        # only validated when loading them for inference.
        if not self.is_training:
            validator.check_equal_int(len(mean), 1, "mean rank", self.name)
            validator.check("mean shape", mean, "variance shape", variance, Rel.EQ, self.name)
            validator.check("mean shape", mean, "scale shape", scale, Rel.EQ, self.name)
        return (input_x, scale, scale, scale, scale)

    def infer_dtype(self, input_x, scale, bias, mean, variance):
        validator.check_tensor_type_same({"input_x": input_x}, [mstype.float16, mstype.float32], self.name)
        args = {"scale": scale, "bias": bias}
        validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
        args_moving = {"mean": mean, "variance": variance}
        if self.is_training:
            # During training, mean/variance may also be absent (None).
            valid_types = [mstype.tensor_type(mstype.float16), mstype.tensor_type(mstype.float32), None]
            validator.check_type_same(args_moving, valid_types, self.name)
        else:
            args_moving = {"mean": mean, "variance": variance}
            validator.check_tensor_type_same(args_moving, [mstype.float16, mstype.float32], self.name)
        # Reserve spaces take input_x's dtype (shapes come from infer_shape).
        return (input_x, scale, bias, input_x, input_x)
class Conv2D(PrimitiveWithInfer):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
    where :math:`N` is batch size and :math:`C_{in}` is channel number. For each batch of shape
    :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{ks_h}, \text{ks_w})`, where :math:`\text{ks_h}` and
    :math:`\text{ks_w}` are the height and width of the convolution kernel. The full kernel has shape
    :math:`(C_{out}, C_{in} // \text{group}, \text{ks_h}, \text{ks_w})`, where group is the group number
    to split the input in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output height and width will be
    :math:`\left \lfloor{1 + \frac{H_{in} + 2 \times \text{padding} - \text{ks_h} -
    (\text{ks_h} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and
    :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
    (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively.

    The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
    http://cs231n.github.io/convolutional-networks/.

    Args:
        out_channel (int): The dimension of the output.
        kernel_size (Union[int, tuple[int]]): The kernel size of the 2D convolution.
        mode (int): Modes for different convolutions. 0 Math convolutiuon, 1 cross-correlation convolution ,
            2 deconvolution, 3 depthwise convolution. Default: 1.
        pad_mode (str): Modes to fill padding. It could be "valid", "same", or "pad". Default: "valid".
        pad (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `pad` is an integer, the paddings of
            top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers, the
            padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3] correspondingly.
        stride (Union(int, tuple[int])): The stride to be applied to the convolution filter. Default: 1.
        dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: 1.
        group (int): Splits input into groups. Default: 1.

    Returns:
        Tensor, the value that applied 2D convolution.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
        - **weight** (Tensor) - Set size of kernel is :math:`(K_1, K_2)`, then the shape is
          :math:`(C_{out}, C_{in}, K_1, K_2)`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Examples:
        >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = P.Conv2D(out_channel=32, kernel_size=3)
        >>> conv2d(input, weight)
    """

    @prim_attr_register
    def __init__(self,
                 out_channel,
                 kernel_size,
                 mode=1,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1):
        """Initialize Conv2D"""
        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
        # Normalize kernel_size to a positive (h, w) tuple.
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        # stride/dilation are expanded to 4-tuples (N, C, H, W) and registered for the backend.
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('stride', self.stride)
        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('dilation', self.dilation)
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        if isinstance(pad, int):
            # A scalar pad means the same padding on all four sides: (top, bottom, left, right).
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.padding = pad
        self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name)
        # Explicit (non-zero) padding is only meaningful when pad_mode is 'pad'.
        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', padding must be zero when pad_mode is '{pad_mode}'.")
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)
        # Only mode 1 (cross-correlation convolution) is supported by this primitive.
        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
        self.add_prim_attr('data_format', "NCHW")
        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
        self.group = validator.check_positive_int(group, 'group', self.name)
        self.add_prim_attr('offset_a', 0)

    def infer_shape(self, x_shape, w_shape, b_shape=None):
        """Infer the NCHW output shape and register the resolved pad_list attribute."""
        validator.check_equal_int(len(w_shape), 4, "weight rank", self.name)
        validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
        # Grouped convolution: each filter sees C_in // group input channels.
        validator.check(f"x_shape[1] / group", x_shape[1] // self.group, "w_shape[1]", w_shape[1], Rel.EQ, self.name)
        validator.check('out_channel', self.out_channel, 'w_shape[0]', w_shape[0], Rel.EQ, self.name)
        validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name)
        kernel_size_h = w_shape[2]
        kernel_size_w = w_shape[3]
        stride_h = self.stride[2]
        stride_w = self.stride[3]
        dilation_h = self.dilation[2]
        dilation_w = self.dilation[3]
        if self.pad_mode == "valid":
            # No padding: only windows fully inside the input contribute.
            h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h)
            w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w)
            pad_top, pad_bottom, pad_left, pad_right = 0, 0, 0, 0
        elif self.pad_mode == "same":
            # Output size is ceil(input / stride); padding is split as evenly as possible,
            # with the odd pixel (if any) going to the bottom/right side.
            h_out = math.ceil(x_shape[2] / stride_h)
            w_out = math.ceil(x_shape[3] / stride_w)
            pad_needed_h = max(0, (h_out - 1) * stride_h + dilation_h * (kernel_size_h - 1) + 1 - x_shape[2])
            pad_top = math.floor(pad_needed_h / 2)
            pad_bottom = pad_needed_h - pad_top
            pad_needed_w = max(0, (w_out - 1) * stride_w + dilation_w * (kernel_size_w - 1) + 1 - x_shape[3])
            pad_left = math.floor(pad_needed_w / 2)
            pad_right = pad_needed_w - pad_left
        elif self.pad_mode == 'pad':
            # User-specified padding on each side.
            pad_top, pad_bottom, pad_left, pad_right = self.padding
            h_out = 1 + (x_shape[2] + pad_top + pad_bottom - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \
                / stride_h
            w_out = 1 + (x_shape[3] + pad_left + pad_right - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \
                / stride_w
            h_out = math.floor(h_out)
            w_out = math.floor(w_out)
        self.pad_list = [pad_top, pad_bottom, pad_left, pad_right]
        self.add_prim_attr('pad_list', (pad_top, pad_bottom, pad_left, pad_right))
        out_channel = self.out_channel
        out_shape = [x_shape[0], out_channel, h_out, w_out]
        _check_shape('output', out_shape, self.name)
        return out_shape

    def infer_dtype(self, x_dtype, w_dtype, b_dtype=None):
        """Infer the output dtype; int8 inputs accumulate into an int32 output."""
        args = {'x': x_dtype, 'w': w_dtype}
        valid_types = [mstype.int8, mstype.int32, mstype.float16, mstype.float32]
        validator.check_tensor_type_same(args, valid_types, self.name)
        if x_dtype.element_type() == mstype.int8:
            return mstype.tensor_type(mstype.int32)
        return x_dtype
class DepthwiseConv2dNative(PrimitiveWithInfer):
    r"""
    Returns the depth-wise convolution value for the input.

    Applies depthwise conv2d for the input, which will generate more channels with channel_multiplier.
    Given an input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` where :math:`N` is the batch size and a
    filter tensor with kernel size :math:`(ks_{h}, ks_{w})`, containing :math:`C_{in} * \text{channel_multiplier}`
    convolutional filters of depth 1; it applies different filters to each input channel (channel_multiplier channels
    for each input channel has the default value 1), then concatenates the results together. The output has
    :math:`\text{in_channels} * \text{channel_multiplier}` channels.

    Args:
        channel_multiplier (int): The multipiler for the original output convolution. Its value must be greater than 0.
        kernel_size (Union[int, tuple[int]]): The size of the convolution kernel.
        mode (int): Modes for different convolutions. 0 Math convolution, 1 cross-correlation convolution ,
            2 deconvolution, 3 depthwise convolution. Default: 3.
        pad_mode (str): Modes to fill padding. It could be "valid", "same", or "pad". Default: "valid".
        pad (Union[int, tuple[int]]): The pad value to be filled. If `pad` is an integer, the paddings of
            top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers, the padding
            of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3] correspondingly. Default: 0.
        stride (Union[int, tuple[int]]): The stride to be applied to the convolution filter. Default: 1.
        dilation (Union[int, tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
            Default: 1.
        group (int): Splits input into groups. Default: 1.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
        - **weight** (Tensor) - Set the size of kernel as :math:`(K_1, K_2)`, then the shape is
          :math:`(K, C_{in}, K_1, K_2)`, `K` must be 1.

    Outputs:
        Tensor of shape :math:`(N, C_{in} * \text{channel_multiplier}, H_{out}, W_{out})`.

    Examples:
        >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([1, 32, 3, 3]), mindspore.float32)
        >>> depthwise_conv2d = P.DepthwiseConv2dNative(channel_multiplier = 3, kernel_size = (3, 3))
        >>> output = depthwise_conv2d(input, weight)
        >>> output.shape == (10, 96, 30, 30)
    """

    @prim_attr_register
    def __init__(self,
                 channel_multiplier,
                 kernel_size,
                 mode=3,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1):
        """Initialize DepthwiseConv2dNative"""
        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name)
        # Unlike Conv2D, this primitive only supports square strides/dilations.
        if self.stride[0] != self.stride[1]:
            raise ValueError("The height and width of stride should be equal,"
                             f"but got height:{self.stride[0]}, width:{self.stride[1]}")
        self.add_prim_attr('stride', (1, 1, self.stride[0], self.stride[1]))
        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name)
        if self.dilation[0] != self.dilation[1]:
            raise ValueError("The height and width of dilation should be equal,"
                             f"but got height:{self.dilation[0]}, width:{self.dilation[1]}")
        self.add_prim_attr('dilation', (1, 1, self.dilation[0], self.dilation[1]))
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        if isinstance(pad, int):
            # A scalar pad applies to all four sides: (top, bottom, left, right).
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.padding = pad
        self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name)
        # Explicit (non-zero) padding is only meaningful when pad_mode is 'pad'.
        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', padding must be zero when pad_mode is '{pad_mode}'.")
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)
        # Mode 3 == depthwise convolution, the only mode this primitive accepts.
        self.mode = validator.check_equal_int(mode, 3, "mode", self.name)
        self.add_prim_attr('data_format', "NCHW")
        self.channel_multiplier = validator.check_positive_int(channel_multiplier, "channel_multiplier", self.name)
        self.group = validator.check_positive_int(group, "group", self.name)
        self.add_prim_attr('offset_a', 0)

    def infer_shape(self, x_shape, w_shape, b_shape=None):
        """Infer output shape; output channels = channel_multiplier * C_in."""
        validator.check_equal_int(len(w_shape), 4, "weight rank", self.name)
        validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
        validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], Rel.EQ, self.name)
        validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name)
        kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape
        _, _, stride_h, stride_w = self.stride
        _, _, dilation_h, dilation_w = self.dilation
        # Depthwise weights are stored with a leading dimension of 1 (see class docstring).
        if kernel_size_n != 1:
            raise ValueError(f"The batch of input weight should be 1, but got {kernel_size_n}")
        if self.pad_mode == "valid":
            # No padding: only windows fully inside the input contribute.
            h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h)
            w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w)
            pad_top, pad_bottom, pad_left, pad_right = 0, 0, 0, 0
        elif self.pad_mode == "same":
            # Output size is ceil(input / stride); extra padding pixel goes to bottom/right.
            h_out = math.ceil(x_shape[2] / stride_h)
            w_out = math.ceil(x_shape[3] / stride_w)
            pad_needed_h = max(0, (h_out - 1) * stride_h + dilation_h * (kernel_size_h - 1) + 1 - x_shape[2])
            pad_top = math.floor(pad_needed_h / 2)
            pad_bottom = pad_needed_h - pad_top
            pad_needed_w = max(0, (w_out - 1) * stride_w + dilation_w * (kernel_size_w - 1) + 1 - x_shape[3])
            pad_left = math.floor(pad_needed_w / 2)
            pad_right = pad_needed_w - pad_left
        elif self.pad_mode == 'pad':
            pad_top, pad_bottom, pad_left, pad_right = self.padding
            h_out = 1 + (x_shape[2] + pad_top + pad_bottom - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \
                / stride_h
            w_out = 1 + (x_shape[3] + pad_left + pad_right - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \
                / stride_w
            h_out = math.floor(h_out)
            w_out = math.floor(w_out)
        # NOTE: this primitive registers the resolved padding under 'pads',
        # whereas Conv2D registers it under 'pad_list'.
        self.pad_list = (pad_top, pad_bottom, pad_left, pad_right)
        self.add_prim_attr('pads', self.pad_list)
        out_channel = self.channel_multiplier * x_shape[1]
        out_shape = [x_shape[0], out_channel, h_out, w_out]
        return out_shape

    def infer_dtype(self, x_dtype, w_dtype, b_dtype=None):
        """Infer the output dtype; int8 inputs produce an int32 output."""
        args = {'x': x_dtype, 'w': w_dtype}
        validator.check_tensor_type_same(args, mstype.number_type, self.name)
        if x_dtype.element_type() == mstype.int8:
            return mstype.tensor_type(mstype.int32)
        return x_dtype
class _Pool(PrimitiveWithInfer):
    r"""
    Performs max/avg pooling operation.

    Args:
        ksize (Union[int, tuple[int]]): The size of the kernel, that must be a tuple
            of two `int` for height and width. Default: 1.
        strides (Union[int, tuple[int]]): The stride of the window, that must be
            a tuple of two `int` for height and width. Default: 1.
        padding (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
            Default: "valid".
    """

    @prim_attr_register
    def __init__(self, ksize=1, strides=1, padding="valid"):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])
        validator.check_value_type('ksize', ksize, [int, tuple], self.name)
        validator.check_value_type('strides', strides, [int, tuple], self.name)
        # padding is normalized to upper case ("VALID"/"SAME") before storing.
        self.padding = validator.check_string(padding.upper(), ['VALID', 'SAME'], 'padding', self.name)
        self.add_prim_attr("padding", self.padding)
        # The MaxPoolWithArgmax subclass uses (1, h, w, 1)-style ksize/strides attrs
        # and no data_format attr; all other pools register NCHW (1, 1, h, w) attrs.
        self.is_maxpoolwithargmax = (self.name == "MaxPoolWithArgmax")
        if not self.is_maxpoolwithargmax:
            self.add_prim_attr('data_format', "NCHW")
        self.ksize = _check_positive_int_or_tuple("ksize", ksize, self.name, allow_four=False, ret_four=True)
        if self.is_maxpoolwithargmax:
            self.ksize = (1, self.ksize[-2], self.ksize[-1], 1)
        self.add_prim_attr("ksize", self.ksize)
        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
        if self.is_maxpoolwithargmax:
            self.strides = (1, self.strides[-2], self.strides[-1], 1)
        self.add_prim_attr("strides", self.strides)

    def infer_shape(self, x_shape):
        """Infer the pooled output shape for an NCHW input."""
        validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
        batch, channel, input_h, input_w = x_shape
        # Kernel/stride layout depends on the subclass (see __init__).
        if self.is_maxpoolwithargmax:
            _, kernel_h, kernel_w, _ = self.ksize
            _, stride_h, stride_w, _ = self.strides
        else:
            _, _, kernel_h, kernel_w = self.ksize
            _, _, stride_h, stride_w = self.strides
        # self.padding is guaranteed to be "VALID" or "SAME" (validated in __init__),
        # so out_h/out_w are always assigned here.
        if self.padding == "VALID":
            out_h = math.ceil((input_h - (kernel_h - 1)) / stride_h)
            out_w = math.ceil((input_w - (kernel_w - 1)) / stride_w)
        elif self.padding == "SAME":
            out_h = math.ceil(input_h / stride_h)
            out_w = math.ceil(input_w / stride_w)
        out_shape = [batch, channel, out_h, out_w]
        # A non-positive dimension means the kernel is larger than the input.
        for shape_value in out_shape:
            if shape_value <= 0:
                raise ValueError(f"For '{self.name}' The kernel size is not valid, "
                                 f"please check it if is larger than data's shape size.")
        return out_shape

    def infer_dtype(self, x_dtype):
        """Pooling preserves the input dtype."""
        validator.check_subclass("input", x_dtype, mstype.tensor, self.name)
        return x_dtype
  1028. class MaxPool(_Pool):
  1029. r"""
  1030. Max pooling operation.
  1031. Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
  1032. Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
  1033. regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
  1034. :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
  1035. .. math::
  1036. \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
  1037. \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
  1038. Args:
  1039. ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
  1040. is an int number that represents height and width are both ksize, or a tuple
  1041. of two int numbers that represent height and width respectively. Default: 1.
  1042. strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  1043. the height and width of movement are both strides, or a tuple of two int numbers that
  1044. represent height and width of movement respectively. Default: 1.
  1045. padding (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  1046. Default: "valid".
  1047. - same: Adopts the way of completion. The height and width of the output will be the same as
  1048. the input. The total number of padding will be calculated in horizontal and vertical
  1049. directions and evenly distributed to top and bottom, left and right if possible.
  1050. Otherwise, the last extra padding will be done from the bottom and the right side.
  1051. - valid: Adopts the way of discarding. The possible largest height and width of output
  1052. will be returned without padding. Extra pixels will be discarded.
  1053. Inputs:
  1054. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  1055. Outputs:
  1056. Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  1057. Examples:
  1058. >>> input_tensor = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
  1059. >>> maxpool_op = P.MaxPool(padding="VALID", ksize=2, strides=1)
  1060. >>> output_tensor = maxpool_op(input_tensor)
  1061. """
  1062. @prim_attr_register
  1063. def __init__(self, ksize=1, strides=1, padding="valid"):
  1064. super(MaxPool, self).__init__(ksize, strides, padding)
  1065. class MaxPoolWithArgmax(_Pool):
  1066. r"""
  1067. Perform max pooling on the input Tensor and return both max values and indices.
  1068. Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
  1069. regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
  1070. :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
  1071. .. math::
  1072. \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
  1073. \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
  1074. Args:
  1075. ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg value,
  1076. is an int number that represents height and width are both ksize, or a tuple of
  1077. two int numbers that represent height and width respectively. Default: 1.
  1078. strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  1079. the height and width of movement are both strides, or a tuple of two int numbers that
  1080. represent height and width of movement respectively. Default: 1.
  1081. padding (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  1082. Default: "valid".
  1083. - same: Adopts the way of completion. The height and width of the output will be the same as
  1084. the input. The total number of padding will be calculated in horizontal and vertical
  1085. directions and evenly distributed to top and bottom, left and right if possible.
  1086. Otherwise, the last extra padding will be done from the bottom and the right side.
  1087. - valid: Adopts the way of discarding. The possible largest height and width of output
  1088. will be returned without padding. Extra pixels will be discarded.
  1089. Inputs:
  1090. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  1091. Data type must be float16 or float32.
  1092. Outputs:
  1093. Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
  1094. - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  1095. - **mask** (Tensor) - Max values' index represented by the mask.
  1096. Examples:
  1097. >>> input_tensor = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
  1098. >>> maxpool_arg_op = P.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1)
  1099. >>> output_tensor, argmax = maxpool_arg_op(input_tensor)
  1100. """
  1101. def __init__(self, ksize=1, strides=1, padding="valid"):
  1102. super(MaxPoolWithArgmax, self).__init__(ksize, strides, padding)
  1103. self.is_tbe = context.get_context("device_target") == "Ascend"
  1104. self.is_gpu = context.get_context("device_target") == "GPU"
  1105. def infer_shape(self, x_shape):
  1106. out_shape = _Pool.infer_shape(self, x_shape)
  1107. _, _, out_h, out_w = out_shape
  1108. _, kernel_h, kernel_w, _ = self.ksize
  1109. argmax_shape = []
  1110. if self.is_tbe:
  1111. for i in range(4):
  1112. if i == 2:
  1113. dim = kernel_h * kernel_w
  1114. argmax_shape.append(dim)
  1115. elif i == 3:
  1116. dim = math.ceil(out_h * out_w / 16) + 1
  1117. argmax_shape.append(dim)
  1118. else:
  1119. argmax_shape.append(x_shape[i])
  1120. else:
  1121. argmax_shape = out_shape
  1122. return out_shape, argmax_shape
  1123. def infer_dtype(self, x_dtype):
  1124. out_dtype = x_dtype
  1125. validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
  1126. argmax_dtype = mstype.uint16
  1127. if self.is_gpu:
  1128. argmax_dtype = mstype.int32
  1129. return out_dtype, argmax_dtype
  1130. class AvgPool(_Pool):
  1131. r"""
  1132. Average pooling operation.
  1133. Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
  1134. Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, AvgPool2d outputs
  1135. regional average in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
  1136. :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
  1137. .. math::
  1138. \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
  1139. \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
  1140. Args:
  1141. ksize (Union[int, tuple[int]]): The size of kernel used to take the average value,
  1142. is an int number that represents height and width are both ksize, or a tuple
  1143. of two int numbers that represent height and width respectively. Default: 1.
  1144. strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  1145. the height and width of movement are both strides, or a tuple of two int numbers that
  1146. represent height and width of movement respectively. Default: 1.
  1147. padding (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  1148. Default: "valid".
  1149. - same: Adopts the way of completion. The height and width of the output will be the same as
  1150. the input. The total number of padding will be calculated in horizontal and vertical
  1151. directions and evenly distributed to top and bottom, left and right if possible.
  1152. Otherwise, the last extra padding will be done from the bottom and the right side.
  1153. - valid: Adopts the way of discarding. The possible largest height and width of output
  1154. will be returned without padding. Extra pixels will be discarded.
  1155. Inputs:
  1156. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  1157. Outputs:
  1158. Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  1159. Examples:
  1160. >>> import mindspore
  1161. >>> import mindspore.nn as nn
  1162. >>> import numpy as np
  1163. >>> from mindspore import Tensor
  1164. >>> from mindspore.ops import operations as P
  1165. >>> class Net(nn.Cell):
  1166. >>> def __init__(self):
  1167. >>> super(Net, self).__init__()
  1168. >>> self.avgpool_op = P.AvgPool(padding="VALID", ksize=2, strides=1)
  1169. >>>
  1170. >>> def construct(self, x):
  1171. >>> result = self.avgpool_op(x)
  1172. >>> return result
  1173. >>>
  1174. >>> input_x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mindspore.float32)
  1175. >>> net = Net()
  1176. >>> result = net(input_x)
  1177. [[[[ 2.5 3.5 4.5]
  1178. [ 6.5 7.5 8.5]]
  1179. [[ 14.5 15.5 16.5]
  1180. [ 18.5 19.5 20.5]]
  1181. [[ 26.5 27.5 28.5]
  1182. [ 30.5 31.5 32.5]]]]
  1183. """
  1184. @prim_attr_register
  1185. def __init__(self, ksize=1, strides=1, padding="valid"):
  1186. if context.get_context("device_target") == "GPU":
  1187. self.target = "GPU"
  1188. elif context.get_context("enable_ge"):
  1189. self.target = "GE"
  1190. else:
  1191. self.target = "OTHER"
  1192. super(AvgPool, self).__init__(ksize, strides, padding)
class Conv2DBackpropInput(PrimitiveWithInfer):
    """
    Computes the gradients of convolution with respect to the input.

    Args:
        out_channel (int): The dimensionality of the output space.
        kernel_size (Union[int, tuple[int]]): The size of the convolution window.
        pad_mode (str): Modes to fill padding. It could be "valid", "same", or "pad". Default: "valid".
        pad (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `pad` is an integer, the paddings of
            top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers, the
            padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3] correspondingly.
        mode (int): Modes for different convolutions. 0 Math convolutiuon, 1 cross-correlation convolution ,
            2 deconvolution, 3 depthwise convolution. Default: 1.
        stride (Union[int. tuple[int]]): The stride to be applied to the convolution filter. Default: 1.
        dilation (Union[int. tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
            Default: 1.
        group (int): Splits input into groups. Default: 1.

    Returns:
        Tensor, the gradients of convolution.

    Examples:
        >>> dout = Tensor(np.ones([10, 32, 30, 30]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> x = Tensor(np.ones([10, 32, 32, 32]))
        >>> conv2d_backprop_input = P.Conv2DBackpropInput(out_channel=32, kernel_size=3)
        >>> conv2d_backprop_input(dout, weight, F.shape(x))
    """

    @prim_attr_register
    def __init__(self,
                 out_channel,
                 kernel_size,
                 pad_mode="valid",
                 pad=0,
                 pad_list=None,
                 mode=1,
                 stride=1,
                 dilation=1,
                 group=1):
        """Initialize Conv2DBackpropInput"""
        self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output'])
        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        # NOTE: stride stays a 2-tuple here (ret_four=False), unlike Conv2D's 4-tuple.
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=False)
        self.add_prim_attr('stride', self.stride)
        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('dilation', self.dilation)
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        if isinstance(pad, int):
            # A scalar pad applies to all four sides: (top, bottom, left, right).
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.padding = pad
        self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name)
        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', padding must be zero when pad_mode is '{pad_mode}'.")
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)
        # NOTE(review): registering the upper-cased value appears to also rebind
        # self.pad_mode (__infer__ compares against "SAME"/"PAD") — presumably
        # add_prim_attr sets the attribute on self; verify in Primitive.add_prim_attr.
        pad_mode = pad_mode.upper()
        self.add_prim_attr('pad_mode', pad_mode)
        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
        self.group = validator.check_positive_int(group, 'group', self.name)
        self.add_prim_attr('data_format', "NCHW")
        # pad_list, when given, overrides any padding derived from pad_mode in __infer__.
        if pad_list:
            for x in pad_list:
                validator.check_non_negative_int(x, 'element of pad_list', self.name)
        self.pad_list = pad_list

    def __infer__(self, doutput, w, x_size):
        """Infer the gradient (original input) shape/dtype from dout, weight and input_sizes."""
        x_size_v = x_size['value']
        validator.check_value_type('x_size', x_size_v, [tuple], self.name)
        for i, dim_len in enumerate(x_size_v):
            validator.check_value_type("x_size[%d]" % i, dim_len, [int], self.name)
        args = {'doutput': doutput['dtype'], 'w': w['dtype']}
        valid_types = [mstype.int8, mstype.int32, mstype.float16, mstype.float32]
        validator.check_tensor_type_same(args, valid_types, self.name)
        # infer shape
        dout_shape = doutput['shape']
        kernel_h = self.kernel_size[0]
        kernel_w = self.kernel_size[1]
        stride_h = self.stride[0]
        stride_w = self.stride[1]
        dilation_h = self.dilation[2]
        dilation_w = self.dilation[3]
        # default pad mode is valid
        pad_list = (0, 0, 0, 0)
        if self.pad_list:
            pad_list = tuple(self.pad_list)
        elif self.pad_mode == "SAME":
            # Reconstruct the SAME-mode padding the forward convolution would have used.
            pad_needed_h = max(0, (dout_shape[2] - 1) * stride_h + dilation_h * (kernel_h - 1) + 1 - x_size_v[2])
            pad_top = math.floor(pad_needed_h / 2)
            pad_bottom = pad_needed_h - pad_top
            pad_needed_w = max(0, (dout_shape[3] - 1) * stride_w + dilation_w * (kernel_w - 1) + 1 - x_size_v[3])
            pad_left = math.floor(pad_needed_w / 2)
            pad_right = pad_needed_w - pad_left
            pad_list = (pad_top, pad_bottom, pad_left, pad_right)
        elif self.pad_mode == 'PAD':
            pad_list = self.padding
        self.add_prim_attr('pad_list', pad_list)
        out = {
            'value': None,
            'shape': x_size_v,
            'dtype': doutput['dtype'],
        }
        return out
  1295. class BiasAdd(PrimitiveWithInfer):
  1296. r"""
  1297. Returns sum of input and bias tensor.
  1298. Adds the 1-D bias tensor to the input tensor, and broadcasts the shape on all axis
  1299. except for the channel axis.
  1300. Inputs:
  1301. - **input_x** (Tensor) - The input tensor. The shape can be 2-4 dimensions.
  1302. - **bias** (Tensor) - The bias tensor, with shape :math:`(C)`.
  1303. The shape of `bias` must be the same as `input_x` in the second dimension.
  1304. Outputs:
  1305. Tensor, with the same shape and type as `input_x`.
  1306. Examples:
  1307. >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32)
  1308. >>> bias = Tensor(np.random.random(3).reshape((3,)), mindspore.float32)
  1309. >>> bias_add = P.BiasAdd()
  1310. >>> bias_add(input_x, bias)
  1311. """
  1312. @prim_attr_register
  1313. def __init__(self):
  1314. self.init_prim_io_names(inputs=['x', 'b'], outputs=['output'])
  1315. self.add_prim_attr('data_format', 'NCHW')
  1316. def infer_shape(self, x_shape, b_shape):
  1317. validator.check_int(len(x_shape), 2, Rel.GE, "x rank", self.name)
  1318. validator.check_equal_int(len(b_shape), 1, "bias rank", self.name)
  1319. validator.check("b_shape[0]", b_shape[0], "x_shape[1]", x_shape[1], Rel.EQ, self.name)
  1320. return x_shape
  1321. def infer_dtype(self, x_type, b_type):
  1322. args = {"input_x": x_type, "bias": b_type}
  1323. validator.check_tensor_type_same(args, mstype.number_type, self.name)
  1324. return x_type
  1325. class TopK(PrimitiveWithInfer):
  1326. """
  1327. Finds values and indices of the `k` largest entries along the last dimension.
  1328. Args:
  1329. sorted (bool): If true, the obtained elements will
  1330. be sorted by the values in descending order. Default: False.
  1331. Inputs:
  1332. - **input_x** (Tensor) - Input to be computed, data type must be float16, float32 or int32.
  1333. - **k** (int) - The number of top elements to be computed along the last dimension, constant input is needed.
  1334. Outputs:
  1335. Tuple of 2 tensors, the values and the indices.
  1336. - **values** (Tensor) - The `k` largest elements in each slice of the last dimensional.
  1337. - **indices** (Tensor) - The indices of values within the last dimension of input.
  1338. Examples:
  1339. >>> topk = P.TopK(sorted=True)
  1340. >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16)
  1341. >>> k = 3
  1342. >>> values, indices = topk(input_x, k)
  1343. >>> assert values == Tensor(np.array([5, 4, 3]), mstype.float16)
  1344. >>> assert indices == Tensor(np.array([4, 3, 2]), mstype.int32)
  1345. """
  1346. @prim_attr_register
  1347. def __init__(self, sorted=False):
  1348. validator.check_value_type("sorted", sorted, [bool], self.name)
  1349. self.init_prim_io_names(inputs=['input', 'k'],
  1350. outputs=['values', 'indices'])
  1351. def __infer__(self, input_x, k):
  1352. x_dtype = input_x['dtype']
  1353. valid_types = (mstype.int32, mstype.float16, mstype.float32)
  1354. validator.check_tensor_type_same({'x': x_dtype}, valid_types, self.name)
  1355. k_v = k['value']
  1356. validator.check_value_type('k', k_v, (int,), self.name)
  1357. x_shape = list(input_x['shape'])
  1358. ndim = len(x_shape) - 1
  1359. x_shape[ndim] = k_v
  1360. return {'shape': (x_shape, x_shape),
  1361. 'dtype': (x_dtype, mstype.int32),
  1362. 'value': None}
  1363. class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
  1364. r"""
  1365. Gets the softmax cross-entropy value between logits and labels with one-hot encoding.
  1366. Note:
  1367. Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
  1368. .. math::
  1369. p_{ij} = softmax(X_{ij}) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)}
  1370. .. math::
  1371. loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})}
  1372. Inputs:
  1373. - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
  1374. - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`.
  1375. Outputs:
  1376. Tuple of 2 tensors, the `loss` shape is `(N,)`, and the `dlogits` with the same shape as `logits`.
  1377. Examples:
  1378. >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32)
  1379. >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32)
  1380. >>> softmax_cross = P.SoftmaxCrossEntropyWithLogits()
  1381. >>> loss, backprop = softmax_cross(logits, labels)
  1382. ([0.5899297, 0.52374405], [[0.02760027, 0.20393994, 0.01015357, 0.20393994, -0.44563377],
  1383. [0.08015892, 0.02948882, 0.08015892, -0.4077012, 0.21789455]])
  1384. """
  1385. @prim_attr_register
  1386. def __init__(self):
  1387. pass
  1388. def infer_shape(self, logits_shape, labels_shape):
  1389. validator.check("logits_shape", logits_shape, "labels_shape", labels_shape, Rel.EQ, self.name)
  1390. loss_shape = [logits_shape[0]]
  1391. dlogits_shape = logits_shape
  1392. return (loss_shape, dlogits_shape)
  1393. def infer_dtype(self, logits_type, labels_type):
  1394. args = {"logits": logits_type, "labels": labels_type}
  1395. validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name)
  1396. return (logits_type, logits_type)
  1397. class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
  1398. r"""
  1399. Computes the softmax cross-entropy value between logits and sparse encoding labels.
  1400. Note:
  1401. Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
  1402. .. math::
  1403. p_{ij} = softmax(X_{ij}) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)}
  1404. .. math::
  1405. loss_{ij} = \begin{cases} -ln(p_{ij}), &j = y_i \cr -ln(1 - p_{ij}), & j \neq y_i \end{cases}
  1406. .. math::
  1407. loss = \sum_{ij} loss_{ij}
  1408. Args:
  1409. is_grad (bool): If true, this operation returns the computed gradient. Default: False.
  1410. Inputs:
  1411. - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
  1412. - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`.
  1413. Data type must be int32 or int64.
  1414. Outputs:
  1415. Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor;
  1416. if `is_grad` is True, the output tensor is the gradient of input with the same shape as `logits`.
  1417. Examples:
  1418. Please refer to the usage in nn.SoftmaxCrossEntropyWithLogits source code.
  1419. """
  1420. @prim_attr_register
  1421. def __init__(self, is_grad=False):
  1422. validator.check_value_type('is_grad', is_grad, [bool], self.name)
  1423. self.init_prim_io_names(inputs=['features', 'labels'], outputs=['output'])
  1424. self.is_grad = is_grad
  1425. self.add_prim_attr('sens', 1.0)
  1426. def infer_shape(self, logits_shape, labels_shape):
  1427. validator.check("logits_shape[0]", logits_shape[0], "labels_shape[0]", labels_shape[0], Rel.EQ, self.name)
  1428. loss_shape = []
  1429. if self.is_grad:
  1430. return logits_shape
  1431. return loss_shape
  1432. def infer_dtype(self, logits_type, labels_type):
  1433. validator.check_tensor_type_same({"logits": logits_type}, (mstype.float16, mstype.float32), self.name)
  1434. validator.check_tensor_type_same({"labels": labels_type}, (mstype.int32, mstype.int64), self.name)
  1435. return logits_type
  1436. class ApplyMomentum(PrimitiveWithInfer):
  1437. """
  1438. Optimizer that implements the Momentum algorithm.
  1439. Refer to the paper `On the importance of initialization and momentum in deep
  1440. learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_ for more details.
  1441. Inputs of `variable`, `accumulation` and `gradient` comply with the implicit type conversion rules
  1442. to make the data types consistent.
  1443. If they have different data types, lower priority data type will be converted to
  1444. relatively highest priority data type.
  1445. Data type conversion of Parameter is not supported. RuntimeError exception will be thrown.
  1446. Args:
  1447. use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors
  1448. from being updated. Default: False.
  1449. use_nesterov (bool): Enable Nesterov momentum. Default: False.
  1450. gradient_scale (float): The scale of the gradient. Default: 1.0.
  1451. Inputs:
  1452. - **variable** (Parameter) - Weights to be updated. data type must be float.
  1453. - **accumulation** (Parameter) - Accumulated gradient value by moment weight.
  1454. Has the same data type with `variable`.
  1455. - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float number or
  1456. a scalar tensor with float data type.
  1457. - **gradient** (Tensor) - Gradient, has the same data type as `variable`.
  1458. - **momentum** (Union[Number, Tensor]) - Momentum, must be a float number or
  1459. a scalar tensor with float data type.
  1460. Outputs:
  1461. Tensor, parameters to be updated.
  1462. Examples:
  1463. Please refer to the usage in nn.ApplyMomentum.
  1464. """
  1465. __mindspore_signature__ = (
  1466. sig.make_sig('variable', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  1467. sig.make_sig('accumulation', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  1468. sig.make_sig('learning_rate', dtype=sig.sig_dtype.T1),
  1469. sig.make_sig('gradient', dtype=sig.sig_dtype.T),
  1470. sig.make_sig('momentum', dtype=sig.sig_dtype.T2),
  1471. )
  1472. @prim_attr_register
  1473. def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
  1474. self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],
  1475. outputs=['output'])
  1476. self.is_tbe = context.get_context("device_target") == "Ascend"
  1477. self.is_ge = context.get_context("enable_ge")
  1478. def infer_shape(self, v_shape, a_shape, l_shape, g_shape, m_shape):
  1479. if not self.is_ge and self.is_tbe:
  1480. return v_shape, v_shape
  1481. return v_shape
  1482. def infer_dtype(self, v_dtype, a_dtype, l_dtype, g_dtype, m_dtype):
  1483. valid_types = [mstype.float16, mstype.float32, mstype.float64]
  1484. if v_dtype != mstype.type_refkey and a_dtype != mstype.type_refkey:
  1485. validator.check_tensor_type_same({"v": v_dtype}, valid_types, self.name)
  1486. validator.check_tensor_type_same({"a": a_dtype}, valid_types, self.name)
  1487. validator.check_scalar_or_tensor_type_same({"l_dtype": l_dtype}, valid_types, self.name)
  1488. validator.check_scalar_or_tensor_type_same({"g_dtype": g_dtype}, valid_types, self.name)
  1489. validator.check_scalar_or_tensor_type_same({"m_dtype": m_dtype}, valid_types, self.name)
  1490. if not self.is_ge and self.is_tbe:
  1491. return g_dtype, g_dtype
  1492. return g_dtype
  1493. class SmoothL1Loss(PrimitiveWithInfer):
  1494. r"""
  1495. Computes smooth L1 loss, a robust L1 loss.
  1496. SmoothL1Loss is a Loss similar to MSELoss but less sensitive to outliers as described in the
  1497. `Fast R-CNN <https://arxiv.org/abs/1504.08083>`_ by Ross Girshick.
  1498. Note:
  1499. Sets input prediction as `X`, input target as `Y`, output as `loss`. Then,
  1500. .. math::
  1501. \text{SmoothL1Loss} = \begin{cases} \frac{0.5 x^{2}}{\text{beta}}, &if \left |x \right | < \text{beta} \cr
  1502. \left |x \right|-0.5 \text{beta}, &\text{otherwise}\end{cases}
  1503. Args:
  1504. beta (float): A parameter used to control the point where the function will change from
  1505. quadratic to linear. Default: 1.0.
  1506. Inputs:
  1507. - **prediction** (Tensor) - Predict data. Data type must be float16 or float32.
  1508. - **target** (Tensor) - Ground truth data, with the same type and shape as `prediction`.
  1509. Outputs:
  1510. Tensor, with the same type and shape as `prediction`.
  1511. Examples:
  1512. >>> loss = P.SmoothL1Loss()
  1513. >>> input_data = Tensor(np.array([1, 2, 3]), mindspore.float32)
  1514. >>> target_data = Tensor(np.array([1, 2, 2]), mindspore.float32)
  1515. >>> loss(input_data, target_data)
  1516. [0, 0, 0.5]
  1517. """
  1518. @prim_attr_register
  1519. def __init__(self, beta=1.0):
  1520. validator.check_value_type('beta', beta, [float], self.name)
  1521. validator.check('beta', beta, '', 0, Rel.GT, self.name)
  1522. self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
  1523. self.add_prim_attr('sigma', beta)
  1524. def infer_shape(self, prediction, target):
  1525. validator.check('prediction shape', prediction, 'target shape', target, Rel.EQ, self.name)
  1526. return prediction
  1527. def infer_dtype(self, prediction, target):
  1528. args = {"prediction": prediction, "target": target}
  1529. validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name)
  1530. return prediction
class L2Loss(PrimitiveWithInfer):
    """
    Calculates half of the L2 norm of a tensor without using the `sqrt`.

    Set `input_x` as x and output as loss.

    .. math::
        loss = sum(x ** 2) / 2

    Inputs:
        - **input_x** (Tensor) - A input Tensor. Data type must be float16 or float32.

    Outputs:
        Tensor, has the same dtype as `input_x`. The output tensor is the value of loss which is a scalar tensor.

    Examples:
        >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16)
        >>> l2_loss = P.L2Loss()
        >>> l2_loss(input_x)
        7.0
    """

    @prim_attr_register
    def __init__(self):
        """Initialize L2Loss"""

    def infer_shape(self, input_x):
        # The loss is a scalar, represented by an empty shape.
        loss_shape = []
        return loss_shape

    def infer_dtype(self, x_type):
        validator.check_subclass("x_type", x_type, mstype.tensor, self.name)
        valid_types = [mstype.float16, mstype.float32]
        validator.check_tensor_type_same({'x_type': x_type}, valid_types, self.name)
        return x_type
  1559. class DataFormatDimMap(PrimitiveWithInfer):
  1560. """
  1561. Returns the dimension index in the destination data format given in the source data format.
  1562. Args:
  1563. src_format (string): An optional value for source data format. Default: 'NHWC'.
  1564. dst_format (string): An optional value for destination data format. Default: 'NCHW'.
  1565. Inputs:
  1566. - **input_x** (Tensor) - A Tensor with each element as a dimension index in source data format.
  1567. The suggested values is in the range [-4, 4). It's type is int32.
  1568. Outputs:
  1569. Tensor, has the same type as the `input_x`.
  1570. Examples:
  1571. >>> x = Tensor([0, 1, 2, 3], mindspore.int32)
  1572. >>> dfdm = P.DataFormatDimMap()
  1573. >>> dfdm(x)
  1574. [0 3 1 2]
  1575. """
  1576. @prim_attr_register
  1577. def __init__(self, src_format='NHWC', dst_format='NCHW'):
  1578. valid_values = ['NHWC', 'NCHW']
  1579. self.src_format = validator.check_string(src_format, valid_values, "src_format", self.name)
  1580. self.dst_format = validator.check_string(dst_format, valid_values, "dst_format", self.name)
  1581. self.init_prim_io_names(inputs=['input_x'], outputs=['output'])
  1582. def infer_shape(self, x_shape):
  1583. return x_shape
  1584. def infer_dtype(self, x_type):
  1585. validator.check_subclass("x", x_type, mstype.tensor, self.name)
  1586. valid_types = [mstype.int32]
  1587. validator.check_tensor_type_same({"x": x_type}, valid_types, self.name)
  1588. return x_type
class RNNTLoss(PrimitiveWithInfer):
    """
    Computes the RNNTLoss and its gradient with respect to the softmax outputs.

    Args:
        blank_label (int): blank label. Default: 0.

    Inputs:
        - **acts** (Tensor) - Tensor of shape :math:`(B, T, U, V)`. Data type must be float16 or float32.
        - **labels** (Tensor[int32]) - Tensor of shape :math:`(B, U-1)`.
        - **input_lengths** (Tensor[int32]) - Tensor of shape :math:`(B,)`.
        - **label_lengths** (Tensor[int32]) - Tensor of shape :math:`(B,)`.

    Outputs:
        - **costs** (Tensor) - Tensor of shape :math:`(B,)`, with the same data type as `acts`.
        - **grads** (Tensor) - Has the same shape and data type as `acts`.

    Examples:
        >>> B, T, U, V = 1, 2, 3, 5
        >>> acts = np.random.random((B, T, U, V)).astype(np.float32)
        >>> labels = np.array([[1, 2]]).astype(np.int32)
        >>> input_length = np.array([T] * B).astype(np.int32)
        >>> label_length = np.array([len(l) for l in labels]).astype(np.int32)
        >>> rnnt_loss = P.RNNTLoss(blank_label=0)
        >>> costs, grads = rnnt_loss(Tensor(acts), Tensor(labels), Tensor(input_length), Tensor(label_length))
    """

    @prim_attr_register
    def __init__(self, blank_label=0):
        validator.check_value_type('blank_label', blank_label, [int], self.name)
        self.init_prim_io_names(inputs=['acts', 'labels', 'input_length', 'label_length'],
                                outputs=['costs', 'grads'])

    def infer_shape(self, acts_shape, labels_shape, input_length_shape, label_length_shape):
        # acts is (B, T, U, V); the three auxiliary inputs are batch-aligned.
        validator.check_equal_int(len(acts_shape), 4, 'acts_rank', self.name)
        validator.check_equal_int(len(labels_shape), 2, 'labels_rank', self.name)
        validator.check_equal_int(len(input_length_shape), 1, 'input_length_rank', self.name)
        validator.check_equal_int(len(label_length_shape), 1, 'label_length_rank', self.name)
        validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
        # labels carries U-1 tokens per sample (no leading blank).
        validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1', acts_shape[2]-1, Rel.EQ, self.name)
        validator.check('input_length size', input_length_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
        validator.check('label_length size', label_length_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
        # costs is per-sample, (B,); grads mirrors acts.
        costs_shape = (acts_shape[0],)
        return (costs_shape, acts_shape)

    def infer_dtype(self, acts_type, labels_type, input_length_type, label_length_type):
        validator.check_subclass("acts_type", acts_type, mstype.tensor, self.name)
        validator.check_subclass("labels_type", labels_type, mstype.tensor, self.name)
        validator.check_subclass("input_length_type", input_length_type, mstype.tensor, self.name)
        validator.check_subclass("label_length_type", label_length_type, mstype.tensor, self.name)
        validator.check_tensor_type_same({"acts_type": acts_type}, [mstype.float32, mstype.float16], self.name)
        validator.check_tensor_type_same({"labels_type": labels_type}, [mstype.int32], self.name)
        validator.check_tensor_type_same({"input_length_type": input_length_type}, [mstype.int32], self.name)
        validator.check_tensor_type_same({"label_length_type": label_length_type}, [mstype.int32], self.name)
        # Both outputs share the floating dtype of acts.
        return (acts_type, acts_type)
  1637. class SGD(PrimitiveWithInfer):
  1638. """
  1639. Computes stochastic gradient descent (optionally with momentum).
  1640. Nesterov momentum is based on the formula from On the importance of
  1641. initialization and momentum in deep learning.
  1642. Note:
  1643. For details, please refer to `nn.SGD` source code.
  1644. Args:
  1645. dampening (float): The dampening for momentum. Default: 0.0.
  1646. weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
  1647. nesterov (bool): Enable Nesterov momentum. Default: False.
  1648. Inputs:
  1649. - **parameters** (Tensor) - Parameters to be updated. With float16 or float32 data type.
  1650. - **gradient** (Tensor) - Gradient, with float16 or float32 data type.
  1651. - **learning_rate** (Tensor) - Learning rate, a scalar tensor with float16 or float32 data type.
  1652. e.g. Tensor(0.1, mindspore.float32)
  1653. - **accum** (Tensor) - Accum(velocity) to be updated. With float16 or float32 data type.
  1654. - **momentum** (Tensor) - Momentum, a scalar tensor with float16 or float32 data type.
  1655. e.g. Tensor(0.1, mindspore.float32).
  1656. - **stat** (Tensor) - States to be updated with the same shape as gradient, with float16 or float32 data type.
  1657. Outputs:
  1658. Tensor, parameters to be updated.
  1659. Examples:
  1660. >>> sgd = P.SGD()
  1661. >>> parameters = Tensor(np.array([2, -0.5, 1.7, 4]), mindspore.float32)
  1662. >>> gradient = Tensor(np.array([1, -1, 0.5, 2]), mindspore.float32)
  1663. >>> learning_rate = Tensor(0.01, mindspore.float32)
  1664. >>> accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mindspore.float32)
  1665. >>> momentum = Tensor(0.1, mindspore.float32)
  1666. >>> stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mindspore.float32)
  1667. >>> result = sgd(parameters, gradient, learning_rate, accum, momentum, stat)
  1668. """
  1669. @prim_attr_register
  1670. def __init__(self, dampening=0.0, weight_decay=0.0, nesterov=False):
  1671. validator.check_value_type("nesterov", nesterov, [bool], self.name)
  1672. if nesterov and dampening != 0:
  1673. raise ValueError(f"Nesterov need zero dampening!")
  1674. self.init_prim_io_names(inputs=['parameters', 'gradient', 'learning_rate', 'accum', 'momentum', 'stat'],
  1675. outputs=['output'])
  1676. def infer_shape(self, parameters_shape, gradient_shape, learning_rate_shape,
  1677. accum_shape, momentum_shape, stat_shape):
  1678. validator.check_positive_int(len(parameters_shape), "parameters rank", self.name)
  1679. validator.check_int(len(gradient_shape), 0, Rel.GE, f'gradient rank', self.name)
  1680. validator.check_int(len(learning_rate_shape), 0, Rel.GE, f'learning rate rank', self.name)
  1681. validator.check_positive_int(len(accum_shape), "accumulation rank", self.name)
  1682. validator.check_int(len(momentum_shape), 0, Rel.GE, f'momentum rank', self.name)
  1683. validator.check_int(len(stat_shape), 0, Rel.GE, f'stat rank', self.name)
  1684. validator.check("gradient shape", gradient_shape, "stat shape", stat_shape, Rel.EQ, self.name)
  1685. return parameters_shape
  1686. def infer_dtype(self, parameters_dtype, gradient_dtype, learning_rate_dtype,
  1687. accum_dtype, momentum_dtype, stat_dtype):
  1688. valid_types = [mstype.float16, mstype.float32]
  1689. validator.check_tensor_type_same({"parameters": parameters_dtype}, valid_types, self.name)
  1690. validator.check_tensor_type_same({"gradient": gradient_dtype}, valid_types, self.name)
  1691. validator.check_tensor_type_same({"learning_rate": learning_rate_dtype}, valid_types, self.name)
  1692. validator.check_tensor_type_same({"accum": accum_dtype}, valid_types, self.name)
  1693. validator.check_tensor_type_same({"momentum": momentum_dtype}, valid_types, self.name)
  1694. validator.check_tensor_type_same({"stat": stat_dtype}, valid_types, self.name)
  1695. return parameters_dtype
class ApplyRMSProp(PrimitiveWithInfer):
    """
    Optimizer that implements the Root Mean Square prop(RMSProp) algorithm.

    Please refer to the usage in source code of `nn.RMSProp`.

    Note:
        Update `var` according to the RMSProp algorithm.

        .. math::
            s_{t} = \\rho s_{t-1} + (1 - \\rho)(\\nabla Q_{i}(w))^2

        .. math::
            m_{t} = \\beta m_{t-1} + \\frac{\\eta} {\\sqrt{s_{t} + \\epsilon}} \\nabla Q_{i}(w)

        .. math::
            w = w - m_{t}

        where :math:`w` represents `var`, which will be updated.
        :math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last momentent of :math:`s_{t}`,
        :math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last momentent of :math:`m_{t}`.
        :math:`\\rho` represents `decay`. :math:`\\beta` is the momentum term, represents `momentum`.
        :math:`\\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
        :math:`\\eta` represents `learning_rate`. :math:`\\nabla Q_{i}(w)` represents `grad`.

    Args:
        use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors
                            from being updated. Default: False.

    Inputs:
        - **var** (Tensor) - Weights to be update.
        - **mean_square** (Tensor) - Mean square gradients, must have the same type as `var`.
        - **moment** (Tensor) - Delta of `var`, must have the same type as `var`.
        - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
          a scalar tensor with float16 or float32 data type.
        - **grad** (Tensor) - Gradient, must have the same type as `var`.
        - **decay** (float) - Decay rate. Only constant value is allowed.
        - **momentum** (float) - Momentum. Only constant value is allowed.
        - **epsilon** (float) - Ridge term. Only constant value is allowed.

    Outputs:
        Tensor, parameters to be update.

    Examples:
        >>> apply_rms = P.ApplyRMSProp()
        >>> input_x = Tensor(1., mindspore.float32)
        >>> mean_square = Tensor(2., mindspore.float32)
        >>> moment = Tensor(1., mindspore.float32)
        >>> grad = Tensor(2., mindspore.float32 )
        >>> learning_rate = Tensor(0.9, mindspore.float32)
        >>> decay = 0.0
        >>> momentum = 1e-10
        >>> epsilon = 0.001
        >>> result = apply_rms(input_x, mean_square, moment, learning_rate, grad, decay, momentum, epsilon)
        (-2.9977674, 0.80999994, 1.9987665)
    """

    @prim_attr_register
    def __init__(self, use_locking=False):
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
        self.init_prim_io_names(inputs=['var', 'mean_square', 'moment', 'learning_rate', 'grad',
                                        'rho', 'momentum', 'epsilon'], outputs=['output'])
        # On Ascend without GE the kernel exposes three outputs (var, mean_square,
        # moment); other targets return only var. Cache the flags once here.
        self.is_ge = context.get_context("enable_ge")
        self.is_d = context.get_context("device_target") == "Ascend"

    def infer_shape(self, var_shape, mean_square_shape, moment_shape, learning_rate_shape, grad_shape, decay_shape,
                    momentum_shape, epsilon_shape):
        # All stateful tensors and the gradient are element-wise aligned with var.
        validator.check("var_shape", var_shape, "mean_square_shape", mean_square_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "moment_shape", moment_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
        if not self.is_ge and self.is_d:
            return var_shape, var_shape, var_shape
        return var_shape

    def infer_dtype(self, var_dtype, mean_square_dtype, moment_dtype, learning_rate_dtype, grad_dtype, decay_dtype,
                    momentum_dtype, epsilon_dtype):
        # var/mean_square/moment/grad share one numeric tensor dtype.
        args = {"var": var_dtype, "mean_square": mean_square_dtype, "moment": moment_dtype, "grad": grad_dtype}
        validator.check_tensor_type_same(args, mstype.number_type, self.name)
        valid_types = [mstype.float16, mstype.float32]
        # The hyper-parameters are float scalars; learning_rate may be a scalar tensor.
        args_decay = {"decay": decay_dtype, 'momentum': momentum_dtype, "epsilon": epsilon_dtype}
        validator.check_type_same(args_decay, valid_types, self.name)
        args_lr = {"learning_rate": learning_rate_dtype, "decay": decay_dtype}
        validator.check_scalar_or_tensor_type_same(args_lr, valid_types, self.name, allow_mix=True)
        if not self.is_ge and self.is_d:
            return var_dtype, var_dtype, var_dtype
        return var_dtype

    def infer_value(self, var, mean_square, moment, learning_rate, grad, decay, momentum, epsilon):
        # decay/momentum/epsilon must be compile-time constants; reject graph-time
        # unknowns early. No constant folding is performed (implicitly returns None).
        if decay is None or momentum is None or epsilon is None:
            raise ValueError(f"For {self.name}, decay, momentum, epsilon must be const.")
class ApplyCenteredRMSProp(PrimitiveWithInfer):
    """
    Optimizer that implements the centered RMSProp algorithm.

    Please refer to the usage in source code of `nn.RMSProp`.

    Note:
        Update `var` according to the centered RMSProp algorithm.

        .. math::
            g_{t} = \\rho g_{t-1} + (1 - \\rho)\\nabla Q_{i}(w)

        .. math::
            s_{t} = \\rho s_{t-1} + (1 - \\rho)(\\nabla Q_{i}(w))^2

        .. math::
            m_{t} = \\beta m_{t-1} + \\frac{\\eta} {\\sqrt{s_{t} - g_{t}^2 + \\epsilon}} \\nabla Q_{i}(w)

        .. math::
            w = w - m_{t}

        where :math:`w` represents `var`, which will be updated.
        :math:`g_{t}` represents `mean_gradient`, :math:`g_{t-1}` is the last momentent of :math:`g_{t}`.
        :math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last momentent of :math:`s_{t}`,
        :math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last momentent of :math:`m_{t}`.
        :math:`\\rho` represents `decay`. :math:`\\beta` is the momentum term, represents `momentum`.
        :math:`\\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
        :math:`\\eta` represents `learning_rate`. :math:`\\nabla Q_{i}(w)` represents `grad`.

    Args:
        use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors
                            from being updated. Default: False.

    Inputs:
        - **var** (Tensor) - Weights to be update.
        - **mean_gradient** (Tensor) - Mean gradients, must have the same type as `var`.
        - **mean_square** (Tensor) - Mean square gradients, must have the same type as `var`.
        - **moment** (Tensor) - Delta of `var`, must have the same type as `var`.
        - **grad** (Tensor) - Gradient, must have the same type as `var`.
        - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
          a scalar tensor with float16 or float32 data type.
        - **decay** (float) - Decay rate.
        - **momentum** (float) - Momentum.
        - **epsilon** (float) - Ridge term.

    Outputs:
        Tensor, parameters to be update.

    Examples:
        >>> centered_rms_prop = P.ApplyCenteredRMSProp()
        >>> input_x = Tensor(np.arange(-6, 6).astype(np.float32).reshape(2, 3, 2), mindspore.float32)
        >>> mean_grad = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32)
        >>> mean_square = Tensor(np.arange(-8, 4).astype(np.float32).reshape(2, 3, 2), mindspore.float32)
        >>> moment = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32)
        >>> grad = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32)
        >>> learning_rate = Tensor(0.9, mindspore.float32)
        >>> decay = 0.0
        >>> momentum = 1e-10
        >>> epsilon = 0.05
        >>> result = centered_rms_prop(input_x, mean_grad, mean_square, moment, grad,
        >>>                            learning_rate, decay, momentum, epsilon)
        [[[ -6.        -9.024922]
          [-12.049845 -15.074766]
          [-18.09969  -21.124613]]
         [[-24.149532 -27.174456]
          [-30.199379 -33.2243  ]
          [-36.249226 -39.274143]]]
    """

    @prim_attr_register
    def __init__(self, use_locking=False):
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
        # On Ascend the kernel exposes four outputs (var, mean_gradient,
        # mean_square, moment); other targets return only var.
        self.is_ascend = context.get_context("device_target") == "Ascend"

    def infer_shape(self, var_shape, mean_gradient_shape, mean_square_shape, moment_shape, grad_shape,
                    learning_rate_shape, decay_shape, momentum_shape, epsilon_shape):
        # All optimizer state tensors and the gradient are aligned with var.
        validator.check("var_shape", var_shape, "mean_gradient_shape", mean_gradient_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "mean_square_shape", mean_square_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "moment_shape", moment_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
        if self.is_ascend:
            return var_shape, mean_gradient_shape, mean_square_shape, moment_shape
        return var_shape

    def infer_dtype(self, var_dtype, mean_gradient_dtype, mean_square_dtype, moment_dtype, grad_dtype,
                    learning_rate_dtype, rho_dtype, momentum_dtype, epsilon_dtype):
        # Tensors share one numeric dtype; hyper-parameters are float scalars.
        args = {"var": var_dtype, "mean_gradient": mean_gradient_dtype,
                "mean_square": mean_square_dtype, "moment": moment_dtype, "grad": grad_dtype}
        validator.check_tensor_type_same(args, mstype.number_type, self.name)
        valid_types = [mstype.float16, mstype.float32]
        args_rho = {"rho": rho_dtype, 'momentum': momentum_dtype, "epsilon": epsilon_dtype}
        validator.check_type_same(args_rho, valid_types, self.name)
        args_lr = {"learning_rate": learning_rate_dtype, "rho": rho_dtype}
        validator.check_scalar_or_tensor_type_same(args_lr, valid_types, self.name, allow_mix=True)
        if self.is_ascend:
            return var_dtype, mean_gradient_dtype, mean_square_dtype, moment_dtype
        return var_dtype
  1855. class LayerNorm(Primitive):
  1856. r"""
  1857. Applies the Layer Normalization to the input tensor.
  1858. This operator will normalize the input tensor on given axis. LayerNorm is described in the paper
  1859. `Layer Normalization <https://arxiv.org/abs/1607.06450>`_.
  1860. .. math::
  1861. y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
  1862. where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.
  1863. Args:
  1864. begin_norm_axis (int): The begin axis of the `input_x` to apply LayerNorm,
  1865. the value must be in [-1, rank(input)). Default: 1.
  1866. begin_params_axis (int): The begin axis of the parameter input (`gamma`, `beta`) to
  1867. apply LayerNorm, the value must be in [-1, rank(input)). Default: 1.
  1868. epsilon (float): A value added to the denominator for numerical stability. Default: 1e-7.
  1869. Inputs:
  1870. - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
  1871. The input of LayerNorm.
  1872. - **gamma** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
  1873. The learnable parameter `gamma` as the scale on norm.
  1874. - **beta** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
  1875. The learnable parameter `beta` as the scale on norm.
  1876. Outputs:
  1877. tuple[Tensor], tuple of 3 tensors, the normalized input and the updated parameters.
  1878. - **output_x** (Tensor) - The normalized input, has the same type and shape as the `input_x`.
  1879. The shape is :math:`(N, C)`.
  1880. - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
  1881. - **variance** (Tensor) - Tensor of shape :math:`(C,)`.
  1882. Examples:
  1883. >>> input_x = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32)
  1884. >>> gamma = Tensor(np.ones([3]), mindspore.float32)
  1885. >>> beta = Tensor(np.ones([3]), mindspore.float32)
  1886. >>> layer_norm = P.LayerNorm()
  1887. >>> output = layer_norm(input_x, gamma, beta)
  1888. ([[-0.22474492, 1., 2.2247488], [-0.22474492, 1., 2.2247488]],
  1889. [[2.], [2.]], [[0.6666667], [0.6666667]])
  1890. """
  1891. @prim_attr_register
  1892. def __init__(self, begin_norm_axis=1, begin_params_axis=1, epsilon=1e-7):
  1893. validator.check_value_type('begin_norm_axis', begin_norm_axis, [int], self.name)
  1894. validator.check_value_type('begin_params_axis', begin_params_axis, [int], self.name)
  1895. validator.check_value_type('epsilon', epsilon, [float], self.name)
  1896. class L2Normalize(PrimitiveWithInfer):
  1897. r"""
  1898. L2 normalization Operator.
  1899. This operator will normalize the input using the given axis. The function is shown as follows:
  1900. .. math::
  1901. \text{output} = \frac{x}{\sqrt{\text{max}(\text{sum} (\text{input_x}^2), \epsilon)}},
  1902. where :math:`\epsilon` is epsilon.
  1903. Args:
  1904. axis (int): The starting axis for the input to apply the L2 normalization. Default: 0.
  1905. epsilon (float): A small value added for numerical stability. Default: 1e-4.
  1906. Inputs:
  1907. - **input_x** (Tensor) - Input to compute the normalization. Data type must be float16 or float32.
  1908. Outputs:
  1909. Tensor, with the same type and shape as the input.
  1910. Examples:
  1911. >>> l2_normalize = P.L2Normalize()
  1912. >>> input_x = Tensor(np.random.randint(-256, 256, (2, 3, 4)), mindspore.float32)
  1913. >>> result = l2_normalize(input_x)
  1914. [[[-0.47247353 -0.30934513 -0.4991462 0.8185567 ]
  1915. [-0.08070751 -0.9961299 -0.5741758 0.09262337]
  1916. [-0.9916556 -0.3049123 0.5730487 -0.40579924]
  1917. [[-0.88134485 0.9509498 -0.86651784 0.57442576]
  1918. [ 0.99673784 0.08789381 -0.8187321 0.9957012 ]
  1919. [ 0.12891524 -0.9523804 -0.81952125 0.91396334]]]
  1920. """
  1921. @prim_attr_register
  1922. def __init__(self, axis=0, epsilon=1e-4):
  1923. validator.check_value_type('axis', axis, [int], self.name)
  1924. validator.check_value_type('epsilon', epsilon, [int, float], self.name)
  1925. def infer_shape(self, input_x):
  1926. dim = len(input_x)
  1927. validator.check_int_range(self.axis, -dim, dim, Rel.INC_LEFT, 'axis value', self.name)
  1928. return input_x
  1929. def infer_dtype(self, input_x):
  1930. validator.check_subclass("x", input_x, mstype.tensor, self.name)
  1931. validator.check_tensor_type_same({"input_x": input_x}, [mstype.float16, mstype.float32], self.name)
  1932. return input_x
  1933. class DropoutGenMask(Primitive):
  1934. """
  1935. Generates the mask value for the input shape.
  1936. Args:
  1937. Seed0 (int): Seed0 value for random generating. Default: 0.
  1938. Seed1 (int): Seed1 value for random generating. Default: 0.
  1939. Inputs:
  1940. - **shape** (tuple[int]) - The shape of target mask.
  1941. - **keep_prob** (Tensor) - The keep rate, greater than 0 and less equal than 1, e.g. keep_prob = 0.9,
  1942. means dropping out 10% of input units.
  1943. Outputs:
  1944. Tensor, the value of generated mask for input shape.
  1945. Examples:
  1946. >>> dropout_gen_mask = P.DropoutGenMask()
  1947. >>> shape = (2, 4, 5)
  1948. >>> keep_prob = Tensor(0.5, mindspore.float32)
  1949. >>> mask = dropout_gen_mask(shape, keep_prob)
  1950. [249, 11, 134, 133, 143, 246, 89, 52, 169, 15, 94, 63, 146, 103, 7, 101]
  1951. """
  1952. @prim_attr_register
  1953. def __init__(self, Seed0=0, Seed1=0):
  1954. self.init_prim_io_names(inputs=['shape', 'keep_prob'], outputs=['output'])
  1955. validator.check_value_type("Seed0", Seed0, [int], self.name)
  1956. validator.check_value_type("Seed1", Seed1, [int], self.name)
  1957. self.add_prim_attr("_random_effect", True)
  1958. class DropoutDoMask(PrimitiveWithInfer):
  1959. """
  1960. Applies dropout mask on the input tensor.
  1961. Take the mask output of DropoutGenMask as input, and apply dropout on the input.
  1962. Inputs:
  1963. - **input_x** (Tensor) - The input tensor.
  1964. - **mask** (Tensor) - The mask to be applied on `input_x`, which is the output of `DropoutGenMask`. And the
  1965. shape of `input_x` must be the same as the value of `DropoutGenMask`'s input `shape`. If input wrong `mask`,
  1966. the output of `DropoutDoMask` are unpredictable.
  1967. - **keep_prob** (Union[Tensor, float]) - The keep rate, greater than 0 and less equal than 1, e.g. keep_prob =
  1968. 0.9, means dropping out 10% of input units. The value of `keep_prob` is the same as the input `keep_prob` of
  1969. `DropoutGenMask`.
  1970. Outputs:
  1971. Tensor, the value that applied dropout on.
  1972. Examples:
  1973. >>> x = Tensor(np.ones([2, 2, 3]), mindspore.float32)
  1974. >>> shape = (2, 2, 3)
  1975. >>> keep_prob = Tensor(0.5, mindspore.float32)
  1976. >>> dropout_gen_mask = P.DropoutGenMask()
  1977. >>> dropout_do_mask = P.DropoutDoMask()
  1978. >>> mask = dropout_gen_mask(shape, keep_prob)
  1979. >>> output = dropout_do_mask(x, mask, keep_prob)
  1980. >>> assert output.shape == (2, 2, 3)
  1981. [[[2.0, 0.0, 0.0],
  1982. [0.0, 0.0, 0.0]],
  1983. [[0.0, 0.0, 0.0],
  1984. [2.0, 2.0, 2.0]]]
  1985. """
  1986. @prim_attr_register
  1987. def __init__(self):
  1988. pass
  1989. def __infer__(self, input_x, mask, keep_prob):
  1990. input_x_shape = input_x['shape']
  1991. mask_shape = mask['shape']
  1992. keep_prob_shape = keep_prob['shape']
  1993. validator.check("keep_prob's dim", len(keep_prob_shape), '0(scalar)', 0, Rel.EQ, self.name)
  1994. size_x = reduce(lambda x, y: x * y, input_x_shape)
  1995. if len(mask_shape) != 1:
  1996. raise ValueError("DropoutDoMask mask shape should be 1-dimension.")
  1997. size_y = mask_shape[0] * 8
  1998. if size_x > size_y:
  1999. raise ValueError(f"DropoutDoMask y mask do not math input input_x shape:"
  2000. "{input_x_shape}, mask shape: {mask_shape}.")
  2001. validator.check_tensor_type_same({"input_x": input_x['dtype']}, [mstype.float32, mstype.float16, mstype.int32],
  2002. self.name)
  2003. validator.check_tensor_type_same({"input_mask": mask['dtype']}, [mstype.uint8], self.name)
  2004. keep_prob_v = keep_prob['value']
  2005. if keep_prob_v is not None:
  2006. if isinstance(keep_prob['dtype'], type(mstype.tensor)):
  2007. validator.check_float_range(keep_prob_v.asnumpy(), 0, 1, Rel.INC_BOTH, 'keep_prob', self.name)
  2008. else:
  2009. validator.check_value_type("keep_prob", keep_prob_v, [float], self.name)
  2010. validator.check_float_range(keep_prob_v, 0, 1, Rel.INC_BOTH, 'keep_prob', self.name)
  2011. out = {'shape': input_x_shape,
  2012. 'dtype': input_x['dtype'],
  2013. 'value': None}
  2014. return out
  2015. class ResizeBilinear(PrimitiveWithInfer):
  2016. r"""
  2017. Resizes the image to certain size using bilinear interpolation.
  2018. The resizing only affects the lower two dimensions which represent the height and width. The input images
  2019. can be represented by different data types, but the data types of output images are always float32.
  2020. Args:
  2021. size (Union[tuple[int], list[int]]): A tuple or list of 2 int elements `(new_height, new_width)`, the new size
  2022. of the images.
  2023. align_corners (bool): If true, rescale input by `(new_height - 1) / (height - 1)`,
  2024. which exactly aligns the 4 corners of images and resized images. If false,
  2025. rescale by `new_height / height`. Default: False.
  2026. Inputs:
  2027. - **input** (Tensor) - Image to be resized. Input images must be a 4-D tensor with shape
  2028. :math:`(batch, channels, height, width)`, with data type of float32 or float16.
  2029. Outputs:
  2030. Tensor, resized image. 4-D with shape [batch, channels, new_height, new_width] in `float32`.
  2031. Examples:
  2032. >>> tensor = Tensor([[[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]]], mindspore.float32)
  2033. >>> resize_bilinear = P.ResizeBilinear((5, 5))
  2034. >>> result = resize_bilinear(tensor)
  2035. >>> assert result.shape == (1, 1, 5, 5)
  2036. """
  2037. @prim_attr_register
  2038. def __init__(self, size, align_corners=False):
  2039. validator.check_value_type("size", size, [tuple, list], self.name)
  2040. def infer_shape(self, input_shape):
  2041. validator.check("input shape rank", len(input_shape), "", 4, Rel.EQ, self.name)
  2042. input_shape = list(input_shape)
  2043. batch, channel, _, _ = input_shape
  2044. out_shape = [batch, channel]
  2045. for i in self.size:
  2046. out_shape.append(int(i))
  2047. return out_shape
  2048. def infer_dtype(self, input_dtype):
  2049. validator.check_tensor_type_same({'input_dtype': input_dtype}, [mstype.float16, mstype.float32], self.name)
  2050. return mstype.tensor_type(mstype.float32)
  2051. class OneHot(PrimitiveWithInfer):
  2052. r"""
  2053. Computes a one-hot tensor.
  2054. Makes a new tensor, whose locations represented by indices in `indices` take value `on_value`, while all
  2055. other locations take value `off_value`.
  2056. Note:
  2057. If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
  2058. Args:
  2059. axis (int): Position to insert the value. e.g. If `indices` shape is [n, c], and `axis` is `-1` the output shape
  2060. will be [n, c, depth], If `axis` is `0` the output shape will be [depth, n, c]. Default: -1.
  2061. Inputs:
  2062. - **indices** (Tensor) - A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
  2063. Data type must be int32.
  2064. - **depth** (int) - A scalar defining the depth of the one hot dimension.
  2065. - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`. With data type of float16 or float32.
  2066. - **off_value** (Tensor) - A value to fill in output when `indices[j] != i`.
  2067. Has the same data type with as `on_value`.
  2068. Outputs:
  2069. Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`.
  2070. Examples:
  2071. >>> indices = Tensor(np.array([0, 1, 2]), mindspore.int32)
  2072. >>> depth, on_value, off_value = 3, Tensor(1.0, mindspore.float32), Tensor(0.0, mindspore.float32)
  2073. >>> onehot = P.OneHot()
  2074. >>> result = onehot(indices, depth, on_value, off_value)
  2075. [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
  2076. """
  2077. @prim_attr_register
  2078. def __init__(self, axis=-1):
  2079. self.init_prim_io_names(inputs=['indices', 'depth', 'on_value', 'off_value'], outputs=['output'])
  2080. validator.check_value_type("axis", axis, [int], self.name)
  2081. def __infer__(self, indices, depth, on_value, off_value):
  2082. # check type
  2083. validator.check_tensor_type_same({"indices": indices['dtype']}, (mstype.int32,), self.name)
  2084. validator.check_type_name("depth", depth['dtype'], mstype.int_type, self.name)
  2085. args = {"on_value": on_value['dtype'], "off_value": off_value['dtype']}
  2086. validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name)
  2087. # check shape
  2088. indices_shp = indices['shape']
  2089. validator.check_int_range(self.axis, -1, len(indices_shp), Rel.INC_BOTH, "axis", self.name)
  2090. depth_val = depth['value']
  2091. validator.check_non_negative_int(depth_val, "depth", self.name)
  2092. # create new dimension at end if self.axis is -1
  2093. _ = indices_shp.insert(self.axis, depth_val) if self.axis >= 0 else indices_shp.append(depth_val)
  2094. return {'shape': indices_shp,
  2095. 'dtype': on_value['dtype'],
  2096. 'value': None}
  2097. class Gelu(PrimitiveWithInfer):
  2098. r"""
  2099. Gaussian Error Linear Units activation function.
  2100. GeLU is described in the paper `Gaussian Error Linear Units (GELUs) <https://arxiv.org/abs/1606.08415>`_.
  2101. And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
  2102. <https://arxiv.org/abs/1810.04805>`_.
  2103. Gelu is defined as follows:
  2104. .. math::
  2105. \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})),
  2106. where :math:`erf` is the "Gauss error function" .
  2107. Inputs:
  2108. - **input_x** (Tensor) - Input to compute the Gelu with data type of float16 or float32.
  2109. Outputs:
  2110. Tensor, with the same type and shape as input.
  2111. Examples:
  2112. >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
  2113. >>> gelu = P.Gelu()
  2114. >>> result = gelu(tensor)
  2115. """
  2116. @prim_attr_register
  2117. def __init__(self):
  2118. """Initialize GeLU"""
  2119. self.init_prim_io_names(inputs=['x'], outputs=['output'])
  2120. def infer_shape(self, input_x):
  2121. return input_x
  2122. def infer_dtype(self, input_x):
  2123. validator.check_tensor_type_same({"input_x": input_x}, (mstype.float16, mstype.float32), self.name)
  2124. return input_x
  2125. class GetNext(PrimitiveWithInfer):
  2126. """
  2127. Returns the next element in the dataset queue.
  2128. Note:
  2129. The GetNext operation needs to be associated with network and it also depends on the init_dataset interface,
  2130. it can't be used directly as a single operation.
  2131. For details, please refer to `connect_network_with_dataset` source code.
  2132. Args:
  2133. types (list[:class:`mindspore.dtype`]): The type of the outputs.
  2134. shapes (list[tuple[int]]): The dimensionality of the outputs.
  2135. output_num (int): The output number, length of `types` and `shapes`.
  2136. shared_name (str): The queue name of `init_dataset` interface.
  2137. Inputs:
  2138. No inputs.
  2139. Outputs:
  2140. tuple[Tensor], the output of Dataset. The shape is described in `shapes`
  2141. and the type is described is `types`.
  2142. Examples:
  2143. >>> get_next = P.GetNext([mindspore.float32, mindspore.int32], [[32, 1, 28, 28], [10]], 2, 'shared_name')
  2144. >>> feature, label = get_next()
  2145. """
  2146. @prim_attr_register
  2147. def __init__(self, types, shapes, output_num, shared_name):
  2148. validator.check_value_type("types", types, [list, tuple], self.name)
  2149. validator.check_value_type("shapes", shapes, [list, tuple], self.name)
  2150. validator.check("types length", len(types), "shapes length", len(shapes), Rel.EQ, self.name)
  2151. validator.check_value_type("output_num", output_num, [int], self.name)
  2152. def infer_shape(self):
  2153. return tuple(self.shapes)
  2154. def infer_dtype(self):
  2155. return tuple(self.types)
  2156. class PReLU(PrimitiveWithInfer):
  2157. r"""
  2158. Parametric Rectified Linear Unit activation function.
  2159. PReLU is described in the paper `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
  2160. ImageNet Classification <https://arxiv.org/abs/1502.01852>`_. Defined as follows:
  2161. .. math::
  2162. prelu(x_i)= \max(0, x_i) + \min(0, w * x_i),
  2163. where :math:`x_i` is an element of an channel of the input.
  2164. Note:
  2165. 1-dimensional input_x is not supported.
  2166. Inputs:
  2167. - **input_x** (Tensor) - Float tensor, representing the output of the preview layer.
  2168. With data type of float16 or float32.
  2169. - **weight** (Tensor) - Float Tensor, w > 0, there are only two shapes are legitimate,
  2170. 1 or the number of channels of the input. With data type of float16 or float32.
  2171. Outputs:
  2172. Tensor, with the same type as `input_x`.
  2173. For detailed information, please refer to `nn.PReLU`.
  2174. Examples:
  2175. >>> import mindspore
  2176. >>> import mindspore.nn as nn
  2177. >>> import numpy as np
  2178. >>> from mindspore import Tensor
  2179. >>> from mindspore.ops import operations as P
  2180. >>> class Net(nn.Cell):
  2181. >>> def __init__(self):
  2182. >>> super(Net, self).__init__()
  2183. >>> self.prelu = P.PReLU()
  2184. >>> def construct(self, input_x, weight):
  2185. >>> result = self.prelu(input_x, weight)
  2186. >>> return result
  2187. >>>
  2188. >>> input_x = Tensor(np.random.randint(-3, 3, (2, 3, 2)), mindspore.float32)
  2189. >>> weight = Tensor(np.array([0.1, 0.6, -0.3]), mindspore.float32)
  2190. >>> net = Net()
  2191. >>> result = net(input_x, weight)
  2192. [[[-0.1, 1.0],
  2193. [0.0, 2.0],
  2194. [0.0, 0.0]],
  2195. [[-0.2, -0.1],
  2196. [2.0, -1.8000001],
  2197. [0.6, 0.6]]]
  2198. """
  2199. @prim_attr_register
  2200. def __init__(self):
  2201. pass
  2202. def infer_shape(self, input_x_shape, weight_shape):
  2203. input_x_dim = len(input_x_shape)
  2204. weight_dim = len(weight_shape)
  2205. if input_x_dim == 1:
  2206. raise ValueError(f'For \'{self.name}\' input_x rank 1 is not supported.')
  2207. if weight_dim != 1:
  2208. raise ValueError(f'For \'{self.name}\' weight_dim must be 1, while weight_dim is {weight_dim}.')
  2209. if weight_shape[0] != input_x_shape[1] and weight_shape[0] != 1:
  2210. raise ValueError(f'For \'{self.name}\' channel of input_x and weight must be matched,'
  2211. f' while channel of input_x is {input_x_shape[1]},'
  2212. f' weight_shape[0] is {weight_shape[0]}.')
  2213. return input_x_shape
  2214. def infer_dtype(self, input_x_dtype, weight_dtype):
  2215. valid_types = (mstype.float16, mstype.float32)
  2216. validator.check_tensor_type_same({"input_x": input_x_dtype}, valid_types, self.name)
  2217. validator.check_tensor_type_same({"weight": weight_dtype}, valid_types, self.name)
  2218. return input_x_dtype
class LSTM(PrimitiveWithInfer):
    """
    Performs the long short term memory(LSTM) on the input.

    For detailed information, please refer to `nn.LSTM`.
    """

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        # Validate and store the recurrent-cell hyper-parameters.
        self.input_size = validator.check_positive_int(input_size, "input_size", self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, "hidden_size", self.name)
        self.num_layers = validator.check_positive_int(num_layers, "num_layers", self.name)
        self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
        # dropout must be a float in [0, 1].
        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)

        # A bidirectional LSTM runs one pass per direction.
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

    def infer_shape(self, x_shape, h_shape, c_shape, w_shape):
        """Infer output shapes: (y, h, c, reserved_space, state)."""
        # x is expected as (seq_len, batch_size, feature).
        validator.check_equal_int(len(x_shape), 3, "x rank", self.name)
        validator.check_equal_int(x_shape[2], self.input_size, "x[2]", self.name)

        # h and c should be same shape
        validator.check_equal_int(len(h_shape), 3, "h rank", self.name)
        validator.check("h_shape", h_shape, "c_shape", c_shape, Rel.EQ, self.name)

        # (num_layers * num_directions, batch, hidden_size)
        validator.check_int(h_shape[0], self.num_layers * self.num_directions, Rel.EQ, "h[0]", self.name)
        validator.check_equal_int(h_shape[1], x_shape[1], "h[1]", self.name)
        validator.check_int(h_shape[2], self.hidden_size, Rel.EQ, "h[2]", self.name)

        y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)

        # set arbitrary shape for reserved space
        # The sizes below presumably mirror a oneDNN-style RNN workspace layout
        # (per-gate activations, hidden/cell state history, gradient scratch),
        # with each section start aligned to a page boundary -- TODO confirm
        # against the backend kernel.
        type_size = 4  # bytes per float32 element
        gates_ws_ld = self.get_good_ld(self.hidden_size * 4, type_size)
        states_ws_ld = self.get_good_ld(max(self.hidden_size, self.input_size), type_size)
        self.ws_gates_size = self.num_layers * self.num_directions * x_shape[0] * x_shape[1] * gates_ws_ld * type_size
        self.ws_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * \
            x_shape[1] * states_ws_ld * type_size
        self.ws_c_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * \
            x_shape[1] * states_ws_ld * type_size
        self.ws_diff_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * (2 + 1) * \
            x_shape[1] * states_ws_ld * type_size
        self.ws_grid_comp_size = 0
        self.page_size = 4096
        # Accumulate the sections, rounding each section start up to a page.
        current_offset = 0
        current_offset += self.ws_gates_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_c_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_diff_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_grid_comp_size
        reserved_shape = (current_offset, 1)
        state_shape = (1, 1)
        return (y_shape, h_shape, c_shape, reserved_shape, state_shape)

    def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
        # All inputs must share a single float dtype; all outputs follow x.
        args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
        validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
        return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype)

    def rnd_up(self, current_offset, page_size):
        # Round `current_offset` up to the next multiple of `page_size`.
        return ((current_offset + page_size - 1) // page_size) * page_size

    def get_good_ld(self, dim, type_size):
        # Leading dimension rounded up to a 64-byte boundary.
        ld = self.rnd_up(dim, 64 // type_size)
        # NOTE(review): `ld * 256 == 0` is only true when ld == 0; this looks
        # like a typo for `ld % 256 == 0` (pad leading dimensions that are a
        # multiple of 256, a common cache-aliasing mitigation) -- confirm the
        # intent before changing, as it alters the inferred reserved-space size.
        if ld * 256 == 0:
            return ld + 64 // type_size
        return ld
  2286. class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer):
  2287. r"""
  2288. Uses the given logits to compute sigmoid cross entropy.
  2289. Note:
  2290. Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
  2291. .. math::
  2292. p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}}
  2293. .. math::
  2294. loss_{ij} = -[Y_{ij} * ln(p_{ij}) + (1 - Y_{ij})ln(1 - p_{ij})]
  2295. Inputs:
  2296. - **logits** (Tensor) - Input logits.
  2297. - **label** (Tensor) - Ground truth label.
  2298. Outputs:
  2299. Tensor, with the same shape and type as input `logits`.
  2300. Examples:
  2301. >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]).astype(np.float16))
  2302. >>> labels = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]).astype(np.float16))
  2303. >>> sigmoid = P.SigmoidCrossEntropyWithLogits()
  2304. >>> sigmoid(logits, labels)
  2305. [[0.6113 0.5034 0.263 ]
  2306. [0.5845 0.553 -0.4365]]
  2307. """
  2308. @prim_attr_register
  2309. def __init__(self):
  2310. """Initialize SigmoidCrossEntropyWithLogits"""
  2311. self.init_prim_io_names(inputs=['predict', 'target'], outputs=['loss'])
  2312. def infer_shape(self, x_shape, y_shape):
  2313. validator.check("x_shape", x_shape, "y_shape", y_shape, Rel.EQ, self.name)
  2314. return x_shape
  2315. def infer_dtype(self, x_dtype, y_dtype):
  2316. args = {"x_dtype": x_dtype, "y_dtype": y_dtype}
  2317. validator.check_tensor_type_same(args, mstype.number_type, self.name)
  2318. return x_dtype
  2319. class Pad(PrimitiveWithInfer):
  2320. """
  2321. Pads input tensor according to the paddings.
  2322. Args:
  2323. paddings (tuple): The shape of parameter `paddings` is (N, 2). N is the rank of input data. All elements of
  2324. paddings are int type. For the input in `D` th dimension, paddings[D, 0] indicates how many sizes to be
  2325. extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1] indicates how many sizes to
  2326. be extended behind the input tensor in the `D` th dimension.
  2327. Inputs:
  2328. - **input_x** (Tensor) - The input tensor.
  2329. Outputs:
  2330. Tensor, the tensor after padding.
  2331. Examples:
  2332. >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32)
  2333. >>> pad_op = P.Pad(((1, 2), (2, 1)))
  2334. >>> output_tensor = pad_op(input_tensor)
  2335. >>> assert output_tensor == Tensor(np.array([[ 0. , 0. , 0. , 0. , 0. , 0. ],
  2336. >>> [ 0. , 0. , -0.1, 0.3, 3.6, 0. ],
  2337. >>> [ 0. , 0. , 0.4, 0.5, -3.2, 0. ],
  2338. >>> [ 0. , 0. , 0. , 0. , 0. , 0. ],
  2339. >>> [ 0. , 0. , 0. , 0. , 0. , 0. ]]), mindspore.float32)
  2340. """
  2341. @prim_attr_register
  2342. def __init__(self, paddings):
  2343. """Initialize Pad"""
  2344. self.init_prim_io_names(inputs=['x'], outputs=['y'])
  2345. if not isinstance(paddings, tuple):
  2346. raise TypeError('Paddings must be tuple type.')
  2347. for item in paddings:
  2348. if len(item) != 2:
  2349. raise ValueError('The shape of paddings must be (n, 2).')
  2350. self.paddings = paddings
  2351. def infer_shape(self, x):
  2352. paddings = np.array(self.paddings)
  2353. validator.check_int(paddings.size, len(x) * 2, Rel.EQ, 'paddings.shape', self.name)
  2354. if not np.all(paddings >= 0):
  2355. raise ValueError('All elements of paddings must be >= 0.')
  2356. y_shape = ()
  2357. for i in range(int(paddings.size / 2)):
  2358. y_shape += ((x[i] + paddings[i, 0] + paddings[i, 1]),)
  2359. return y_shape
  2360. def infer_dtype(self, x):
  2361. validator.check_subclass("input_x", x, mstype.tensor, self.name)
  2362. return x
class MirrorPad(PrimitiveWithInfer):
    """
    Pads the input tensor according to the paddings and mode.

    Args:
        mode (str): Specifies the padding mode. The optional values are "REFLECT" and "SYMMETRIC".
            Default: "REFLECT".

    Inputs:
        - **input_x** (Tensor) - The input tensor.
        - **paddings** (Tensor) - The paddings tensor. The value of `paddings` is a matrix(list),
          and its shape is (N, 2). N is the rank of input data. All elements of paddings
          are int type. For the input in the `D` th dimension, paddings[D, 0] indicates how many sizes to be
          extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1] indicates how many sizes to
          be extended behind the input tensor in the `D` th dimension.

    Outputs:
        Tensor, the tensor after padding.

        - If `mode` is "REFLECT", it uses a way of symmetrical copying through the axis of symmetry to fill in.
          If the `input_x` is [[1,2,3],[4,5,6],[7,8,9]] and `paddings` is [[1,1],[2,2]], then the
          Outputs is [[6,5,4,5,6,5,4],[3,2,1,2,3,2,1],[6,5,4,5,6,5,4],[9,8,7,8,9,8,7],[6,5,4,5,6,5,4]].
        - If `mode` is "SYMMETRIC", the filling method is similar to the "REFLECT". It is also copied
          according to the symmetry axis, except that it includes the symmetry axis. If the `input_x`
          is [[1,2,3],[4,5,6],[7,8,9]] and `paddings` is [[1,1],[2,2]], then the Outputs is
          [[2,1,1,2,3,3,2],[2,1,1,2,3,3,2],[5,4,4,5,6,6,5],[8,7,7,8,9,9,8],[8,7,7,8,9,9,8]].

    Examples:
        >>> from mindspore import Tensor
        >>> from mindspore.ops import operations as P
        >>> import mindspore.nn as nn
        >>> import numpy as np
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.pad = P.MirrorPad(mode="REFLECT")
        >>>     def construct(self, x, paddings):
        >>>         return self.pad(x, paddings)
        >>> x = np.random.random(size=(2, 3)).astype(np.float32)
        >>> paddings = Tensor([[1,1],[2,2]])
        >>> pad = Net()
        >>> ms_output = pad(Tensor(x), paddings)
    """

    @prim_attr_register
    def __init__(self, mode='REFLECT'):
        """Initialize Pad"""
        validator.check_string(mode, ['REFLECT', 'SYMMETRIC'], 'mode', self.name)
        self.mode = mode
        # `paddings` (input index 1) must be a compile-time constant so the
        # output shape can be inferred below.
        self.set_const_input_indexes([1])

    def __infer__(self, input_x, paddings):
        validator.check_subclass("input_x", input_x['dtype'], mstype.tensor, self.name)
        validator.check_subclass("paddings", paddings['dtype'], mstype.tensor, self.name)
        x_shape = list(input_x['shape'])
        paddings_value = paddings['value'].asnumpy()
        paddings_size = paddings_value.size
        # paddings must supply a (before, after) pair for every input dimension
        validator.check_int(paddings_size, len(x_shape) * 2, Rel.EQ, 'paddings.shape', self.name)
        if not np.all(paddings_value >= 0):
            raise ValueError('All elements of paddings must be >= 0.')
        # REFLECT excludes the edge element from the mirror, so the maximum valid
        # padding is dim - 1; SYMMETRIC includes the edge, allowing up to dim.
        adjust = 0
        if self.mode == 'SYMMETRIC':
            adjust = 1
        for i in range(0, int(paddings_size / 2)):
            if (paddings_value[i, 0] >= x_shape[i] + adjust) or (paddings_value[i, 1] >= x_shape[i] + adjust):
                raise ValueError('At least one dim has too high a padding value for this input and mode')
        # output dim = input dim + leading pad + trailing pad
        y_shape = ()
        for i in range(0, int(paddings_size / 2)):
            y_shape += ((x_shape[i] + paddings_value[i, 0] + paddings_value[i, 1]),)
        return {'shape': y_shape,
                'dtype': input_x['dtype'],
                'value': None}
class ROIAlign(PrimitiveWithInfer):
    """
    Computes Region of Interest (RoI) Align operator.

    The operator computes the value of each sampling point by bilinear interpolation from the nearby grid points on the
    feature map. No quantization is performed on any coordinates involved in the RoI, its bins, or the sampling
    points. The details of (RoI) Align operator are described in `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_.

    Args:
        pooled_height (int): The output features' height.
        pooled_width (int): The output features' width.
        spatial_scale (float): A scaling factor that maps the raw image coordinates to the input
            feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the
            input feature map, the `spatial_scale` must be `fea_h / ori_h`.
        sample_num (int): Number of sampling points. Default: 2.
        roi_end_mode (int): Number must be 0 or 1. Default: 1.

    Inputs:
        - **features** (Tensor) - The input features, whose shape must be `(N, C, H, W)`.
        - **rois** (Tensor) - The shape is `(rois_n, 5)`. With data type of float16 or float32.
          `rois_n` represents the number of RoI. The size of the second dimension must be `5` and the `5` columns
          are `(image_index, top_left_x, top_left_y, bottom_right_x, bottom_right_y)`. `image_index` represents the
          index of image. `top_left_x` and `top_left_y` represent the `x, y` coordinates of the top left corner
          of corresponding RoI, respectively. `bottom_right_x` and `bottom_right_y` represent the `x, y`
          coordinates of the bottom right corner of corresponding RoI, respectively.

    Outputs:
        Tensor, the shape is `(rois_n, C, pooled_height, pooled_width)`.

    Examples:
        >>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32)
        >>> rois = Tensor(np.array([[0, 0.2, 0.3, 0.2, 0.3]]), mindspore.float32)
        >>> roi_align = P.ROIAlign(2, 2, 0.5, 2)
        >>> output_tensor = roi_align(input_tensor, rois)
        [[[[1.77499998e+00, 2.02500010e+00],
           [2.27500010e+00, 2.52500010e+00]]]]
    """

    @prim_attr_register
    def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1):
        """Initialize ROIAlign"""
        # Validate every attribute's type up front so misuse fails at graph-build time.
        validator.check_value_type("pooled_height", pooled_height, [int], self.name)
        validator.check_value_type("pooled_width", pooled_width, [int], self.name)
        validator.check_value_type("spatial_scale", spatial_scale, [float], self.name)
        validator.check_value_type("sample_num", sample_num, [int], self.name)
        validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name)
        # Only two end modes (0 and 1) are defined for this primitive.
        validator.check_int_range(roi_end_mode, 0, 1, Rel.INC_BOTH, "roi_end_mode", self.name)
        self.pooled_height = pooled_height
        self.pooled_width = pooled_width
        self.spatial_scale = spatial_scale
        self.sample_num = sample_num
        self.roi_end_mode = roi_end_mode

    def infer_shape(self, inputs_shape, rois_shape):
        # NOTE(review): the docstring says features are 4-D (N, C, H, W), but this
        # only enforces rank <= 4 — confirm whether Rel.EQ was intended.
        validator.check("input shape rank", len(inputs_shape), "", 4, Rel.LE, self.name)
        # One pooled (C, pooled_height, pooled_width) feature map per RoI row.
        return [rois_shape[0], inputs_shape[1], self.pooled_height, self.pooled_width]

    def infer_dtype(self, inputs_type, rois_type):
        # Both inputs must be float16 or float32; the output dtype follows the features.
        valid_types = (mstype.float16, mstype.float32)
        validator.check_tensor_type_same({"inputs_type": inputs_type}, valid_types, self.name)
        validator.check_tensor_type_same({"rois_type": rois_type}, valid_types, self.name)
        return inputs_type
  2486. class Adam(PrimitiveWithInfer):
  2487. r"""
  2488. Updates gradients by Adaptive Moment Estimation (Adam) algorithm.
  2489. The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
  2490. The updating formulas are as follows,
  2491. .. math::
  2492. \begin{array}{ll} \\
  2493. m = \beta_1 * m + (1 - \beta_1) * g \\
  2494. v = \beta_2 * v + (1 - \beta_2) * g * g \\
  2495. l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
  2496. w = w - l * \frac{m}{\sqrt{v} + \epsilon}
  2497. \end{array}
  2498. :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
  2499. `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
  2500. :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent `beta1_power` and
  2501. `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents
  2502. `epsilon`.
  2503. Args:
  2504. use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
  2505. If true, updates of the var, m, and v tensors will be protected by a lock.
  2506. If false, the result is unpredictable. Default: False.
  2507. use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
  2508. If true, update the gradients using NAG.
  2509. If true, update the gradients without using NAG. Default: False.
  2510. Inputs:
  2511. - **var** (Tensor) - Weights to be updated.
  2512. - **m** (Tensor) - The 1st moment vector in the updating formula, has the same type as `var`.
  2513. - **v** (Tensor) - the 2nd moment vector in the updating formula.
  2514. Mean square gradients with the same type as `var`.
  2515. - **beta1_power** (float) - :math:`beta_1^t` in the updating formula.
  2516. - **beta2_power** (float) - :math:`beta_2^t` in the updating formula.
  2517. - **lr** (float) - :math:`l` in the updating formula.
  2518. - **beta1** (float) - The exponential decay rate for the 1st moment estimations.
  2519. - **beta2** (float) - The exponential decay rate for the 2nd moment estimations.
  2520. - **epsilon** (float) - Term added to the denominator to improve numerical stability.
  2521. - **gradient** (Tensor) - Gradient, has the same type as `var`.
  2522. Outputs:
  2523. Tuple of 3 Tensor, the updated parameters.
  2524. - **var** (Tensor) - The same shape and data type as `var`.
  2525. - **m** (Tensor) - The same shape and data type as `m`.
  2526. - **v** (Tensor) - The same shape and data type as `v`.
  2527. Examples:
  2528. >>> import numpy as np
  2529. >>> import mindspore.nn as nn
  2530. >>> from mindspore import Tensor, Parameter
  2531. >>> from mindspore.ops import operations as P
  2532. >>> class Net(nn.Cell):
  2533. >>> def __init__(self):
  2534. >>> super(Net, self).__init__()
  2535. >>> self.apply_adam = P.Adam()
  2536. >>> self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var")
  2537. >>> self.m = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="m")
  2538. >>> self.v = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="v")
  2539. >>> def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
  2540. >>> out = self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
  2541. >>> epsilon, grad)
  2542. >>> return out
  2543. >>> net = Net()
  2544. >>> gradient = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
  2545. >>> result = net(0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, gradient)
  2546. """
  2547. @prim_attr_register
  2548. def __init__(self, use_locking=False, use_nesterov=False):
  2549. validator.check_value_type("use_locking", use_locking, [bool], self.name)
  2550. validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
  2551. def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
  2552. beta1_shape, beta2_shape, epsilon_shape, grad_shape):
  2553. validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name)
  2554. validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name)
  2555. validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
  2556. return var_shape, m_shape, v_shape
  2557. def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype,
  2558. beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype):
  2559. args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype}
  2560. validator.check_tensor_type_same(args, mstype.number_type, self.name)
  2561. args = {"beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype,
  2562. "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype}
  2563. validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name, True)
  2564. return var_dtype, m_dtype, v_dtype
class FusedSparseAdam(PrimitiveWithInfer):
    r"""
    Merges the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam)
    algorithm. This operator is used when the gradient is sparse.

    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.

    The updating formulas are as follows,

    .. math::
        \begin{array}{ll} \\
            m = \beta_1 * m + (1 - \beta_1) * g \\
            v = \beta_2 * v + (1 - \beta_2) * g * g \\
            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
        \end{array}

    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
    :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent `beta1_power` and
    `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents
    `epsilon`.

    All of the inputs except `indices` comply with the implicit type conversion rules to make the data types
    consistent. If they have different data types, the lower priority data type will be converted to the
    relatively highest priority data type.
    RuntimeError exception will be thrown when the data type conversion of Parameter is required.

    Args:
        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
            If true, updates of the var, m, and v tensors will be protected by a lock.
            If false, the result is unpredictable. Default: False.
        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
            If true, update the gradients using NAG.
            If false, update the gradients without using NAG. Default: False.

    Inputs:
        - **var** (Parameter) - Parameters to be updated with float32 data type.
        - **m** (Parameter) - The 1st moment vector in the updating formula, has the same type as `var` with
          float32 data type.
        - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients, has the same type
          as `var` with float32 data type.
        - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
        - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
        - **lr** (Tensor) - :math:`l` in the updating formula. With float32 data type.
        - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type.
        - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type.
        - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type.
        - **gradient** (Tensor) - Gradient value with float32 data type.
        - **indices** (Tensor) - Gradient indices with int32 data type.

    Outputs:
        Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.

        - **var** (Tensor) - A Tensor with shape (1,).
        - **m** (Tensor) - A Tensor with shape (1,).
        - **v** (Tensor) - A Tensor with shape (1,).

    Examples:
        >>> import numpy as np
        >>> import mindspore.nn as nn
        >>> from mindspore import Tensor, Parameter
        >>> from mindspore.ops import operations as P
        >>> import mindspore.common.dtype as mstype
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.sparse_apply_adam = P.FusedSparseAdam()
        >>>         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
        >>>         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
        >>>         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")
        >>>     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        >>>         out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
        >>>                                      epsilon, grad, indices)
        >>>         return out
        >>> net = Net()
        >>> beta1_power = Tensor(0.9, mstype.float32)
        >>> beta2_power = Tensor(0.999, mstype.float32)
        >>> lr = Tensor(0.001, mstype.float32)
        >>> beta1 = Tensor(0.9, mstype.float32)
        >>> beta2 = Tensor(0.999, mstype.float32)
        >>> epsilon = Tensor(1e-8, mstype.float32)
        >>> gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
        >>> indices = Tensor([0, 1], mstype.int32)
        >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    """
    # var/m/v are written in place; all float inputs share one implicit-conversion
    # group (T) while indices have an independent dtype group (T1).
    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T),
        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T),
        sig.make_sig('lr', dtype=sig.sig_dtype.T),
        sig.make_sig('beta1', dtype=sig.sig_dtype.T),
        sig.make_sig('beta2', dtype=sig.sig_dtype.T),
        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
    )

    @prim_attr_register
    def __init__(self, use_locking=False, use_nesterov=False):
        """Initialize FusedSparseAdam: validate the bool flags and declare the IO names."""
        validator.check_value_type("use_locking", use_locking, [bool], self.name)
        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
        self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
                                        'epsilon', 'grad', 'indices'],
                                outputs=['var', 'm', 'v'])

    def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
                    beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape):
        # var, m and v must share one shape; the scalar hyper-parameters are unconstrained.
        validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name)
        # indices is a vector of row ids; grad holds one var-row slice per index.
        validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
        if len(var_shape) > 1 and grad_shape != indices_shape + var_shape[1:]:
            raise ValueError(f"For '{self.name}', the shape of updates should be [] or "
                             f"grad_shape = indices_shape + var_shape[1:], but got var_shape: {var_shape}, "
                             f"indices_shape: {indices_shape}, grad_shape: {grad_shape}.")
        # Parameters are updated in place; the declared outputs are placeholders.
        return [1], [1], [1]

    def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype,
                    beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype, indices_dtype):
        # Tensor operands must agree on a numeric dtype.
        args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype}
        validator.check_tensor_type_same(args, mstype.number_type, self.name)
        # Hyper-parameters may be scalars or tensors, restricted to float16/float32.
        args = {"beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype,
                "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype}
        validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name, True)
        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
        return var_dtype, m_dtype, v_dtype
class FusedSparseLazyAdam(PrimitiveWithInfer):
    r"""
    Merges the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam)
    algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
    original Adam algorithm, as only the current indices parameters will be updated.

    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.

    The updating formulas are as follows,

    .. math::
        \begin{array}{ll} \\
            m = \beta_1 * m + (1 - \beta_1) * g \\
            v = \beta_2 * v + (1 - \beta_2) * g * g \\
            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
        \end{array}

    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
    :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent `beta1_power` and
    `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents
    `epsilon`.

    All of the inputs except `indices` comply with the implicit type conversion rules to make the data types
    consistent. If they have different data types, the lower priority data type will be converted to the
    relatively highest priority data type.
    RuntimeError exception will be thrown when the data type conversion of Parameter is required.

    Args:
        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
            If true, updates of the var, m, and v tensors will be protected by a lock.
            If false, the result is unpredictable. Default: False.
        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
            If true, update the gradients using NAG.
            If false, update the gradients without using NAG. Default: False.

    Inputs:
        - **var** (Parameter) - Parameters to be updated with float32 data type.
        - **m** (Parameter) - The 1st moment vector in the updating formula, has the same type as `var` with
          float32 data type.
        - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients, has the same type
          as `var` with float32 data type.
        - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
        - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
        - **lr** (Tensor) - :math:`l` in the updating formula with float32 data type.
        - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type.
        - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type.
        - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type.
        - **gradient** (Tensor) - Gradient value with float32 data type.
        - **indices** (Tensor) - Gradient indices with int32 data type.

    Outputs:
        Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.

        - **var** (Tensor) - A Tensor with shape (1,).
        - **m** (Tensor) - A Tensor with shape (1,).
        - **v** (Tensor) - A Tensor with shape (1,).

    Examples:
        >>> import numpy as np
        >>> import mindspore.nn as nn
        >>> from mindspore import Tensor, Parameter
        >>> from mindspore.ops import operations as P
        >>> import mindspore.common.dtype as mstype
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.sparse_apply_lazyadam = P.FusedSparseLazyAdam()
        >>>         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
        >>>         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
        >>>         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")
        >>>     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        >>>         out = self.sparse_apply_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
        >>>                                          beta2, epsilon, grad, indices)
        >>>         return out
        >>> net = Net()
        >>> beta1_power = Tensor(0.9, mstype.float32)
        >>> beta2_power = Tensor(0.999, mstype.float32)
        >>> lr = Tensor(0.001, mstype.float32)
        >>> beta1 = Tensor(0.9, mstype.float32)
        >>> beta2 = Tensor(0.999, mstype.float32)
        >>> epsilon = Tensor(1e-8, mstype.float32)
        >>> gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
        >>> indices = Tensor([0, 1], mstype.int32)
        >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    """
    # var/m/v are written in place; all float inputs share one implicit-conversion
    # group (T) while indices have an independent dtype group (T1).
    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T),
        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T),
        sig.make_sig('lr', dtype=sig.sig_dtype.T),
        sig.make_sig('beta1', dtype=sig.sig_dtype.T),
        sig.make_sig('beta2', dtype=sig.sig_dtype.T),
        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
    )

    @prim_attr_register
    def __init__(self, use_locking=False, use_nesterov=False):
        """Initialize FusedSparseLazyAdam: validate the bool flags and declare the IO names."""
        validator.check_value_type("use_locking", use_locking, [bool], self.name)
        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
        self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
                                        'epsilon', 'grad', 'indices'],
                                outputs=['var', 'm', 'v'])

    def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
                    beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape):
        # var, m and v must share one shape; the scalar hyper-parameters are unconstrained.
        validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name)
        validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name)
        # indices is a vector of row ids; grad holds one var-row slice per index.
        validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
        if len(var_shape) > 1 and grad_shape != indices_shape + var_shape[1:]:
            raise ValueError(f"For '{self.name}', the shape of updates should be [] or "
                             f"grad_shape = indices_shape + var_shape[1:], but got var_shape: {var_shape}, "
                             f"indices_shape: {indices_shape}, grad_shape: {grad_shape}.")
        # Parameters are updated in place; the declared outputs are placeholders.
        return [1], [1], [1]

    def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype,
                    beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype, indices_dtype):
        # Tensor operands must agree on a numeric dtype.
        args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype}
        validator.check_tensor_type_same(args, mstype.number_type, self.name)
        # Hyper-parameters may be scalars or tensors, restricted to float16/float32.
        args = {"beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype,
                "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype}
        validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name, True)
        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
        return var_dtype, m_dtype, v_dtype
  2798. class FusedSparseFtrl(PrimitiveWithInfer):
  2799. """
  2800. Merges the duplicate value of the gradient and then updates relevant entries according to the FTRL-proximal scheme.
  2801. All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
  2802. If they have different data types, lower priority data type will be converted to
  2803. relatively highest priority data type.
  2804. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  2805. Args:
  2806. lr (float): The learning rate value, must be positive.
  2807. l1 (float): l1 regularization strength, must be greater than or equal to zero.
  2808. l2 (float): l2 regularization strength, must be greater than or equal to zero.
  2809. lr_power (float): Learning rate power controls how the learning rate decreases during training,
  2810. must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
  2811. use_locking (bool): Use locks for updating operation if true . Default: False.
  2812. Inputs:
  2813. - **var** (Parameter) - The variable to be updated. The data type must be float32.
  2814. - **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
  2815. - **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as `var`.
  2816. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
  2817. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. The shape
  2818. of `indices` must be the same as `grad` in first dimension. The type must be int32.
  2819. Outputs:
  2820. Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
  2821. - **var** (Tensor) - A Tensor with shape (1,).
  2822. - **accum** (Tensor) - A Tensor with shape (1,).
  2823. - **linear** (Tensor) - A Tensor with shape (1,).
  2824. Examples:
  2825. >>> import mindspore
  2826. >>> import mindspore.nn as nn
  2827. >>> import numpy as np
  2828. >>> from mindspore import Parameter
  2829. >>> from mindspore import Tensor
  2830. >>> from mindspore.ops import operations as P
  2831. >>> class SparseApplyFtrlNet(nn.Cell):
  2832. >>> def __init__(self):
  2833. >>> super(SparseApplyFtrlNet, self).__init__()
  2834. >>> self.sparse_apply_ftrl = P.FusedSparseFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
  2835. >>> self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
  2836. >>> self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
  2837. >>> self.linear = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="linear")
  2838. >>>
  2839. >>> def construct(self, grad, indices):
  2840. >>> out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
  2841. >>> return out
  2842. >>>
  2843. >>> net = SparseApplyFtrlNet()
  2844. >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
  2845. >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
  2846. >>> output = net(grad, indices)
  2847. """
  2848. __mindspore_signature__ = (
  2849. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  2850. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  2851. sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  2852. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  2853. sig.make_sig('indices', dtype=sig.sig_dtype.T1),
  2854. )
  2855. @prim_attr_register
  2856. def __init__(self, lr, l1, l2, lr_power, use_locking=False):
  2857. self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
  2858. outputs=['output'])
  2859. validator.check_value_type("lr", lr, [float], self.name)
  2860. validator.check_value_type("l1", l1, [float], self.name)
  2861. validator.check_value_type("l2", l2, [float], self.name)
  2862. validator.check_value_type("lr_power", lr_power, [float], self.name)
  2863. self.lr = validator.check_positive_float(lr, "lr", self.name)
  2864. self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
  2865. self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
  2866. self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
  2867. self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
  2868. def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
  2869. validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
  2870. validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
  2871. if len(var_shape) > 1:
  2872. validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
  2873. validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
  2874. validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
  2875. return [1], [1], [1]
  2876. def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
  2877. args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
  2878. "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
  2879. validator.check_tensor_type_same(args, [mstype.float32], self.name)
  2880. validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
  2881. return var_dtype, accum_dtype, linear_dtype
class FusedSparseProximalAdagrad(PrimitiveWithInfer):
    r"""
    Merges the duplicate value of the gradient and then updates relevant entries according to the proximal adagrad
    algorithm.

    .. math::
        accum += grad * grad
    .. math::
        \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}}
    .. math::
        var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)

    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
    If they have different data types, lower priority data type will be converted to
    relatively highest priority data type.
    RuntimeError exception will be thrown when the data type conversion of Parameter is required.

    Args:
        use_locking (bool): If true, the variable and accumulation tensors will be protected from being updated.
            Default: False.

    Inputs:
        - **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
        - **accum** (Parameter) - Variable tensor to be updated, has the same dtype as `var`.
        - **lr** (Tensor) - The learning rate value. The data type must be float32.
        - **l1** (Tensor) - l1 regularization strength. The data type must be float32.
        - **l2** (Tensor) - l2 regularization strength. The data type must be float32.
        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. The data type must be float32.
        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. The data type
          can be int16, int32, int64, uint16, uint32 or uint64.

    Outputs:
        Tuple of 2 Tensors, this operator will update the input parameters directly, the outputs are useless.

        - **var** (Tensor) - A Tensor with shape (1,).
        - **accum** (Tensor) - A Tensor with shape (1,).

    Examples:
        >>> import numpy as np
        >>> import mindspore.nn as nn
        >>> from mindspore import Tensor, Parameter
        >>> from mindspore.ops import operations as P
        >>> import mindspore.common.dtype as mstype
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.sparse_apply_proximal_adagrad = P.FusedSparseProximalAdagrad()
        >>>         self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
        >>>         self.lr = Tensor(0.01, mstype.float32)
        >>>         self.l1 = Tensor(0.0, mstype.float32)
        >>>         self.l2 = Tensor(0.0, mstype.float32)
        >>>     def construct(self, grad, indices):
        >>>         out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
        >>>                                                  self.l2, grad, indices)
        >>>         return out
        >>> net = Net()
        >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
        >>> output = net(grad, indices)
    """
    # var/accum are updated in place; float inputs share dtype group T, indices use T1.
    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('lr', dtype=sig.sig_dtype.T),
        sig.make_sig('l1', dtype=sig.sig_dtype.T),
        sig.make_sig('l2', dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
    )

    @prim_attr_register
    def __init__(self, use_locking=False):
        """Initialize FusedSparseProximalAdagrad: declare IO names and validate the lock flag."""
        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
                                outputs=['output'])
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)

    def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape):
        # Only the indices rank is validated here; unlike the sibling FusedSparse* ops,
        # var/accum/grad shape consistency is not checked at infer time.
        validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
        # Updates happen in place; the declared outputs are placeholders.
        return [1], [1]

    def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype, indices_dtype):
        args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
        validator.check_tensor_type_same(args, [mstype.float32], self.name)
        validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, [mstype.float32], self.name)
        validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, [mstype.float32], self.name)
        validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, [mstype.float32], self.name)
        # NOTE(review): sibling FusedSparse* ops accept only int32 indices; confirm the
        # wider integer set here is intentional and supported by the kernel.
        valid_types = [mstype.int16, mstype.int32, mstype.int64,
                       mstype.uint16, mstype.uint32, mstype.uint64]
        validator.check_tensor_type_same({'indices': indices_dtype}, valid_types, self.name)
        return var_dtype, accum_dtype
  2962. class KLDivLoss(PrimitiveWithInfer):
  2963. r"""
  2964. Computes the Kullback-Leibler divergence between the target and the output.
  2965. Note:
  2966. Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.
  2967. Let,
  2968. .. math::
  2969. L = \{l_1,\dots,l_N\}^\top, \quad
  2970. l_n = y_n \cdot (\log y_n - x_n)
  2971. Then,
  2972. .. math::
  2973. \ell(x, y) = \begin{cases}
  2974. L, & \text{if reduction} = \text{`none';}\\
  2975. \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
  2976. \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.}
  2977. \end{cases}
  2978. Args:
  2979. reduction (str): Specifies the reduction to be applied to the output.
  2980. Its value must be one of 'none', 'mean', 'sum'. Default: 'mean'.
  2981. Inputs:
  2982. - **input_x** (Tensor) - The input Tensor. The data type must be float32.
  2983. - **input_y** (Tensor) - The label Tensor which has the same shape as `input_x`. The data type must be float32.
  2984. Outputs:
  2985. Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `input_x`.
  2986. Otherwise it is a scalar.
  2987. Examples:
  2988. >>> import mindspore
  2989. >>> import mindspore.nn as nn
  2990. >>> import numpy as np
  2991. >>> from mindspore import Tensor
  2992. >>> from mindspore.ops import operations as P
  2993. >>> class Net(nn.Cell):
  2994. >>> def __init__(self):
  2995. >>> super(Net, self).__init__()
  2996. >>> self.kldiv_loss = P.KLDivLoss()
  2997. >>> def construct(self, x, y):
  2998. >>> result = self.kldiv_loss(x, y)
  2999. >>> return result
  3000. >>>
  3001. >>> net = Net()
  3002. >>> input_x = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
  3003. >>> input_y = Tensor(np.array([0., 1., 0.]), mindspore.float32)
  3004. >>> result = net(input_x, input_y)
  3005. """
  3006. @prim_attr_register
  3007. def __init__(self, reduction='mean'):
  3008. self.reduction = validator.check_string(reduction, ['none', 'mean', 'sum'], 'reduction', self.name)
  3009. def infer_shape(self, x_shape, y_shape):
  3010. validator.check('x_shape', x_shape, 'y_shape', y_shape, Rel.EQ, self.name)
  3011. if self.reduction in ('mean', 'sum'):
  3012. shape = []
  3013. else:
  3014. shape = x_shape
  3015. return shape
  3016. def infer_dtype(self, x_type, y_type):
  3017. args = {'x': x_type, 'y': y_type}
  3018. valid_types = (mstype.float16, mstype.float32)
  3019. validator.check_tensor_type_same(args, valid_types, self.name)
  3020. return x_type
  3021. class BinaryCrossEntropy(PrimitiveWithInfer):
  3022. r"""
  3023. Computes the Binary Cross Entropy between the target and the output.
  3024. Note:
  3025. Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.
  3026. Let,
  3027. .. math::
  3028. L = \{l_1,\dots,l_N\}^\top, \quad
  3029. l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right]
  3030. Then,
  3031. .. math::
  3032. \ell(x, y) = \begin{cases}
  3033. L, & \text{if reduction} = \text{`none';}\\
  3034. \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
  3035. \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.}
  3036. \end{cases}
  3037. Args:
  3038. reduction (str): Specifies the reduction to be applied to the output.
  3039. Its value must be one of 'none', 'mean', 'sum'. Default: 'mean'.
  3040. Inputs:
  3041. - **input_x** (Tensor) - The input Tensor. The data type must be float16 or float32.
  3042. - **input_y** (Tensor) - The label Tensor which has same shape and data type as `input_x`.
  3043. - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each batch element.
  3044. And it must have same shape and data type as `input_x`. Default: None.
  3045. Outputs:
  3046. Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `input_x`.
  3047. Otherwise, the output is a scalar.
  3048. Examples:
  3049. >>> import mindspore
  3050. >>> import mindspore.nn as nn
  3051. >>> import numpy as np
  3052. >>> from mindspore import Tensor
  3053. >>> from mindspore.ops import operations as P
  3054. >>> class Net(nn.Cell):
  3055. >>> def __init__(self):
  3056. >>> super(Net, self).__init__()
  3057. >>> self.binary_cross_entropy = P.BinaryCrossEntropy()
  3058. >>> def construct(self, x, y, weight):
  3059. >>> result = self.binary_cross_entropy(x, y, weight)
  3060. >>> return result
  3061. >>>
  3062. >>> net = Net()
  3063. >>> input_x = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
  3064. >>> input_y = Tensor(np.array([0., 1., 0.]), mindspore.float32)
  3065. >>> weight = Tensor(np.array([1, 2, 2]), mindspore.float32)
  3066. >>> result = net(input_x, input_y, weight)
  3067. 0.38240486
  3068. """
  3069. @prim_attr_register
  3070. def __init__(self, reduction='mean'):
  3071. self.reduction = validator.check_string(reduction, ['none', 'mean', 'sum'], 'reduction', self.name)
  3072. def infer_shape(self, x_shape, y_shape, weight_shape):
  3073. validator.check('x_shape', x_shape, 'y_shape', y_shape, Rel.EQ, self.name)
  3074. if weight_shape:
  3075. validator.check('y_shape', y_shape, 'weight_shape', weight_shape, Rel.EQ, self.name)
  3076. if self.reduction in ('mean', 'sum'):
  3077. shape = []
  3078. else:
  3079. shape = x_shape
  3080. return shape
  3081. def infer_dtype(self, x_type, y_type, weight_type):
  3082. args = {'x': x_type, 'y': y_type}
  3083. valid_types = (mstype.float16, mstype.float32)
  3084. validator.check_tensor_type_same(args, valid_types, self.name)
  3085. if weight_type:
  3086. validator.check_tensor_type_same({'x': x_type, 'weight': weight_type}, valid_types, self.name)
  3087. return x_type
  3088. class ApplyAdaMax(PrimitiveWithInfer):
  3089. r"""
  3090. Updates relevant entries according to the adamax scheme.
  3091. The updating formulas are as follows,
  3092. .. math::
  3093. \begin{array}{ll} \\
  3094. m_{t} = \beta_1 * m_{t-1} + (1 - \beta_1) * g \\
  3095. v_{t} = \max(\beta_2 * v_{t-1}, \left| g \right|) \\
  3096. var = var - \frac{l}{1 - \beta_1^t} * \frac{m_{t}}{v_{t} + \epsilon}
  3097. \end{array}
  3098. :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t-1}`
  3099. is the last momentent of :math:`m_{t}`, :math:`v` represents the 2nd moment vector, :math:`v_{t-1}`
  3100. is the last momentent of :math:`v_{t}`, :math:`l` represents scaling factor `lr`,
  3101. :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
  3102. :math:`beta_1^t` represents `beta1_power`, :math:`var` represents the variable to be updated,
  3103. :math:`\epsilon` represents `epsilon`.
  3104. Inputs of `var`, `m`, `v` and `grad` comply with the implicit type conversion rules
  3105. to make the data types consistent.
  3106. If they have different data types, lower priority data type will be converted to
  3107. relatively highest priority data type.
  3108. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3109. Inputs:
  3110. - **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
  3111. - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and type as `var`.
  3112. With float32 or float16 data type.
  3113. - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients
  3114. with the same shape and type as `var`. With float32 or float16 data type.
  3115. - **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be scalar.
  3116. With float32 or float16 data type.
  3117. - **lr** (Union[Number, Tensor]) - Learning rate, :math:`l` in the updating formula, must be scalar.
  3118. With float32 or float16 data type.
  3119. - **beta1** (Union[Number, Tensor]) - The exponential decay rate for the 1st moment estimations,
  3120. must be scalar. With float32 or float16 data type.
  3121. - **beta2** (Union[Number, Tensor]) - The exponential decay rate for the 2nd moment estimations,
  3122. must be scalar. With float32 or float16 data type.
  3123. - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be scalar.
  3124. With float32 or float16 data type.
  3125. - **grad** (Tensor) - A tensor for gradient, has the same shape and type as `var`.
  3126. With float32 or float16 data type.
  3127. Outputs:
  3128. Tuple of 3 Tensor, the updated parameters.
  3129. - **var** (Tensor) - The same shape and data type as `var`.
  3130. - **m** (Tensor) - The same shape and data type as `m`.
  3131. - **v** (Tensor) - The same shape and data type as `v`.
  3132. Examples:
  3133. >>> import numpy as np
  3134. >>> import mindspore.nn as nn
  3135. >>> from mindspore import Tensor, Parameter
  3136. >>> from mindspore.ops import operations as P
  3137. >>> import mindspore.common.dtype as mstype
  3138. >>> class Net(nn.Cell):
  3139. >>> def __init__(self):
  3140. >>> super(Net, self).__init__()
  3141. >>> self.apply_ada_max = P.ApplyAdaMax()
  3142. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3143. >>> self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
  3144. >>> self.v = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="v")
  3145. >>> def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
  3146. >>> out = self.apply_ada_max(self.var, self.m, self.v, beta1_power, lr, beta1, beta2, epsilon, grad)
  3147. >>> return out
  3148. >>> net = Net()
  3149. >>> beta1_power =Tensor(0.9, mstype.float32)
  3150. >>> lr = Tensor(0.001, mstype.float32)
  3151. >>> beta1 = Tensor(0.9, mstype.float32)
  3152. >>> beta2 = Tensor(0.99, mstype.float32)
  3153. >>> epsilon = Tensor(1e-10, mstype.float32)
  3154. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3155. >>> result = net(beta1_power, lr, beta1, beta2, epsilon, grad)
  3156. """
  3157. __mindspore_signature__ = (
  3158. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3159. sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3160. sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3161. sig.make_sig('beta1_power', dtype=sig.sig_dtype.T1),
  3162. sig.make_sig('lr', dtype=sig.sig_dtype.T2),
  3163. sig.make_sig('beta1', dtype=sig.sig_dtype.T3),
  3164. sig.make_sig('beta2', dtype=sig.sig_dtype.T4),
  3165. sig.make_sig('epsilon', dtype=sig.sig_dtype.T5),
  3166. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3167. )
  3168. @prim_attr_register
  3169. def __init__(self):
  3170. """Initialize ApplyAdaMax"""
  3171. def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, lr_shape,
  3172. beta1_shape, beta2_shape, epsilon_shape, grad_shape):
  3173. validator.check("m_shape", m_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3174. validator.check("v_shape", v_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3175. validator.check("grad_shape", grad_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3176. beta1_power_shp_len = len(beta1_power_shape)
  3177. validator.check_int(beta1_power_shp_len, 1, Rel.LE, "beta1 power's rank", self.name)
  3178. if beta1_power_shp_len == 1:
  3179. validator.check_int(beta1_power_shape[0], 1, Rel.EQ, "beta1_power_shape[0]", self.name)
  3180. lr_shp_len = len(lr_shape)
  3181. validator.check_int(lr_shp_len, 1, Rel.LE, "lr's rank", self.name)
  3182. if lr_shp_len == 1:
  3183. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3184. beta1_shp_len = len(beta1_shape)
  3185. validator.check_int(beta1_shp_len, 1, Rel.LE, "beta1's rank", self.name)
  3186. if beta1_shp_len == 1:
  3187. validator.check_int(beta1_shape[0], 1, Rel.EQ, "beta1_shape[0]", self.name)
  3188. beta2_shp_len = len(beta2_shape)
  3189. validator.check_int(beta2_shp_len, 1, Rel.LE, "beta2's rank", self.name)
  3190. if beta2_shp_len == 1:
  3191. validator.check_int(beta2_shape[0], 1, Rel.EQ, "beta2_shape[0]", self.name)
  3192. epsilon_shp_len = len(epsilon_shape)
  3193. validator.check_int(epsilon_shp_len, 1, Rel.LE, "epsilon's rank", self.name)
  3194. if epsilon_shp_len == 1:
  3195. validator.check_int(epsilon_shape[0], 1, Rel.EQ, "epsilon_shape[0]", self.name)
  3196. return var_shape, m_shape, v_shape
  3197. def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, lr_dtype,
  3198. beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype):
  3199. valid_types = [mstype.float16, mstype.float32]
  3200. args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype}
  3201. validator.check_tensor_type_same(args, valid_types, self.name)
  3202. validator.check_scalar_or_tensor_type_same({"beta1_power": beta1_power_dtype}, valid_types, self.name)
  3203. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, valid_types, self.name)
  3204. validator.check_scalar_or_tensor_type_same({"beta1": beta1_dtype}, valid_types, self.name)
  3205. validator.check_scalar_or_tensor_type_same({"beta2": beta2_dtype}, valid_types, self.name)
  3206. validator.check_scalar_or_tensor_type_same({"epsilon": epsilon_dtype}, valid_types, self.name)
  3207. return var_dtype, m_dtype, v_dtype
  3208. class ApplyAdadelta(PrimitiveWithInfer):
  3209. r"""
  3210. Updates relevant entries according to the adadelta scheme.
  3211. .. math::
  3212. accum = \rho * accum + (1 - \rho) * grad^2
  3213. .. math::
  3214. \text{update} = \sqrt{\text{accum_update} + \epsilon} * \frac{grad}{\sqrt{accum + \epsilon}}
  3215. .. math::
  3216. \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * update^2
  3217. .. math::
  3218. var -= lr * update
  3219. Inputs of `var`, `accum`, `accum_update` and `grad` comply with the implicit type conversion rules
  3220. to make the data types consistent.
  3221. If they have different data types, lower priority data type will be converted to
  3222. relatively highest priority data type.
  3223. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3224. Inputs:
  3225. - **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
  3226. - **accum** (Parameter) - Accumulation to be updated, has the same shape and type as `var`.
  3227. With float32 or float16 data type.
  3228. - **accum_update** (Parameter) - Accum_update to be updated, has the same shape and type as `var`.
  3229. With float32 or float16 data type.
  3230. - **lr** (Union[Number, Tensor]) - Learning rate, must be scalar. With float32 or float16 data type.
  3231. - **rho** (Union[Number, Tensor]) - Decay rate, must be scalar. With float32 or float16 data type.
  3232. - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be scalar.
  3233. With float32 or float16 data type.
  3234. - **grad** (Tensor) - Gradients, has the same shape and type as `var`. With float32 or float16 data type.
  3235. Outputs:
  3236. Tuple of 3 Tensor, the updated parameters.
  3237. - **var** (Tensor) - The same shape and data type as `var`.
  3238. - **accum** (Tensor) - The same shape and data type as `accum`.
  3239. - **accum_update** (Tensor) - The same shape and data type as `accum_update`.
  3240. Examples:
  3241. >>> import numpy as np
  3242. >>> import mindspore.nn as nn
  3243. >>> from mindspore import Tensor, Parameter
  3244. >>> from mindspore.ops import operations as P
  3245. >>> import mindspore.common.dtype as mstype
  3246. >>> class Net(nn.Cell):
  3247. >>> def __init__(self):
  3248. >>> super(Net, self).__init__()
  3249. >>> self.apply_adadelta = P.ApplyAdadelta()
  3250. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3251. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  3252. >>> self.accum_update = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum_update")
  3253. >>> def construct(self, lr, rho, epsilon, grad):
  3254. >>> out = self.apply_adadelta(self.var, self.accum, self.accum_update, lr, rho, epsilon, grad)
  3255. >>> return out
  3256. >>> net = Net()
  3257. >>> lr = Tensor(0.001, mstype.float32)
  3258. >>> rho = Tensor(0.0, mstype.float32)
  3259. >>> epsilon = Tensor(1e-6, mstype.float32)
  3260. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3261. >>> result = net(lr, rho, epsilon, grad)
  3262. """
  3263. __mindspore_signature__ = (
  3264. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3265. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3266. sig.make_sig('accum_update', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3267. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3268. sig.make_sig('rho', dtype=sig.sig_dtype.T2),
  3269. sig.make_sig('epsilon', dtype=sig.sig_dtype.T3),
  3270. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3271. )
  3272. @prim_attr_register
  3273. def __init__(self):
  3274. """Initialize ApplyAdadelta"""
  3275. def infer_shape(self, var_shape, accum_shape, accum_update_shape, lr_shape, rho_shape,
  3276. epsilon_shape, grad_shape):
  3277. validator.check("accum_shape", accum_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3278. validator.check("accum_update_shape", accum_update_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3279. validator.check("grad_shape", grad_shape, "var_shape", var_shape, Rel.EQ, self.name)
  3280. lr_shp_len = len(lr_shape)
  3281. validator.check_int(lr_shp_len, 1, Rel.LE, "lr's rank", self.name)
  3282. if lr_shp_len == 1:
  3283. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3284. rho_shp_len = len(rho_shape)
  3285. validator.check_int(rho_shp_len, 1, Rel.LE, "rho's rank", self.name)
  3286. if rho_shp_len == 1:
  3287. validator.check_int(rho_shape[0], 1, Rel.EQ, "rho_shape[0]", self.name)
  3288. epsilon_shp_len = len(epsilon_shape)
  3289. validator.check_int(epsilon_shp_len, 1, Rel.LE, "lepsilon's rank", self.name)
  3290. if epsilon_shp_len == 1:
  3291. validator.check_int(epsilon_shape[0], 1, Rel.EQ, "epsilon_shape[0]", self.name)
  3292. return var_shape, accum_shape, accum_update_shape
  3293. def infer_dtype(self, var_dtype, accum_dtype, accum_update_dtype, lr_dtype, rho_dtype,
  3294. epsilon_dtype, grad_dtype):
  3295. valid_types = [mstype.float16, mstype.float32]
  3296. args = {"var": var_dtype, "accum": accum_dtype, "accum_update": accum_update_dtype, "grad": grad_dtype}
  3297. validator.check_tensor_type_same(args, valid_types, self.name)
  3298. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, valid_types, self.name)
  3299. validator.check_scalar_or_tensor_type_same({"rho": rho_dtype}, valid_types, self.name)
  3300. validator.check_scalar_or_tensor_type_same({"epsilon": epsilon_dtype}, valid_types, self.name)
  3301. return var_dtype, accum_dtype, accum_update_dtype
  3302. class ApplyAdagrad(PrimitiveWithInfer):
  3303. r"""
  3304. Updates relevant entries according to the adagrad scheme.
  3305. .. math::
  3306. accum += grad * grad
  3307. .. math::
  3308. var -= lr * grad * \frac{1}{\sqrt{accum}}
  3309. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3310. to make the data types consistent..
  3311. If they have different data types, lower priority data type will be converted to
  3312. relatively highest priority data type.
  3313. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3314. Args:
  3315. update_slots (bool): If `True`, `accum` will be updated. Default: True.
  3316. Inputs:
  3317. - **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
  3318. - **accum** (Parameter) - Accumulation to be updated. The shape and dtype must be the same as `var`.
  3319. With float32 or float16 data type.
  3320. - **lr** (Union[Number, Tensor]) - The learning rate value, must be scalar. With float32 or float16 data type.
  3321. - **grad** (Tensor) - A tensor for gradient. The shape and dtype must be the same as `var`.
  3322. With float32 or float16 data type.
  3323. Outputs:
  3324. Tuple of 2 Tensors, the updated parameters.
  3325. - **var** (Tensor) - The same shape and data type as `var`.
  3326. - **accum** (Tensor) - The same shape and data type as `accum`.
  3327. Examples:
  3328. >>> import numpy as np
  3329. >>> import mindspore.nn as nn
  3330. >>> from mindspore import Tensor, Parameter
  3331. >>> from mindspore.ops import operations as P
  3332. >>> import mindspore.common.dtype as mstype
  3333. >>> class Net(nn.Cell):
  3334. >>> def __init__(self):
  3335. >>> super(Net, self).__init__()
  3336. >>> self.apply_adagrad = P.ApplyAdagrad()
  3337. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3338. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  3339. >>> def construct(self, lr, grad):
  3340. >>> out = self.apply_adagrad(self.var, self.accum, lr, grad)
  3341. >>> return out
  3342. >>> net = Net()
  3343. >>> lr = Tensor(0.001, mstype.float32)
  3344. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3345. >>> result = net(lr, grad)
  3346. """
  3347. __mindspore_signature__ = (
  3348. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3349. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3350. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3351. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3352. )
  3353. @prim_attr_register
  3354. def __init__(self, update_slots=True):
  3355. validator.check_value_type("update_slots", update_slots, [bool], self.name)
  3356. def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape):
  3357. validator.check('accum shape', accum_shape, 'var shape', var_shape, Rel.EQ, self.name)
  3358. validator.check('grad shape', grad_shape, 'var shape', var_shape, Rel.EQ, self.name)
  3359. lr_shp_len = len(lr_shape)
  3360. validator.check_int(lr_shp_len, 1, Rel.LE, "lr's rank", self.name)
  3361. if lr_shp_len == 1:
  3362. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3363. return var_shape, accum_shape
  3364. def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype):
  3365. args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
  3366. valid_types = [mstype.float16, mstype.float32]
  3367. validator.check_tensor_type_same(args, valid_types, self.name)
  3368. validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, valid_types, self.name)
  3369. return var_dtype, accum_dtype
  3370. class ApplyAdagradV2(PrimitiveWithInfer):
  3371. r"""
  3372. Updates relevant entries according to the adagradv2 scheme.
  3373. .. math::
  3374. accum += grad * grad
  3375. .. math::
  3376. var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon}
  3377. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3378. to make the data types consistent.
  3379. If they have different data types, lower priority data type will be converted to
  3380. relatively highest priority data type.
  3381. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3382. Args:
  3383. epsilon (float): A small value added for numerical stability.
  3384. update_slots (bool): If `True`, `accum` will be updated. Default: True.
  3385. Inputs:
  3386. - **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
  3387. - **accum** (Parameter) - Accumulation to be updated. The shape and dtype must be the same as `var`.
  3388. With float16 or float32 data type.
  3389. - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
  3390. a scalar tensor with float16 or float32 data type.
  3391. - **grad** (Tensor) - A tensor for gradient. The shape and dtype must be the same as `var`.
  3392. With float16 or float32 data type.
  3393. Outputs:
  3394. Tuple of 2 Tensors, the updated parameters.
  3395. - **var** (Tensor) - The same shape and data type as `var`.
  3396. - **accum** (Tensor) - The same shape and data type as `m`.
  3397. Examples:
  3398. >>> import numpy as np
  3399. >>> import mindspore.nn as nn
  3400. >>> from mindspore import Tensor, Parameter
  3401. >>> from mindspore.ops import operations as P
  3402. >>> import mindspore.common.dtype as mstype
  3403. >>> class Net(nn.Cell):
  3404. >>> def __init__(self):
  3405. >>> super(Net, self).__init__()
  3406. >>> self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
  3407. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3408. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  3409. >>> def construct(self, lr, grad):
  3410. >>> out = self.apply_adagrad_v2(self.var, self.accum, lr, grad)
  3411. >>> return out
  3412. >>> net = Net()
  3413. >>> lr = Tensor(0.001, mstype.float32)
  3414. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3415. >>> result = net(lr, grad)
  3416. """
  3417. __mindspore_signature__ = (
  3418. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3419. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3420. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3421. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3422. )
  3423. @prim_attr_register
  3424. def __init__(self, epsilon, update_slots=True):
  3425. validator.check_value_type("epsilon", epsilon, [float], self.name)
  3426. validator.check_value_type("update_slots", update_slots, [bool], self.name)
  3427. def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape):
  3428. validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
  3429. validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name)
  3430. lr_shp_len = len(lr_shape)
  3431. validator.check_int(lr_shp_len, 1, Rel.LE, "lr's rank", self.name)
  3432. if lr_shp_len == 1:
  3433. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3434. return var_shape, accum_shape
  3435. def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype):
  3436. args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
  3437. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
  3438. validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, [mstype.float16, mstype.float32], self.name)
  3439. return var_dtype, accum_dtype
  3440. class SparseApplyAdagrad(PrimitiveWithInfer):
  3441. r"""
  3442. Updates relevant entries according to the adagrad scheme.
  3443. .. math::
  3444. accum += grad * grad
  3445. .. math::
  3446. var -= lr * grad * (1 / sqrt(accum))
  3447. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3448. to make the data types consistent.
  3449. If they have different data types, lower priority data type will be converted to
  3450. relatively highest priority data type.
  3451. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3452. Args:
  3453. lr (float): Learning rate.
  3454. update_slots (bool): If `True`, `accum` will be updated. Default: True.
  3455. use_locking (bool): If true, the `var` and `accumulation` tensors will be protected from being updated.
  3456. Default: False.
  3457. Inputs:
  3458. - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
  3459. - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
  3460. - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape except the first dimension.
  3461. Gradients has the same data type as `var`.
  3462. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
  3463. The shape of `indices` must be the same as `grad` in first dimension, the type must be int32.
  3464. Outputs:
  3465. Tuple of 2 tensors, the updated parameters.
  3466. - **var** (Tensor) - The same shape and data type as `var`.
  3467. - **accum** (Tensor) - The same shape and data type as `accum`.
  3468. Examples:
  3469. >>> import numpy as np
  3470. >>> import mindspore.nn as nn
  3471. >>> from mindspore import Tensor, Parameter
  3472. >>> from mindspore.ops import operations as P
  3473. >>> import mindspore.common.dtype as mstype
  3474. >>> class Net(nn.Cell):
  3475. >>> def __init__(self):
  3476. >>> super(Net, self).__init__()
  3477. >>> self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=1e-8)
  3478. >>> self.var = Parameter(Tensor(np.ones([1, 1, 1]).astype(np.float32)), name="var")
  3479. >>> self.accum = Parameter(Tensor(np.ones([1, 1, 1]).astype(np.float32)), name="accum")
  3480. >>> def construct(self, grad, indices):
  3481. >>> out = self.sparse_apply_adagrad(self.var, self.accum, grad, indices)
  3482. >>> return out
  3483. >>> net = Net()
  3484. >>> grad = Tensor(np.random.rand(1, 1, 1).astype(np.float32))
  3485. >>> indices = Tensor([0], mstype.int32)
  3486. >>> result = net(grad, indices)
  3487. ([[[1.0]]], [[[1.0]]])
  3488. """
  3489. __mindspore_signature__ = (
  3490. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3491. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3492. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3493. sig.make_sig('indices', dtype=sig.sig_dtype.T1),
  3494. )
  3495. @prim_attr_register
  3496. def __init__(self, lr, update_slots=True, use_locking=False):
  3497. validator.check_value_type("lr", lr, [float], self.name)
  3498. validator.check_is_float(lr, "lr", self.name)
  3499. validator.check_value_type("update_slots", update_slots, [bool], self.name)
  3500. validator.check_value_type("use_locking", use_locking, [bool], self.name)
  3501. def infer_shape(self, var_shape, accum_shape, grad_shape, indices_shape):
  3502. validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
  3503. validator.check('len of var shape', len(var_shape), 'len of grad shape', len(grad_shape), Rel.EQ, self.name)
  3504. if len(var_shape) > 1:
  3505. validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
  3506. validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
  3507. validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
  3508. return var_shape, accum_shape
  3509. def infer_dtype(self, var_type, accum_type, grad_type, indices_type):
  3510. args = {'var': var_type, 'accum': accum_type, 'grad': grad_type}
  3511. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
  3512. validator.check_tensor_type_same({'indices': indices_type}, [mstype.int32], self.name)
  3513. return var_type, accum_type
  3514. class SparseApplyAdagradV2(PrimitiveWithInfer):
  3515. r"""
  3516. Updates relevant entries according to the adagrad scheme.
  3517. .. math::
  3518. accum += grad * grad
  3519. .. math::
  3520. var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon}
  3521. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3522. to make the data types consistent.
  3523. If they have different data types, lower priority data type will be converted to
  3524. relatively highest priority data type.
  3525. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3526. Args:
  3527. lr (float): Learning rate.
  3528. epsilon (float): A small value added for numerical stability.
  3529. use_locking (bool): If `True`, the `var` and `accum` tensors will be protected from being updated.
  3530. Default: False.
  3531. update_slots (bool): If `True`, the computation logic will be different to `False`. Default: True.
  3532. Inputs:
  3533. - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
  3534. - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
  3535. - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape except the first dimension.
  3536. Gradients has the same data type as `var`.
  3537. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
  3538. The shape of `indices` must be the same as `grad` in first dimension, the type must be int32.
  3539. Outputs:
  3540. Tuple of 2 tensors, the updated parameters.
  3541. - **var** (Tensor) - The same shape and data type as `var`.
  3542. - **accum** (Tensor) - The same shape and data type as `accum`.
  3543. Examples:
  3544. >>> import numpy as np
  3545. >>> import mindspore.nn as nn
  3546. >>> from mindspore import Tensor, Parameter
  3547. >>> from mindspore.ops import operations as P
  3548. >>> import mindspore.common.dtype as mstype
  3549. >>> class Net(nn.Cell):
  3550. >>> def __init__(self):
  3551. >>> super(Net, self).__init__()
  3552. >>> self.sparse_apply_adagrad_v2 = P.SparseApplyAdagradV2(lr=1e-8, epsilon=1e-6)
  3553. >>> self.var = Parameter(Tensor(np.ones([1, 1, 1]).astype(np.float32)), name="var")
  3554. >>> self.accum = Parameter(Tensor(np.ones([1, 1, 1]).astype(np.float32)), name="accum")
  3555. >>>
  3556. >>> def construct(self, grad, indices):
  3557. >>> out = self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
  3558. >>> return out
  3559. >>> net = Net()
  3560. >>> grad = Tensor(np.random.rand(1, 1, 1).astype(np.float32))
  3561. >>> indices = Tensor([0], mstype.int32)
  3562. >>> result = net(grad, indices)
  3563. ([[[1.0]]], [[[1.67194188]]])
  3564. """
  3565. __mindspore_signature__ = (
  3566. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3567. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3568. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3569. sig.make_sig('indices', dtype=sig.sig_dtype.T1),
  3570. )
  3571. @prim_attr_register
  3572. def __init__(self, lr, epsilon, use_locking=False, update_slots=True):
  3573. self.lr = validator.check_value_type("lr", lr, [float], self.name)
  3574. self.epsilon = validator.check_value_type("epsilon", epsilon, [float], self.name)
  3575. self.use_locking = validator.check_value_type("update_slots", update_slots, [bool], self.name)
  3576. self.update_slots = validator.check_value_type("use_locking", use_locking, [bool], self.name)
  3577. def infer_shape(self, var_shape, accum_shape, grad_shape, indices_shape):
  3578. validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
  3579. validator.check('len of var shape', len(var_shape), 'len of grad shape', len(grad_shape), Rel.EQ, self.name)
  3580. if len(var_shape) > 1:
  3581. validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
  3582. validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
  3583. validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
  3584. return var_shape, accum_shape
  3585. def infer_dtype(self, var_type, accum_type, grad_type, indices_type):
  3586. args = {'var': var_type, 'accum': accum_type, 'grad': grad_type}
  3587. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
  3588. validator.check_tensor_type_same({'indices': indices_type}, [mstype.int32], self.name)
  3589. return var_type, accum_type
  3590. class ApplyProximalAdagrad(PrimitiveWithInfer):
  3591. r"""
  3592. Updates relevant entries according to the proximal adagrad algorithm.
  3593. .. math::
  3594. accum += grad * grad
  3595. .. math::
  3596. \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}}
  3597. .. math::
  3598. var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
  3599. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3600. to make the data types consistent.
  3601. If they have different data types, lower priority data type will be converted to
  3602. relatively highest priority data type.
  3603. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3604. Args:
  3605. use_locking (bool): If true, the var and accumulation tensors will be protected from being updated.
  3606. Default: False.
  3607. Inputs:
  3608. - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
  3609. - **accum** (Parameter) - Accumulation to be updated. Must has the same shape and dtype as `var`.
  3610. - **lr** (Union[Number, Tensor]) - The learning rate value, must be scalar. The data type must be
  3611. float16 or float32.
  3612. - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar. The data type must be
  3613. float16 or float32.
  3614. - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be scalar. The data type must be
  3615. float16 or float32.
  3616. - **grad** (Tensor) - Gradient with the same shape and dtype as `var`.
  3617. Outputs:
  3618. Tuple of 2 Tensors, the updated parameters.
  3619. - **var** (Tensor) - The same shape and data type as `var`.
  3620. - **accum** (Tensor) - The same shape and data type as `accum`.
  3621. Examples:
  3622. >>> import numpy as np
  3623. >>> import mindspore.nn as nn
  3624. >>> from mindspore import Tensor, Parameter
  3625. >>> from mindspore.ops import operations as P
  3626. >>> class Net(nn.Cell):
  3627. >>> def __init__(self):
  3628. >>> super(Net, self).__init__()
  3629. >>> self.apply_proximal_adagrad = P.ApplyProximalAdagrad()
  3630. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3631. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  3632. >>> self.lr = 0.01
  3633. >>> self.l1 = 0.0
  3634. >>> self.l2 = 0.0
  3635. >>> def construct(self, grad):
  3636. >>> out = self.apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad)
  3637. >>> return out
  3638. >>> net = Net()
  3639. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3640. >>> output = net(grad)
  3641. """
  3642. __mindspore_signature__ = (
  3643. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3644. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3645. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3646. sig.make_sig('l1', dtype=sig.sig_dtype.T2),
  3647. sig.make_sig('l2', dtype=sig.sig_dtype.T3),
  3648. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3649. )
  3650. @prim_attr_register
  3651. def __init__(self, use_locking=False):
  3652. self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad'],
  3653. outputs=['var', 'accum'])
  3654. self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
  3655. def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape):
  3656. validator.check('accum shape', accum_shape, 'var shape', var_shape, Rel.EQ, self.name)
  3657. validator.check('grad shape', grad_shape, 'var shape', var_shape, Rel.EQ, self.name)
  3658. lr_shp_len = len(lr_shape)
  3659. validator.check_int(lr_shp_len, 1, Rel.LE, "lr's rank", self.name)
  3660. if lr_shp_len == 1:
  3661. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3662. l1_shp_len = len(l1_shape)
  3663. validator.check_int(l1_shp_len, 1, Rel.LE, "l1's rank", self.name)
  3664. if l1_shp_len == 1:
  3665. validator.check_int(l1_shape[0], 1, Rel.EQ, "l1_shape[0]", self.name)
  3666. l2_shp_len = len(l2_shape)
  3667. validator.check_int(l2_shp_len, 1, Rel.LE, "l2's rank", self.name)
  3668. if l2_shp_len == 1:
  3669. validator.check_int(l2_shape[0], 1, Rel.EQ, "l2_shape[0]", self.name)
  3670. return var_shape, accum_shape
  3671. def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype):
  3672. valid_types = [mstype.float16, mstype.float32]
  3673. args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
  3674. validator.check_tensor_type_same(args, valid_types, self.name)
  3675. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, valid_types, self.name)
  3676. validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, valid_types, self.name)
  3677. validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, valid_types, self.name)
  3678. return var_dtype, accum_dtype
  3679. class SparseApplyProximalAdagrad(PrimitiveWithCheck):
  3680. r"""
  3681. Updates relevant entries according to the proximal adagrad algorithm. Compared with ApplyProximalAdagrad,
  3682. an additional index tensor is input.
  3683. .. math::
  3684. accum += grad * grad
  3685. .. math::
  3686. \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}}
  3687. .. math::
  3688. var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
  3689. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3690. to make the data types consistent.
  3691. If they have different data types, lower priority data type will be converted to
  3692. relatively highest priority data type.
  3693. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3694. Args:
  3695. use_locking (bool): If true, the `var` and `accum` tensors will be protected from being updated.
  3696. Default: False.
  3697. Inputs:
  3698. - **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32.
  3699. - **accum** (Parameter) - Variable tensor to be updated, has the same dtype as `var`.
  3700. - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
  3701. a scalar tensor with float16 or float32 data type.
  3702. - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
  3703. a scalar tensor with float16 or float32 data type.
  3704. - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or
  3705. a scalar tensor with float16 or float32 data type..
  3706. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
  3707. - **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`.
  3708. Outputs:
  3709. Tuple of 2 tensors, the updated parameters.
  3710. - **var** (Tensor) - The same shape and data type as `var`.
  3711. - **accum** (Tensor) - The same shape and data type as `accum`.
  3712. Examples:
  3713. >>> import numpy as np
  3714. >>> import mindspore.nn as nn
  3715. >>> from mindspore import Tensor, Parameter
  3716. >>> from mindspore.ops import operations as P
  3717. >>> class Net(nn.Cell):
  3718. >>> def __init__(self):
  3719. >>> super(Net, self).__init__()
  3720. >>> self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
  3721. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3722. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  3723. >>> self.lr = 0.01
  3724. >>> self.l1 = 0.0
  3725. >>> self.l2 = 0.0
  3726. >>> def construct(self, grad, indices):
  3727. >>> out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
  3728. self.l2, grad, indices)
  3729. >>> return out
  3730. >>> net = Net()
  3731. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3732. >>> indices = Tensor(np.ones((3,), np.int32))
  3733. >>> output = net(grad, indices)
  3734. """
  3735. __mindspore_signature__ = (
  3736. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3737. sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3738. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3739. sig.make_sig('l1', dtype=sig.sig_dtype.T2),
  3740. sig.make_sig('l2', dtype=sig.sig_dtype.T3),
  3741. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3742. sig.make_sig('indices', dtype=sig.sig_dtype.T4),
  3743. )
  3744. @prim_attr_register
  3745. def __init__(self, use_locking=False):
  3746. self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
  3747. outputs=['var', 'accum'])
  3748. self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
  3749. def check_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape):
  3750. validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
  3751. def check_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype, indices_dtype):
  3752. args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
  3753. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
  3754. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, [mstype.float16, mstype.float32], self.name)
  3755. validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, [mstype.float16, mstype.float32], self.name)
  3756. validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, [mstype.float16, mstype.float32], self.name)
  3757. valid_types = [mstype.int16, mstype.int32, mstype.int64,
  3758. mstype.uint16, mstype.uint32, mstype.uint64]
  3759. validator.check_tensor_type_same({'indices': indices_dtype}, valid_types, self.name)
  3760. class ApplyAddSign(PrimitiveWithInfer):
  3761. r"""
  3762. Updates relevant entries according to the AddSign algorithm.
  3763. .. math::
  3764. \begin{array}{ll} \\
  3765. m_{t} = \beta * m_{t-1} + (1 - \beta) * g \\
  3766. \text{update} = (\alpha + \text{sign_decay} * sign(g) * sign(m)) * g \\
  3767. var = var - lr_{t} * \text{update}
  3768. \end{array}
  3769. :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t-1}`
  3770. is the last momentent of :math:`m_{t}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`.
  3771. Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
  3772. to make the data types consistent.
  3773. If they have different data types, lower priority data type will be converted to
  3774. relatively highest priority data type.
  3775. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3776. Inputs:
  3777. - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
  3778. - **m** (Parameter) - Variable tensor to be updated, has the same dtype as `var`.
  3779. - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
  3780. With float32 or float16 data type.
  3781. - **alpha** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type.
  3782. - **sign_decay** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type.
  3783. - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar.
  3784. With float32 or float16 data type.
  3785. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
  3786. Outputs:
  3787. Tuple of 2 Tensors, the updated parameters.
  3788. - **var** (Tensor) - The same shape and data type as `var`.
  3789. - **m** (Tensor) - The same shape and data type as `m`.
  3790. Examples:
  3791. >>> import numpy as np
  3792. >>> import mindspore.nn as nn
  3793. >>> from mindspore import Tensor, Parameter
  3794. >>> from mindspore.ops import operations as P
  3795. >>> class Net(nn.Cell):
  3796. >>> def __init__(self):
  3797. >>> super(Net, self).__init__()
  3798. >>> self.apply_add_sign = P.ApplyAddSign()
  3799. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3800. >>> self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
  3801. >>> self.lr = 0.001
  3802. >>> self.alpha = 1.0
  3803. >>> self.sign_decay = 0.99
  3804. >>> self.beta = 0.9
  3805. >>> def construct(self, grad):
  3806. >>> out = self.apply_add_sign(self.var, self.m, self.lr, self.alpha, self.sign_decay, self.beta, grad)
  3807. >>> return out
  3808. >>> net = Net()
  3809. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3810. >>> output = net(grad)
  3811. """
  3812. __mindspore_signature__ = (
  3813. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3814. sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3815. sig.make_sig('lr', dtype=sig.sig_dtype.T1),
  3816. sig.make_sig('alpha', dtype=sig.sig_dtype.T2),
  3817. sig.make_sig('sign_decay', dtype=sig.sig_dtype.T3),
  3818. sig.make_sig('beta', dtype=sig.sig_dtype.T3),
  3819. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3820. )
  3821. @prim_attr_register
  3822. def __init__(self):
  3823. "Initialize ApplyAddSign"
  3824. def infer_shape(self, var_shape, m_shape, lr_shape, alpha_shape, sign_decay_shape, beta_shape, grad_shape):
  3825. validator.check('m_shape', m_shape, 'var_shape', var_shape, Rel.EQ, self.name)
  3826. validator.check('grad_shape', grad_shape, 'var_shape', var_shape, Rel.EQ, self.name)
  3827. lr_shape_len = len(lr_shape)
  3828. validator.check_int(lr_shape_len, 1, Rel.LE, "lr's rank", self.name)
  3829. if lr_shape_len == 1:
  3830. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3831. alpha_shape_len = len(alpha_shape)
  3832. validator.check_int(alpha_shape_len, 1, Rel.LE, "alpha's rank", self.name)
  3833. if alpha_shape_len == 1:
  3834. validator.check_int(alpha_shape[0], 1, Rel.EQ, "alpha_shape[0]", self.name)
  3835. sign_decay_shape_len = len(sign_decay_shape)
  3836. validator.check_int(sign_decay_shape_len, 1, Rel.LE, "sign_decay's rank", self.name)
  3837. if sign_decay_shape_len == 1:
  3838. validator.check_int(sign_decay_shape[0], 1, Rel.EQ, "sign_decay_shape[0]", self.name)
  3839. beta_shape_len = len(beta_shape)
  3840. validator.check_int(beta_shape_len, 1, Rel.LE, "beta's rank", self.name)
  3841. if beta_shape_len == 1:
  3842. validator.check_int(beta_shape[0], 1, Rel.EQ, "beta_shape[0]", self.name)
  3843. return var_shape, m_shape
  3844. def infer_dtype(self, var_dtype, m_dtype, lr_dtype, alpha_dtype, sign_decay_dtype, beta_dtype, grad_dtype):
  3845. valid_types = [mstype.float16, mstype.float32]
  3846. args = {'var': var_dtype, 'm': m_dtype, 'grad': grad_dtype}
  3847. validator.check_tensor_type_same(args, valid_types, self.name)
  3848. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, valid_types, self.name)
  3849. validator.check_scalar_or_tensor_type_same({"alpha": alpha_dtype}, valid_types, self.name)
  3850. validator.check_scalar_or_tensor_type_same({"sign_decay": sign_decay_dtype}, valid_types, self.name)
  3851. validator.check_scalar_or_tensor_type_same({"beta": beta_dtype}, valid_types, self.name)
  3852. return var_dtype, m_dtype
  3853. class ApplyPowerSign(PrimitiveWithInfer):
  3854. r"""
  3855. Updates relevant entries according to the AddSign algorithm.
  3856. .. math::
  3857. \begin{array}{ll} \\
  3858. m_{t} = \beta * m_{t-1} + (1 - \beta) * g \\
  3859. \text{update} = \exp(\text{logbase} * \text{sign_decay} * sign(g) * sign(m)) * g \\
  3860. var = var - lr_{t} * \text{update}
  3861. \end{array}
  3862. :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t-1}`
  3863. is the last momentent of :math:`m_{t}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`.
  3864. All of inputs comply with the implicit type conversion rules to make the data types consistent.
  3865. If `lr`, `logbase`, `sign_decay` or `beta` is a number, the number is automatically converted to Tensor,
  3866. and the data type is consistent with the Tensor data type involved in the operation.
  3867. If inputs are tensors and have different data types, lower priority data type will be converted to
  3868. relatively highest priority data type.
  3869. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3870. Inputs:
  3871. - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
  3872. If data type of `var` is float16, all inputs must have the same data type as `var`.
  3873. - **m** (Parameter) - Variable tensor to be updated, has the same dtype as `var`.
  3874. - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
  3875. With float32 or float16 data type.
  3876. - **logbase** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type.
  3877. - **sign_decay** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type.
  3878. - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar.
  3879. With float32 or float16 data type.
  3880. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
  3881. Outputs:
  3882. Tuple of 2 Tensors, the updated parameters.
  3883. - **var** (Tensor) - The same shape and data type as `var`.
  3884. - **m** (Tensor) - The same shape and data type as `m`.
  3885. Examples:
  3886. >>> import numpy as np
  3887. >>> import mindspore.nn as nn
  3888. >>> from mindspore import Tensor, Parameter
  3889. >>> from mindspore.ops import operations as P
  3890. >>> class Net(nn.Cell):
  3891. >>> def __init__(self):
  3892. >>> super(Net, self).__init__()
  3893. >>> self.apply_power_sign = P.ApplyPowerSign()
  3894. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3895. >>> self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
  3896. >>> self.lr = 0.001
  3897. >>> self.logbase = np.e
  3898. >>> self.sign_decay = 0.99
  3899. >>> self.beta = 0.9
  3900. >>> def construct(self, grad):
  3901. >>> out = self.apply_power_sign(self.var, self.m, self.lr, self.logbase,
  3902. self.sign_decay, self.beta, grad)
  3903. >>> return out
  3904. >>> net = Net()
  3905. >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
  3906. >>> output = net(grad)
  3907. """
  3908. __mindspore_signature__ = (
  3909. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3910. sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3911. sig.make_sig('lr', dtype=sig.sig_dtype.T),
  3912. sig.make_sig('logbase', dtype=sig.sig_dtype.T),
  3913. sig.make_sig('sign_decay', dtype=sig.sig_dtype.T),
  3914. sig.make_sig('beta', dtype=sig.sig_dtype.T),
  3915. sig.make_sig('grad', dtype=sig.sig_dtype.T),
  3916. )
  3917. @prim_attr_register
  3918. def __init__(self):
  3919. "Initialize ApplyPowerSign"
  3920. def infer_shape(self, var_shape, m_shape, lr_shape, logbase_shape, sign_decay_shape, beta_shape, grad_shape):
  3921. validator.check('m_shape', m_shape, 'var_shape', var_shape, Rel.EQ, self.name)
  3922. validator.check('grad_shape', grad_shape, 'var_shape', var_shape, Rel.EQ, self.name)
  3923. lr_shape_len = len(lr_shape)
  3924. validator.check_int(lr_shape_len, 1, Rel.LE, "lr's rank", self.name)
  3925. if lr_shape_len == 1:
  3926. validator.check_int(lr_shape[0], 1, Rel.EQ, "lr_shape[0]", self.name)
  3927. logbase_shape_len = len(logbase_shape)
  3928. validator.check_int(logbase_shape_len, 1, Rel.LE, "logbase's rank", self.name)
  3929. if logbase_shape_len == 1:
  3930. validator.check_int(logbase_shape[0], 1, Rel.EQ, "logbase_shape[0]", self.name)
  3931. sign_decay_shape_len = len(sign_decay_shape)
  3932. validator.check_int(sign_decay_shape_len, 1, Rel.LE, "sign_decay's rank", self.name)
  3933. if sign_decay_shape_len == 1:
  3934. validator.check_int(sign_decay_shape[0], 1, Rel.EQ, "sign_decay_shape[0]", self.name)
  3935. beta_shape_len = len(beta_shape)
  3936. validator.check_int(beta_shape_len, 1, Rel.LE, "beta's rank", self.name)
  3937. if beta_shape_len == 1:
  3938. validator.check_int(beta_shape[0], 1, Rel.EQ, "beta_shape[0]", self.name)
  3939. return var_shape, m_shape
  3940. def infer_dtype(self, var_dtype, m_dtype, lr_dtype, logbase_dtype, sign_decay_dtype, beta_dtype, grad_dtype):
  3941. valid_types = [mstype.float16, mstype.float32]
  3942. args = {'var': var_dtype, 'm': m_dtype, 'grad': grad_dtype}
  3943. validator.check_tensor_type_same(args, valid_types, self.name)
  3944. validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, valid_types, self.name)
  3945. validator.check_scalar_or_tensor_type_same({"logbase": logbase_dtype}, valid_types, self.name)
  3946. validator.check_scalar_or_tensor_type_same({"sign_decay": sign_decay_dtype}, valid_types, self.name)
  3947. validator.check_scalar_or_tensor_type_same({"beta": beta_dtype}, valid_types, self.name)
  3948. return var_dtype, m_dtype
  3949. class ApplyGradientDescent(PrimitiveWithInfer):
  3950. r"""
  3951. Updates relevant entries according to the following formula.
  3952. .. math::
  3953. var = var - \alpha * \delta
  3954. Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
  3955. If they have different data types, lower priority data type will be converted to
  3956. relatively highest priority data type.
  3957. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  3958. Inputs:
  3959. - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
  3960. - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
  3961. - **delta** (Tensor) - A tensor for the change, has the same type as `var`.
  3962. Outputs:
  3963. Tensor, represents the updated `var`.
  3964. Examples:
  3965. >>> import numpy as np
  3966. >>> import mindspore.nn as nn
  3967. >>> from mindspore import Tensor, Parameter
  3968. >>> from mindspore.ops import operations as P
  3969. >>> class Net(nn.Cell):
  3970. >>> def __init__(self):
  3971. >>> super(Net, self).__init__()
  3972. >>> self.apply_gradient_descent = P.ApplyGradientDescent()
  3973. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  3974. >>> self.alpha = 0.001
  3975. >>> def construct(self, delta):
  3976. >>> out = self.apply_gradient_descent(self.var, self.alpha, delta)
  3977. >>> return out
  3978. >>> net = Net()
  3979. >>> delta = Tensor(np.random.rand(3, 3).astype(np.float32))
  3980. >>> output = net(delta)
  3981. """
  3982. __mindspore_signature__ = (
  3983. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  3984. sig.make_sig('alpha', dtype=sig.sig_dtype.T1),
  3985. sig.make_sig('delta', dtype=sig.sig_dtype.T),
  3986. )
  3987. @prim_attr_register
  3988. def __init__(self):
  3989. "Initialize ApplyGradientDescent"
  3990. def infer_shape(self, var_shape, alpha_shape, delta_shape):
  3991. validator.check('delta shape', delta_shape, 'var shape', var_shape, Rel.EQ, self.name)
  3992. alpha_shape_len = len(alpha_shape)
  3993. validator.check_int(alpha_shape_len, 1, Rel.LE, "alpha's rank", self.name)
  3994. if alpha_shape_len == 1:
  3995. validator.check_int(alpha_shape[0], 1, Rel.EQ, "alpha_shape[0]", self.name)
  3996. return var_shape
  3997. def infer_dtype(self, var_dtype, alpha_dtype, delta_dtype):
  3998. valid_types = [mstype.float16, mstype.float32]
  3999. args = {'var': var_dtype, 'delta': delta_dtype}
  4000. validator.check_tensor_type_same(args, valid_types, self.name)
  4001. validator.check_scalar_or_tensor_type_same({"alpha": alpha_dtype}, valid_types, self.name)
  4002. return var_dtype
  4003. class ApplyProximalGradientDescent(PrimitiveWithInfer):
  4004. r"""
  4005. Updates relevant entries according to the FOBOS(Forward Backward Splitting) algorithm.
  4006. .. math::
  4007. \text{prox_v} = var - \alpha * \delta
  4008. .. math::
  4009. var = \frac{sign(\text{prox_v})}{1 + \alpha * l2} * \max(\left| \text{prox_v} \right| - alpha * l1, 0)
  4010. Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
  4011. If they have different data types, lower priority data type will be converted to
  4012. relatively highest priority data type.
  4013. RuntimeError exception will be thrown when the data type conversion of Parameter is required.
  4014. Inputs:
  4015. - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
  4016. - **alpha** (Union[Number, Tensor]) - Saling factor, must be a scalar. With float32 or float16 data type.
  4017. - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar.
  4018. With float32 or float16 data type.
  4019. - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be scalar.
  4020. With float32 or float16 data type.
  4021. - **delta** (Tensor) - A tensor for the change, has the same type as `var`.
  4022. Outputs:
  4023. Tensor, represents the updated `var`.
  4024. Examples:
  4025. >>> import numpy as np
  4026. >>> import mindspore.nn as nn
  4027. >>> from mindspore import Tensor, Parameter
  4028. >>> from mindspore.ops import operations as P
  4029. >>> class Net(nn.Cell):
  4030. >>> def __init__(self):
  4031. >>> super(Net, self).__init__()
  4032. >>> self.apply_proximal_gradient_descent = P.ApplyProximalGradientDescent()
  4033. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  4034. >>> self.alpha = 0.001
  4035. >>> self.l1 = 0.0
  4036. >>> self.l2 = 0.0
  4037. >>> def construct(self, delta):
  4038. >>> out = self.apply_proximal_gradient_descent(self.var, self.alpha, self.l1, self.l2, delta)
  4039. >>> return out
  4040. >>> net = Net()
  4041. >>> delta = Tensor(np.random.rand(3, 3).astype(np.float32))
  4042. >>> output = net(delta)
  4043. """
  4044. __mindspore_signature__ = (
  4045. sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
  4046. sig.make_sig('alpha', dtype=sig.sig_dtype.T1),
  4047. sig.make_sig('l1', dtype=sig.sig_dtype.T2),
  4048. sig.make_sig('l2', dtype=sig.sig_dtype.T3),
  4049. sig.make_sig('delta', dtype=sig.sig_dtype.T),
  4050. )
  4051. @prim_attr_register
  4052. def __init__(self):
  4053. "Initialize ApplyGradientDescent"
  4054. def infer_shape(self, var_shape, alpha_shape, l1_shape, l2_shape, delta_shape):
  4055. validator.check('delta shape', delta_shape, 'var shape', var_shape, Rel.EQ, self.name)
  4056. alpha_shape_len = len(alpha_shape)
  4057. validator.check_int(alpha_shape_len, 1, Rel.LE, "alpha's rank", self.name)
  4058. if alpha_shape_len == 1:
  4059. validator.check_int(alpha_shape[0], 1, Rel.EQ, "alpha_shape[0]", self.name)
  4060. l1_shape_len = len(l1_shape)
  4061. validator.check_int(l1_shape_len, 1, Rel.LE, "l1's rank", self.name)
  4062. if l1_shape_len == 1:
  4063. validator.check_int(l1_shape[0], 1, Rel.EQ, "l1_shape[0]", self.name)
  4064. l2_shape_len = len(l2_shape)
  4065. validator.check_int(l2_shape_len, 1, Rel.LE, "l2's rank", self.name)
  4066. if l2_shape_len == 1:
  4067. validator.check_int(l2_shape[0], 1, Rel.EQ, "l2_shape[0]", self.name)
  4068. return var_shape
  4069. def infer_dtype(self, var_dtype, alpha_dtype, l1_dtype, l2_dtype, delta_dtype):
  4070. valid_types = [mstype.float16, mstype.float32]
  4071. args = {'var': var_dtype, 'delta': delta_dtype}
  4072. validator.check_tensor_type_same(args, valid_types, self.name)
  4073. validator.check_scalar_or_tensor_type_same({"alpha": alpha_dtype}, valid_types, self.name)
  4074. validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, valid_types, self.name)
  4075. validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, valid_types, self.name)
  4076. return var_dtype
  4077. class LARSUpdate(PrimitiveWithInfer):
  4078. """
  4079. Conducts lars (layer-wise adaptive rate scaling) update on the sum of squares of gradient.
  4080. Args:
  4081. epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05.
  4082. hyperpara (float): Trust coefficient for calculating the local learning rate. Default: 0.001.
  4083. use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False.
  4084. Inputs:
  4085. - **weight** (Tensor) - The weight to be updated.
  4086. - **gradient** (Tensor) - The gradient of weight, which has the same shape and dtype with weight.
  4087. - **norm_weight** (Tensor) - A scalar tensor, representing the sum of squares of weight.
  4088. - **norm_gradient** (Tensor) - A scalar tensor, representing the sum of squares of gradient.
  4089. - **weight_decay** (Union[Number, Tensor]) - Weight decay. It must be a scalar tensor or number.
  4090. - **learning_rate** (Union[Number, Tensor]) - Learning rate. It must be a scalar tensor or number.
  4091. Outputs:
  4092. Tensor, represents the new gradient.
  4093. Examples:
  4094. >>> from mindspore import Tensor
  4095. >>> from mindspore.ops import operations as P
  4096. >>> from mindspore.ops import functional as F
  4097. >>> import mindspore.nn as nn
  4098. >>> import numpy as np
  4099. >>> class Net(nn.Cell):
  4100. >>> def __init__(self):
  4101. >>> super(Net, self).__init__()
  4102. >>> self.lars = P.LARSUpdate()
  4103. >>> self.reduce = P.ReduceSum()
  4104. >>> def construct(self, weight, gradient):
  4105. >>> w_square_sum = self.reduce(F.square(weight))
  4106. >>> grad_square_sum = self.reduce(F.square(gradient))
  4107. >>> grad_t = self.lars(weight, gradient, w_square_sum, grad_square_sum, 0.0, 1.0)
  4108. >>> return grad_t
  4109. >>> weight = np.random.random(size=(2, 3)).astype(np.float32)
  4110. >>> gradient = np.random.random(size=(2, 3)).astype(np.float32)
  4111. >>> net = Net()
  4112. >>> ms_output = net(Tensor(weight), Tensor(gradient))
  4113. """
  4114. @prim_attr_register
  4115. def __init__(self, epsilon=1e-05, hyperpara=0.001, use_clip=False):
  4116. """init"""
  4117. validator.check_value_type("epsilon", epsilon, [float], self.name)
  4118. validator.check_value_type("hyperpara", hyperpara, [float], self.name)
  4119. validator.check_value_type("use_clip", use_clip, [bool], self.name)
  4120. def infer_shape(self, weight_shape, gradient_shape, norm_weight_shape, norm_gradient_shape, weight_decay_shape,
  4121. learning_rate_shape):
  4122. validator.check("weight shape", weight_shape, "gradient shape", gradient_shape, Rel.EQ, self.name)
  4123. validator.check("norm weight shape", norm_weight_shape, "norm gradient shape", norm_gradient_shape, Rel.EQ,
  4124. self.name)
  4125. shp_len = len(weight_decay_shape)
  4126. validator.check_int(shp_len, 1, Rel.LE, "weight decay's rank", self.name)
  4127. if shp_len == 1:
  4128. validator.check_int(weight_decay_shape[0], 1, Rel.EQ, "weight_decay_shape[0]", self.name)
  4129. shp_len = len(learning_rate_shape)
  4130. validator.check_int(shp_len, 1, Rel.LE, "learning rate's rank", self.name)
  4131. if shp_len == 1:
  4132. validator.check_int(learning_rate_shape[0], 1, Rel.EQ, "learning_rate_shape[0]", self.name)
  4133. return weight_shape
  4134. def infer_dtype(self, weight_dtype, gradient_dtype, norm_weight_dtype, norm_gradient_dtype,
  4135. weight_decay_dtype, learning_rate_dtype):
  4136. args = {"Weight dtype": weight_dtype, "gradient dtype": gradient_dtype, "norm weight dtype": norm_weight_dtype,
  4137. "norm gradient dtype": norm_gradient_dtype}
  4138. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32, mstype.int16, mstype.int32], self.name)
  4139. validator.check_scalar_or_tensor_type_same({"weight_decay": weight_decay_dtype},
  4140. [mstype.float16, mstype.float32, mstype.float64], self.name)
  4141. validator.check_scalar_or_tensor_type_same({"learning_rate": learning_rate_dtype},
  4142. [mstype.float16, mstype.float32, mstype.float64], self.name)
  4143. return weight_dtype
  4144. class ApplyFtrl(PrimitiveWithInfer):
  4145. """
  4146. Updates relevant entries according to the FTRL scheme.
  4147. Args:
  4148. use_locking (bool): Use locks for updating operation if true . Default: False.
  4149. Inputs:
  4150. - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
  4151. - **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
  4152. - **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as `var`.
  4153. - **grad** (Tensor) - Gradient. The data type must be float16 or float32.
  4154. - **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: 0.001.
  4155. It must be a float number or a scalar tensor with float16 or float32 data type.
  4156. - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be greater than or equal to zero.
  4157. Default: 0.0. It must be a float number or a scalar tensor with float16 or float32 data type.
  4158. - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be greater than or equal to zero.
  4159. Default: 0.0. It must be a float number or a scalar tensor with float16 or float32 data type.
  4160. - **lr_power** (Union[Number, Tensor]) - Learning rate power controls how the learning rate decreases
  4161. during training, must be less than or equal to zero. Use fixed learning rate if lr_power is zero.
  4162. Default: -0.5. It must be a float number or a scalar tensor with float16 or float32 data type.
  4163. Outputs:
  4164. Tensor, represents the updated `var`.
  4165. Examples:
  4166. >>> import mindspore
  4167. >>> import mindspore.nn as nn
  4168. >>> import numpy as np
  4169. >>> from mindspore import Parameter
  4170. >>> from mindspore import Tensor
  4171. >>> from mindspore.ops import operations as P
  4172. >>> class ApplyFtrlNet(nn.Cell):
  4173. >>> def __init__(self):
  4174. >>> super(ApplyFtrlNet, self).__init__()
  4175. >>> self.apply_ftrl = P.ApplyFtrl()
  4176. >>> self.lr = 0.001
  4177. >>> self.l1 = 0.0
  4178. >>> self.l2 = 0.0
  4179. >>> self.lr_power = -0.5
  4180. >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
  4181. >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
  4182. >>> self.linear = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="linear")
  4183. >>>
  4184. >>> def construct(self, grad):
  4185. >>> out = self.apply_ftrl(self.var, self.accum, self.linear, grad, self.lr, self.l1, self.l2,
  4186. >>> self.lr_power)
  4187. >>> return out
  4188. >>>
  4189. >>> net = ApplyFtrlNet()
  4190. >>> input_x = Tensor(np.random.randint(-4, 4, (3, 3)), mindspore.float32)
  4191. >>> result = net(input_x)
  4192. [[0.67455846 0.14630564 0.160499 ]
  4193. [0.16329421 0.00415689 0.05202988]
  4194. [0.18672481 0.17418946 0.36420345]]
  4195. """
  4196. @prim_attr_register
  4197. def __init__(self, use_locking=False):
  4198. self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'lr', 'l1', 'l2', 'lr_power'],
  4199. outputs=['output'])
  4200. self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
  4201. self.is_tbe = context.get_context("device_target") == "Ascend"
  4202. def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, lr_shape, l1_shape, l2_shape,
  4203. lr_power_shape):
  4204. validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
  4205. validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
  4206. if self.is_tbe:
  4207. return var_shape, var_shape, var_shape
  4208. return var_shape
  4209. def infer_dtype(self, var_type, accum_type, linear_type, grad_type, lr_type, l1_type, l2_type, lr_power_type):
  4210. valid_types = [mstype.float16, mstype.float32]
  4211. args = {'var': var_type, 'accum': accum_type, 'linear': linear_type, 'grad': grad_type}
  4212. validator.check_tensor_type_same(args, valid_types, self.name)
  4213. validator.check_scalar_or_tensor_type_same({"lr": lr_type}, valid_types, self.name)
  4214. validator.check_scalar_or_tensor_type_same({"l1": l1_type}, valid_types, self.name)
  4215. validator.check_scalar_or_tensor_type_same({"l2": l2_type}, valid_types, self.name)
  4216. validator.check_scalar_or_tensor_type_same({"lr_power": lr_power_type}, valid_types, self.name)
  4217. if self.is_tbe:
  4218. return var_type, var_type, var_type
  4219. return var_type
class SparseApplyFtrl(PrimitiveWithCheck):
    """
    Updates relevant entries according to the FTRL-proximal scheme.

    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
    If they have different data types, lower priority data type will be converted to
    relatively highest priority data type.
    RuntimeError exception will be thrown when the data type conversion of Parameter is required.

    Args:
        lr (float): The learning rate value, must be positive.
        l1 (float): l1 regularization strength, must be greater than or equal to zero.
        l2 (float): l2 regularization strength, must be greater than or equal to zero.
        lr_power (float): Learning rate power controls how the learning rate decreases during training,
            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
        use_locking (bool): Use locks for updating operation if true . Default: False.

    Inputs:
        - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
        - **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
        - **linear** (Parameter) - the linear coefficient to be updated, must be the same data type and shape as `var`.
        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
        - **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`.
          The shape of `indices` must be the same as `grad` in the first dimension.
          The type must be int32 or int64.

    Outputs:
        - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
        - **accum** (Tensor) - Tensor, has the same shape and data type as `accum`.
        - **linear** (Tensor) - Tensor, has the same shape and data type as `linear`.

    Examples:
        >>> import mindspore
        >>> import mindspore.nn as nn
        >>> import numpy as np
        >>> from mindspore import Parameter
        >>> from mindspore import Tensor
        >>> from mindspore.ops import operations as P
        >>> class SparseApplyFtrlNet(nn.Cell):
        >>>     def __init__(self):
        >>>         super(SparseApplyFtrlNet, self).__init__()
        >>>         self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        >>>         self.var = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="var")
        >>>         self.accum = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="accum")
        >>>         self.linear = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="linear")
        >>>
        >>>     def construct(self, grad, indices):
        >>>         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
        >>>         return out
        >>>
        >>> net = SparseApplyFtrlNet()
        >>> grad = Tensor(np.random.rand(1, 1).astype(np.float32))
        >>> indices = Tensor(np.ones([1]), mindspore.int32)
        >>> output = net(grad, indices)
        ([[1.02914639e-01]], [[7.60280550e-01]], [[7.64630079e-01]])
    """

    # var/accum/linear are updated in place and share dtype group T with grad;
    # indices has its own dtype group T1.
    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
    )

    @prim_attr_register
    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
        """Initialize SparseApplyFtrl: validate attribute types and value ranges."""
        validator.check_value_type("lr", lr, [float], self.name)
        validator.check_value_type("l1", l1, [float], self.name)
        validator.check_value_type("l2", l2, [float], self.name)
        validator.check_value_type("lr_power", lr_power, [float], self.name)
        # Value-range checks (run after type checks so the error message is precise).
        self.lr = validator.check_positive_float(lr, "lr", self.name)
        self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
        self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
        self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
                                outputs=['var', 'accum', 'linear'])

    def check_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
        """Check that accum/linear match var, indices is 1-D, and grad rows line up with indices."""
        validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
        validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
        # Only the trailing dimensions of grad must match var; the leading
        # dimension is indexed by `indices`.
        if len(var_shape) > 1:
            validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
        validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)

    def check_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
        """Check that float inputs share a float16/float32 dtype and indices is int32/int64."""
        args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
                "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
        validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32, mstype.int64], self.name)
class SparseApplyFtrlV2(PrimitiveWithInfer):
    """
    Updates relevant entries according to the FTRL-proximal scheme.

    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
    If they have different data types, lower priority data type will be converted to
    relatively highest priority data type.
    RuntimeError exception will be thrown when the data type conversion of Parameter is required.

    Args:
        lr (float): The learning rate value, must be positive.
        l1 (float): l1 regularization strength, must be greater than or equal to zero.
        l2 (float): l2 regularization strength, must be greater than or equal to zero.
        l2_shrinkage (float): L2 shrinkage regularization.
        lr_power (float): Learning rate power controls how the learning rate decreases during training,
            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
        use_locking (bool): If `True`, the var and accumulation tensors will be protected from being updated.
            Default: False.

    Inputs:
        - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
        - **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
        - **linear** (Parameter) - the linear coefficient to be updated, must be same data type and shape as `var`.
        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
        - **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`.
          The shape of `indices` must be the same as `grad` in the first dimension. The type must be int32.

    Outputs:
        Tuple of 3 Tensor, the updated parameters.

        - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
        - **accum** (Tensor) - Tensor, has the same shape and data type as `accum`.
        - **linear** (Tensor) - Tensor, has the same shape and data type as `linear`.

    Examples:
        >>> import mindspore
        >>> import mindspore.nn as nn
        >>> import numpy as np
        >>> from mindspore import Parameter
        >>> from mindspore import Tensor
        >>> from mindspore.ops import operations as P
        >>> class SparseApplyFtrlV2Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(SparseApplyFtrlV2Net, self).__init__()
        >>>         self.sparse_apply_ftrl_v2 = P.SparseApplyFtrlV2(lr=0.01, l1=0.0, l2=0.0,
        >>>                                                         l2_shrinkage=0.0, lr_power=-0.5)
        >>>         self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
        >>>         self.linear = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="linear")
        >>>
        >>>     def construct(self, grad, indices):
        >>>         out = self.sparse_apply_ftrl_v2(self.var, self.accum, self.linear, grad, indices)
        >>>         return out
        >>>
        >>> net = SparseApplyFtrlV2Net()
        >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
        >>> indices = Tensor(np.ones([3]), mindspore.int32)
        >>> output = net(grad, indices)
    """

    # var/accum/linear are updated in place and share dtype group T with grad;
    # indices has its own dtype group T1.
    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
    )

    @prim_attr_register
    def __init__(self, lr, l1, l2, l2_shrinkage, lr_power, use_locking=False):
        """Initialize SparseApplyFtrlV2: validate attribute types and value ranges."""
        validator.check_value_type("lr", lr, [float], self.name)
        validator.check_value_type("l1", l1, [float], self.name)
        validator.check_value_type("l2", l2, [float], self.name)
        validator.check_value_type("lr_power", lr_power, [float], self.name)
        # Value-range checks (run after type checks so the error message is precise).
        self.lr = validator.check_positive_float(lr, "lr", self.name)
        self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
        self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
        self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
        self.l2_shrinkage = validator.check_value_type("l2_shrinkage", l2_shrinkage, [float], self.name)
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)

    def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
        """Check that accum/linear match var, indices is 1-D, and grad rows line up with indices."""
        validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
        validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
        # Only the trailing dimensions of grad must match var; the leading
        # dimension is indexed by `indices`.
        if len(var_shape) > 1:
            validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
        validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
        return var_shape, accum_shape, linear_shape

    def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
        """Check that float inputs share a float16/float32 dtype and indices is int32."""
        args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
                "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
        validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
        return var_dtype, accum_dtype, linear_dtype
class Dropout(PrimitiveWithInfer):
    """
    During training, randomly zeroes some of the elements of the input tensor with probability.

    Args:
        keep_prob (float): The keep rate, between 0 and 1, e.g. keep_prob = 0.9,
            means dropping out 10% of input units. Default: 0.5.

    Inputs:
        - **input_x** (Tensor) - The input tensor, with float16 or float32 data type.

    Outputs:
        - **output** (Tensor) - With the same shape and data type as `input_x`.
        - **mask** (Tensor) - The mask applied to `input_x`, with the same shape
          and data type as `input_x`.

    Examples:
        >>> dropout = P.Dropout(keep_prob=0.5)
        >>> x = Tensor((20, 16, 50, 50))
        >>> output, mask = dropout(x)
    """

    @prim_attr_register
    def __init__(self, keep_prob=0.5):
        # keep_prob must lie in (0, 1]: a keep rate of exactly 0 is rejected.
        self.keep_prob = validator.check_float_range(keep_prob, 0, 1, Rel.INC_RIGHT, "keep_prob", self.name)

    def infer_shape(self, x_shape):
        """Input must be at least rank 1; output and mask share the input shape."""
        validator.check_int(len(x_shape), 1, Rel.GE, "x_shape", self.name)
        mask_shape = x_shape
        return x_shape, mask_shape

    def infer_dtype(self, x_dtype):
        """Input must be a float16/float32 tensor; output and mask share its dtype."""
        valid_types = (mstype.float16, mstype.float32)
        validator.check_subclass("x", x_dtype, mstype.tensor, self.name)
        validator.check_tensor_type_same({"x_dtype": x_dtype}, valid_types, self.name)
        return x_dtype, x_dtype
  4415. class CTCLoss(PrimitiveWithInfer):
  4416. """
  4417. Calculates the CTC (Connectionist Temporal Classification) loss and the gradient.
  4418. Args:
  4419. preprocess_collapse_repeated (bool): If true, repeated labels will be collapsed prior to the CTC calculation.
  4420. Default: False.
  4421. ctc_merge_repeated (bool): If false, during CTC calculation, repeated non-blank labels will not be merged
  4422. and these labels will be interpreted as individual ones. This is a simplfied
  4423. version of CTC. Default: True.
  4424. ignore_longer_outputs_than_inputs (bool): If true, sequences with longer outputs than inputs will be ignored.
  4425. Default: False.
  4426. Inputs:
  4427. - **inputs** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is
  4428. (`max_time`, `batch_size`, `num_classes`). `num_classes` must be `num_labels + 1` classes, `num_labels`
  4429. indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`.
  4430. Data type must be float16, float32 or float64.
  4431. - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] == [b, t]` means `labels_values[i]`
  4432. stores the id for `(batch b, time t)`. The type must be int64 and rank must be 2.
  4433. - **labels_values** (Tensor) - A `1-D` input tensor. The values are associated with the given batch and time.
  4434. The type must be int32. `labels_values[i]` must in the range of `[0, num_classes)`.
  4435. - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of (`batch_size`).
  4436. The type must be int32. Each value in the tensor must not be greater than `max_time`.
  4437. Outputs:
  4438. - **loss** (Tensor) - A tensor containing log-probabilities, the shape is (`batch_size`). The tensor has
  4439. the same type with `inputs`.
  4440. - **gradient** (Tensor) - The gradient of `loss`, has the same type and shape with `inputs`.
  4441. Examples:
  4442. >>> inputs = Tensor(np.random.random((2, 2, 3)), mindspore.float32)
  4443. >>> labels_indices = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int64)
  4444. >>> labels_values = Tensor(np.array([2, 2]), mindspore.int32)
  4445. >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
  4446. >>> ctc_loss = P.CTCLoss()
  4447. >>> output = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
  4448. """
  4449. @prim_attr_register
  4450. def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True,
  4451. ignore_longer_outputs_than_inputs=False):
  4452. self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"],
  4453. outputs=["loss", "gradient"])
  4454. validator.check_value_type("preprocess_collapse_repeated", preprocess_collapse_repeated, [bool], self.name)
  4455. self.preprocess_collapse_repeated_ = preprocess_collapse_repeated
  4456. self.ctc_merge_repeated_ = validator.check_value_type("ctc_merge_repeated", ctc_merge_repeated,
  4457. [bool], self.name)
  4458. validator.check_value_type("ignore_longer_outputs_than_inputs",
  4459. ignore_longer_outputs_than_inputs, [bool], self.name)
  4460. self.ignore_longer_outputs_than_inputs_ = ignore_longer_outputs_than_inputs
  4461. def infer_shape(self, inputs, labels_indices, labels_values, sequence_length):
  4462. validator.check_int(len(inputs), 3, Rel.EQ, "inputs rank", self.name)
  4463. validator.check_int(len(labels_indices), 2, Rel.EQ, "labels_indices rank", self.name)
  4464. validator.check_int(labels_indices[1], 2, Rel.EQ, "labels_indices dim one", self.name)
  4465. validator.check_int(len(labels_values), 1, Rel.EQ, "labels_values rank", self.name)
  4466. validator.check_int(len(sequence_length), 1, Rel.EQ, "sequence_length rank", self.name)
  4467. validator.check('labels_indices size', labels_indices[0], 'labels_values size',
  4468. labels_values[0], Rel.EQ, self.name)
  4469. validator.check('inputs batch_size', inputs[1], 'sequence_length batch_size',
  4470. sequence_length[0], Rel.EQ, self.name)
  4471. batch_size = []
  4472. batch_size.append(inputs[1])
  4473. return batch_size, inputs
  4474. def infer_dtype(self, inputs, labels_indices, labels_values, sequence_length):
  4475. valid_dtype = [mstype.float16, mstype.float32, mstype.double]
  4476. validator.check_tensor_type_same({"inputs_dtype": inputs}, valid_dtype, self.name)
  4477. validator.check_tensor_type_same({"labels_indices_dtype": labels_indices}, [mstype.int64], self.name)
  4478. validator.check_tensor_type_same({"labels_values_dtype": labels_values}, [mstype.int32], self.name)
  4479. validator.check_tensor_type_same({"sequence_length_dtype": sequence_length}, [mstype.int32], self.name)
  4480. return inputs, inputs
  4481. class CTCGreedyDecoder(PrimitiveWithInfer):
  4482. """
  4483. Performs greedy decoding on the logits given in inputs.
  4484. Args:
  4485. merge_repeated (bool): If true, merge repeated classes in output. Default: True.
  4486. Inputs:
  4487. - **inputs** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is
  4488. (`max_time`, `batch_size`, `num_classes`). `num_classes` must be `num_labels + 1` classes,
  4489. `num_labels` indicates the number of actual labels. Blank labels are reserved.
  4490. Default blank label is `num_classes - 1`. Data type must be float32 or float64.
  4491. - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of (`batch_size`).
  4492. The type must be int32. Each value in the tensor must not greater than `max_time`.
  4493. Outputs:
  4494. - **decoded_indices** (Tensor) - A tensor with shape of (`total_decoded_outputs`, 2).
  4495. Data type is int64.
  4496. - **decoded_values** (Tensor) - A tensor with shape of (`total_decoded_outputs`),
  4497. it stores the decoded classes. Data type is int64.
  4498. - **decoded_shape** (Tensor) - The value of tensor is [`batch_size`, `max_decoded_legth`].
  4499. Data type is int64.
  4500. - **log_probability** (Tensor) - A tensor with shape of (`batch_size`, 1),
  4501. containing sequence log-probability, has the same type as `inputs`.
  4502. Examples:
  4503. >>> class CTCGreedyDecoderNet(nn.Cell):
  4504. >>> def __init__(self):
  4505. >>> super(CTCGreedyDecoderNet, self).__init__()
  4506. >>> self.ctc_greedy_decoder = P.CTCGreedyDecoder()
  4507. >>> self.assert_op = P.Assert(300)
  4508. >>>
  4509. >>> def construct(self, inputs, sequence_length):
  4510. >>> out = self.ctc_greedy_decoder(inputs,sequence_length)
  4511. >>> self.assert_op(True, (out[0], out[1], out[2], out[3]))
  4512. >>> return out[2]
  4513. >>>
  4514. >>> inputs = Tensor(np.random.random((2, 2, 3)), mindspore.float32)
  4515. >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
  4516. >>> net = CTCGreedyDecoderNet()
  4517. >>> output = net(inputs, sequence_length)
  4518. """
  4519. @prim_attr_register
  4520. def __init__(self, merge_repeated=True):
  4521. self.merge_repeated = validator.check_value_type("merge_repeated", merge_repeated, [bool], self.name)
  4522. def infer_shape(self, inputs_shape, sequence_length_shape):
  4523. validator.check_int(len(inputs_shape), 3, Rel.EQ, "inputs rank", self.name)
  4524. validator.check_int(len(sequence_length_shape), 1, Rel.EQ, "sequence_length rank", self.name)
  4525. validator.check('inputs batch_size', inputs_shape[1], 'sequence_length batch_size',
  4526. sequence_length_shape[0], Rel.EQ, self.name)
  4527. total_decoded_outputs = -1
  4528. decoded_indices_shape = [total_decoded_outputs, 2]
  4529. decoded_values = [total_decoded_outputs]
  4530. decoded_shape = [2]
  4531. log_probability_shape = [inputs_shape[1], 1]
  4532. return decoded_indices_shape, decoded_values, decoded_shape, log_probability_shape
  4533. def infer_dtype(self, inputs_dtype, sequence_length_dtype):
  4534. validator.check_tensor_type_same({"inputs_dtype": inputs_dtype}, [mstype.float32, mstype.double], self.name)
  4535. validator.check_tensor_type_same({"sequence_length_dtype": sequence_length_dtype}, [mstype.int32], self.name)
  4536. decoded_type = mstype.tensor_type(mstype.int64)
  4537. return decoded_type, decoded_type, decoded_type, inputs_dtype
  4538. class BasicLSTMCell(PrimitiveWithInfer):
  4539. r"""
  4540. Applies the long short-term memory (LSTM) to the input.
  4541. .. math::
  4542. \begin{array}{ll} \\
  4543. i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\
  4544. f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\
  4545. \tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\
  4546. o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\
  4547. c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\
  4548. h_t = o_t * \tanh(c_t) \\
  4549. \end{array}
  4550. Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
  4551. are learnable weights between the output and the input in the formula. For instance,
  4552. :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
  4553. Details can be found in paper `LONG SHORT-TERM MEMORY
  4554. <https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and
  4555. `Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling
  4556. <https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_.
  4557. Args:
  4558. keep_prob (float): If not 1.0, append `Dropout` layer on the outputs of each
  4559. LSTM layer except the last layer. Default 1.0. The range of dropout is [0.0, 1.0].
  4560. forget_bias (float): Add forget bias to forget gate biases in order to decrease former scale. Default: 1.0.
  4561. state_is_tuple (bool): If true, the state is a tuple of 2 tensors, containing h and c; If false, the state is
  4562. a tensor and it needs to be split first. Default: True.
  4563. activation (str): Activation. Default: "tanh". Only "tanh" is currently supported.
  4564. Inputs:
  4565. - **x** (Tensor) - Current words. Tensor of shape (`batch_size`, `input_size`).
  4566. The data type must be float16 or float32.
  4567. - **h** (Tensor) - Hidden state last moment. Tensor of shape (`batch_size`, `hidden_size`).
  4568. The data type must be float16 or float32.
  4569. - **c** (Tensor) - Cell state last moment. Tensor of shape (`batch_size`, `hidden_size`).
  4570. The data type must be float16 or float32.
  4571. - **w** (Tensor) - Weight. Tensor of shape (`input_size + hidden_size`, `4 x hidden_size`).
  4572. The data type must be float16 or float32.
  4573. - **b** (Tensor) - Bias. Tensor of shape (`4 x hidden_size`).
  4574. The data type must be the same as `c`.
  4575. Outputs:
  4576. - **ct** (Tensor) - Forward :math:`c_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`).
  4577. Has the same type with input `c`.
  4578. - **ht** (Tensor) - Cell output. Tensor of shape (`batch_size`, `hidden_size`). With data type of float16.
  4579. - **it** (Tensor) - Forward :math:`i_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`).
  4580. Has the same type with input `c`.
  4581. - **jt** (Tensor) - Forward :math:`j_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`).
  4582. Has the same type with input `c`.
  4583. - **ft** (Tensor) - Forward :math:`f_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`).
  4584. Has the same type with input `c`.
  4585. - **ot** (Tensor) - Forward :math:`o_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`).
  4586. Has the same type with input `c`.
  4587. - **tanhct** (Tensor) - Forward :math:`tanh c_t` cache at moment `t`.
  4588. Tensor of shape (`batch_size`, `hidden_size`), has the same type with input `c`.
  4589. Examples:
  4590. >>> x = Tensor(np.random.rand(1, 32).astype(np.float16))
  4591. >>> h = Tensor(np.random.rand(1, 64).astype(np.float16))
  4592. >>> c = Tensor(np.random.rand(1, 64).astype(np.float16))
  4593. >>> w = Tensor(np.random.rand(96, 256).astype(np.float16))
  4594. >>> b = Tensor(np.random.rand(256, ).astype(np.float16))
  4595. >>> lstm = P.BasicLSTMCell(keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh')
  4596. >>> lstm(x, h, c, w, b)
  4597. """
  4598. @prim_attr_register
  4599. def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'):
  4600. self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
  4601. self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, Rel.INC_BOTH, "keep_prob", self.name)
  4602. self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
  4603. self.state_is_tuple = validator.check_value_type("state_is_tuple", state_is_tuple, [bool], self.name)
  4604. self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
  4605. self.add_prim_attr("io_format", "ND")
  4606. def infer_shape(self, x_shape, h_shape, c_shape, w_shape, b_shape):
  4607. validator.check_int(len(x_shape), 2, Rel.EQ, "x rank", self.name)
  4608. validator.check_int(len(h_shape), 2, Rel.EQ, "h rank", self.name)
  4609. validator.check_int(len(c_shape), 2, Rel.EQ, "c rank", self.name)
  4610. validator.check_int(len(w_shape), 2, Rel.EQ, "w rank", self.name)
  4611. validator.check_int(len(b_shape), 1, Rel.EQ, "b rank", self.name)
  4612. validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], Rel.EQ, self.name)
  4613. validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], Rel.EQ, self.name)
  4614. validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], Rel.EQ, self.name)
  4615. validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name)
  4616. validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1], Rel.EQ, self.name)
  4617. validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name)
  4618. ct_shape = c_shape
  4619. ht_shape = c_shape
  4620. it_shape = c_shape
  4621. jt_shape = c_shape
  4622. ft_shape = c_shape
  4623. ot_shape = c_shape
  4624. tanhct_shape = c_shape
  4625. return (ct_shape, ht_shape, it_shape, jt_shape, ft_shape, ot_shape, tanhct_shape)
  4626. def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype, b_dtype):
  4627. validator.check_tensor_type_same({"x_dtype": x_dtype}, [mstype.float16, mstype.float32], self.name)
  4628. validator.check_tensor_type_same({"h_dtype": h_dtype}, [mstype.float16, mstype.float32], self.name)
  4629. validator.check_tensor_type_same({"w_dtype": w_dtype}, [mstype.float16, mstype.float32], self.name)
  4630. args = {"c_dtype": c_dtype, "b_dtype": b_dtype}
  4631. validator.check_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
  4632. return (c_dtype, mstype.float16, c_dtype, c_dtype, c_dtype, c_dtype, c_dtype)
  4633. class DynamicRNN(PrimitiveWithInfer):
  4634. r"""
  4635. DynamicRNN Operator.
  4636. Args:
  4637. cell_type (str): An string identifying the cell type in the op. Default: 'LSTM'.
  4638. Only 'LSTM' is currently supported.
  4639. direction (str): An string identifying the direction in the op. Default: 'UNIDIRECTIONAL'.
  4640. Only 'UNIDIRECTIONAL' is currently supported.
  4641. cell_depth (int): An integer identifying the cell depth in the op. Default: 1.
  4642. use_peephole (bool): An bool identifying if use peephole in the op. Default: False.
  4643. keep_prob (float): An float identifying the keep prob in the op. Default: 1.0.
  4644. cell_clip (float): An float identifying the cell clip in the op. Default: -1.0.
  4645. num_proj (int): An integer identifying the num proj in the op. Default: 0.
  4646. time_major (bool): An bool identifying the time major in the op. Default: True.
  4647. Only `True` is currently supported.
  4648. activation (str): An string identifying the type of activation function in the op. Default: 'tanh'.
  4649. Only 'tanh' is currently supported.
  4650. forget_bias (float): An float identifying the forget bias in the op. Default: 0.0.
  4651. is_training (bool): An bool identifying is training in the op. Default: True.
  4652. Inputs:
  4653. - **x** (Tensor) - Current words. Tensor of shape (`num_step`, `batch_size`, `input_size`).
  4654. The data type must be float16 or float32.
  4655. - **w** (Tensor) - Weight. Tensor of shape (`input_size + hidden_size`, `4 x hidden_size`).
  4656. The data type must be float16 or float32.
  4657. - **b** (Tensor) - Bias. Tensor of shape (`4 x hidden_size`).
  4658. The data type must be float16 or float32.
  4659. - **seq_length** (Tensor) - The length of each batch. Tensor of shape (`batch_size`).
  4660. Only `None` is currently supported.
  4661. - **init_h** (Tensor) - Hidden state of initial time. Tensor of shape (1, `batch_size`, `hidden_size`).
  4662. - **init_c** (Tensor) - Cell state of initial time. Tensor of shape (1, `batch_size`, `hidden_size`).
  4663. Outputs:
  4664. - **y** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4665. Has the same type with input `b`.
  4666. - **output_h** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4667. With data type of float16.
  4668. - **output_c** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4669. Has the same type with input `b`.
  4670. - **i** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4671. Has the same type with input `b`.
  4672. - **j** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4673. Has the same type with input `b`.
  4674. - **f** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4675. Has the same type with input `b`.
  4676. - **o** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4677. Has the same type with input `b`.
  4678. - **tanhct** (Tensor) - A Tensor of shape (`num_step`, `batch_size`, `hidden_size`).
  4679. Has the same type with input `b`.
  4680. Examples:
  4681. >>> x = Tensor(np.random.rand(2, 16, 64).astype(np.float16))
  4682. >>> w = Tensor(np.random.rand(96, 128).astype(np.float16))
  4683. >>> b = Tensor(np.random.rand(128).astype(np.float16))
  4684. >>> init_h = Tensor(np.random.rand(1, 16, 32).astype(np.float16))
  4685. >>> init_c = Tensor(np.random.rand(1, 16, 32).astype(np.float16))
  4686. >>> dynamic_rnn = P.DynamicRNN()
  4687. >>> output = lstm(x, w, b, None, init_h, init_c)
  4688. """
  4689. @prim_attr_register
  4690. def __init__(self,
  4691. cell_type='LSTM',
  4692. direction='UNIDIRECTIONAL',
  4693. cell_depth=1,
  4694. use_peephole=False,
  4695. keep_prob=1.0,
  4696. cell_clip=-1.0,
  4697. num_proj=0,
  4698. time_major=True,
  4699. activation='tanh',
  4700. forget_bias=0.0,
  4701. is_training=True):
  4702. self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
  4703. self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name)
  4704. self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
  4705. self.cell_clip = validator.check_value_type("cell_clip", cell_clip, [float], self.name)
  4706. self.num_proj = validator.check_value_type("num_proj", num_proj, [int], self.name)
  4707. self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
  4708. self.use_peephole = validator.check_value_type("use_peephole", use_peephole, [bool], self.name)
  4709. self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name)
  4710. self.is_training = validator.check_value_type("is_training", is_training, [bool], self.name)
  4711. self.cell_type = validator.check_string(cell_type, ['LSTM'], "cell_type", self.name)
  4712. self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name)
  4713. self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
  4714. self.add_prim_attr("io_format", "ND")
  4715. def infer_shape(self, x_shape, w_shape, b_shape, seq_shape, h_shape, c_shape):
  4716. validator.check_int(len(x_shape), 3, Rel.EQ, "x_shape", self.name)
  4717. validator.check_int(len(w_shape), 2, Rel.EQ, "w rank", self.name)
  4718. validator.check_int(len(b_shape), 1, Rel.EQ, "b rank", self.name)
  4719. validator.check_int(len(h_shape), 3, Rel.EQ, "h_shape", self.name)
  4720. validator.check_int(len(c_shape), 3, Rel.EQ, "c_shape", self.name)
  4721. if seq_shape is not None:
  4722. raise ValueError(f"For {self.name}, seq_shape should be None.")
  4723. num_step, batch_size, input_size = x_shape
  4724. hidden_size = w_shape[-1] // 4
  4725. validator.check("b_shape[-1]", b_shape[-1], "w_shape[-1]", w_shape[-1], Rel.EQ, self.name)
  4726. if w_shape[-1] % 4 != 0:
  4727. raise ValueError(f"For {self.name}, w_shape[-1] should multiple of 4.")
  4728. validator.check("w_shape[0]", w_shape[0], "input_size + hidden_size",
  4729. input_size + hidden_size, Rel.EQ, self.name)
  4730. validator.check("b_shape[0]", b_shape[0], "w_shape[1]", w_shape[1], Rel.EQ, self.name)
  4731. validator.check_int(h_shape[0], 1, Rel.EQ, "h_shape[0]", self.name)
  4732. validator.check("h_shape[1]", h_shape[1], "batch_size", batch_size, Rel.EQ, self.name)
  4733. validator.check("h_shape[2]", h_shape[2], "hidden_size", hidden_size, Rel.EQ, self.name)
  4734. validator.check("c_shape", c_shape, "h_shape", h_shape, Rel.EQ, self.name)
  4735. y_shape = (num_step, batch_size, hidden_size)
  4736. return y_shape, y_shape, y_shape, y_shape, y_shape, y_shape, y_shape, y_shape
  4737. def infer_dtype(self, x_dtype, w_dtype, b_dtype, seq_dtype, h_dtype, c_dtype):
  4738. validator.check_tensor_type_same({"x dtype": x_dtype}, (mstype.float32, mstype.float16), self.name)
  4739. validator.check_tensor_type_same({"w dtype": w_dtype}, (mstype.float32, mstype.float16), self.name)
  4740. validator.check_tensor_type_same({"b dtype": b_dtype}, (mstype.float32, mstype.float16), self.name)
  4741. validator.check_tensor_type_same({"h dtype": h_dtype}, (mstype.float32, mstype.float16), self.name)
  4742. validator.check_tensor_type_same({"c dtype": c_dtype}, (mstype.float32, mstype.float16), self.name)
  4743. return b_dtype, x_dtype, b_dtype, b_dtype, b_dtype, b_dtype, b_dtype, b_dtype
  4744. class InTopK(PrimitiveWithInfer):
  4745. r"""
  4746. Whether the targets are in the top `k` predictions.
  4747. Args:
  4748. k (int): Specifies the number of top elements to be used for computing precision.
  4749. Inputs:
  4750. - **x1** (Tensor) - A 2D Tensor defines the predictions of a batch of samples with float16 or float32 data type.
  4751. - **x2** (Tensor) - A 1D Tensor defines the labels of a batch of samples with int32 data type. The size of x2
  4752. must be equal to x1's first dimension. The values of `x2` can not be negative and
  4753. must be equal to or less than index of x1's second dimension.
  4754. Outputs:
  4755. Tensor has 1 dimension of type bool and the same shape with `x2`. For labeling sample `i` in `x2`,
  4756. if the label in the first `k` predictions for sample `i` is in `x1`, then the value is True, otherwise False.
  4757. Examples:
  4758. >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
  4759. >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
  4760. >>> in_top_k = P.InTopK(3)
  4761. >>> result = in_top_k(x1, x2)
  4762. [True False]
  4763. """
  4764. @prim_attr_register
  4765. def __init__(self, k):
  4766. """Initialize InTopK"""
  4767. self.init_prim_io_names(inputs=['x1', 'x2', 'k'], outputs=['y'])
  4768. validator.check_value_type("k", k, [int], self.name)
  4769. def infer_dtype(self, x1_dtype, x2_dtype):
  4770. validator.check_tensor_type_same({"x1": x1_dtype}, (mstype.float16, mstype.float32,), self.name)
  4771. validator.check_tensor_type_same({"x2": x2_dtype}, (mstype.int32,), self.name)
  4772. return mstype.tensor_type(mstype.bool_)
  4773. def infer_shape(self, x1_shape, x2_shape):
  4774. validator.check("x1 shape", len(x1_shape), "", 2, Rel.EQ, self.name)
  4775. validator.check("x2 shape", len(x2_shape), "", 1, Rel.EQ, self.name)
  4776. validator.check("size of x2", x2_shape[0], "x1's first dimension", x1_shape[0], Rel.EQ, self.name)
  4777. return x2_shape
  4778. class LRN(PrimitiveWithInfer):
  4779. r"""
  4780. Local Response Normalization.
  4781. Args:
  4782. depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D.
  4783. bias (float): An offset (usually positive to avoid dividing by 0).
  4784. alpha (float): A scale factor, usually positive.
  4785. beta (float): An exponent.
  4786. norm_region (str): Specifies normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS".
  4787. Inputs:
  4788. - **x** (Tensor) - A 4D Tensor with float16 or float32 data type.
  4789. Outputs:
  4790. Tensor, with the same shape and data type as the input tensor.
  4791. Examples:
  4792. >>> x = Tensor(np.random.rand(1, 2, 2, 2), mindspore.float32)
  4793. >>> lrn = P.LRN()
  4794. >>> lrn(x)
  4795. [[[[0.18990143 0.59475636]
  4796. [0.6291904 0.1371534 ]]
  4797. [[0.6258911 0.4964315 ]
  4798. [0.3141494 0.43636137]]]]
  4799. """
  4800. @prim_attr_register
  4801. def __init__(self, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"):
  4802. """Initialize LRN"""
  4803. self.init_prim_io_names(inputs=['x'], outputs=['y'])
  4804. validator.check_value_type("depth_radius", depth_radius, [int], self.name)
  4805. validator.check_value_type("bias", bias, [float], self.name)
  4806. validator.check_value_type("alpha", alpha, [float], self.name)
  4807. validator.check_value_type("beta", beta, [float], self.name)
  4808. validator.check_value_type("norm_region", norm_region, [str], self.name)
  4809. validator.check_string(norm_region, ['ACROSS_CHANNELS'], 'norm_region', self.name)
  4810. validator.check_non_negative_int(depth_radius, "depth_radius", self.name)
  4811. def infer_dtype(self, x_dtype):
  4812. validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32,), self.name)
  4813. return x_dtype
  4814. def infer_shape(self, x_shape):
  4815. validator.check_int(len(x_shape), 4, Rel.EQ, "x_shape", self.name)
  4816. return x_shape
  4817. class UniformSampler(PrimitiveWithInfer):
  4818. r"""
  4819. Uniform candidate sampler.
  4820. This function samples a set of classes(sampled_candidates) from [0, range_max-1] based on uniform distribution.
  4821. If unique=True, candidates are drawn without replacement, else unique=False with replacement.
  4822. Args:
  4823. num_true (int): The number of target classes in each training example.
  4824. num_sampled (int): The number of classes to randomly sample. The **sampled_candidates** will have a shape
  4825. of num_sampled. If unique=True, num_sampled must be less than or equal to range_max.
  4826. unique (bool): Whether all sampled classes in a batch are unique.
  4827. range_max (int): The number of possible classes.
  4828. seed (int): Random seed, must be non-negative. Default: 0.
  4829. Inputs:
  4830. true_classes (int): A tensor. The target classes with a tensor shape of (batch_size, num_true).
  4831. Outputs:
  4832. A tuple of 3 tensors.
  4833. sampled_candidates: (int): The sampled_candidates is independent of the true classes. Shape: (num_sampled, ).
  4834. true_expected_count: (float): The expected counts under the sampling distribution of each of true_classes.
  4835. Shape: (batch_size, num_true).
  4836. sampled_expected_count: (float): The expected counts under the sampling distribution of each of
  4837. sampled_candidates. Shape: (num_sampled, ).
  4838. Examples:
  4839. >>> sampler = P.UniformSampler(1, 3, False, 4)
  4840. >>> SampledCandidates, TrueExpectedCount, SampledExpectedCount = sampler(Tensor(np.array([[1],[3],[4],[6],
  4841. [3]], dtype=np.int32)))
  4842. [1, 1, 3], [[0.75], [0.75], [0.75], [0.75], [0.75]], [0.75, 0.75, 0.75]
  4843. """
  4844. @prim_attr_register
  4845. def __init__(self, num_true, num_sampled, unique, range_max, seed=0):
  4846. """Initialize UniformSampler"""
  4847. validator.check_value_type("num_true", num_true, [int], self.name)
  4848. validator.check_value_type("num_sampled", num_sampled, [int], self.name)
  4849. validator.check_value_type("unique", unique, [bool], self.name)
  4850. validator.check_value_type("range_max", range_max, [int], self.name)
  4851. validator.check_value_type("seed", seed, [int], self.name)
  4852. validator.check("value of num_sampled", num_sampled, '', 0, Rel.GT, self.name)
  4853. if unique:
  4854. validator.check('value of num_sampled', num_sampled, "value of range_max", range_max, Rel.LE, self.name)
  4855. validator.check("value of seed", seed, '', 0, Rel.GE, self.name)
  4856. self.num_sampled = num_sampled
  4857. def infer_dtype(self, true_classes_type):
  4858. return (true_classes_type, mstype.float32, mstype.float32)
  4859. def infer_shape(self, true_classes_shape):
  4860. return ([self.num_sampled], true_classes_shape, [self.num_sampled])